diff --git a/.docker-compose/Dockerfile b/.docker-compose/Dockerfile index 319a6ea..703424a 100644 --- a/.docker-compose/Dockerfile +++ b/.docker-compose/Dockerfile @@ -1,18 +1,15 @@ FROM python:3.9.6 -RUN mkdir /app -WORKDIR /app - +RUN apt-get update +RUN apt-get -y install coinor-cbc RUN python -m pip install pulp -RUN svn checkout https://projects.coin-or.org/svn/Cbc/releases/2.9.8 Cbc-2.9.8 -RUN cd Cbc-2.9.8 && \ - ./configure && \ - make && \ - make install RUN python -m pip install pydantic RUN python -m pip install pySqsListener RUN python -m pip install daemonize +RUN mkdir /app +WORKDIR /app + # Bundle app source COPY . /app diff --git a/Dockerfile b/Dockerfile index 319a6ea..703424a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,15 @@ FROM python:3.9.6 -RUN mkdir /app -WORKDIR /app - +RUN apt-get update +RUN apt-get -y install coinor-cbc RUN python -m pip install pulp -RUN svn checkout https://projects.coin-or.org/svn/Cbc/releases/2.9.8 Cbc-2.9.8 -RUN cd Cbc-2.9.8 && \ - ./configure && \ - make && \ - make install RUN python -m pip install pydantic RUN python -m pip install pySqsListener RUN python -m pip install daemonize +RUN mkdir /app +WORKDIR /app + # Bundle app source COPY . /app diff --git a/app/helpers/irt_helper.py b/app/helpers/irt_helper.py new file mode 100644 index 0000000..401e875 --- /dev/null +++ b/app/helpers/irt_helper.py @@ -0,0 +1,22 @@ +from lib.irt.test_response_function import TestResponseFunction +from lib.irt.test_information_function import TestInformationFunction + +from models.target import Target + +def generate_tif_results(items, solver_run): + targets = [] + + for target in solver_run.objective_function.tif_targets: + tif = TestInformationFunction(solver_run.irt_model).calculate(items, theta=target.theta) + targets.append(Target(theta=target.theta, value=target.value, result=tif)) + + return targets + +def generate_tcc_results(items, solver_run): + targets = [] + + for target in solver_run.objective_function.tcc_targets: + tcc = TestResponseFunction(solver_run.irt_model).calculate(items, theta=target.theta) + targets.append(Target(theta=target.theta, value=target.value, result=tcc)) + + return targets diff --git a/app/helpers/service_helper.py b/app/helpers/service_helper.py index 07b6900..399882a 100644 --- a/app/helpers/service_helper.py +++ b/app/helpers/service_helper.py @@ -2,7 +2,7 @@ import csv import io import re -def items_csv_to_dict(items_csv_reader): +def items_csv_to_dict(items_csv_reader, irt_model): items = [] headers = [] @@ -16,8 +16,9 @@ def items_csv_to_dict(items_csv_reader): for key, col in enumerate(headers): if key == 0: item[col] = row[key] - elif col == 'b_param': - item[col] = row[key] + # b param - tmep fix! use irt model b param for proper reference + elif key == (1 - len(headers)): + item['b_param'] = row[key] elif key > 1: item['attributes'].append({ 'id': col, @@ -33,14 +34,27 @@ def solution_to_file(buffer, total_form_items, forms): wr = csv.writer(buffer, dialect='excel', delimiter=',') # write header row for first row utilizing the total items all forms will have - # and the cut score as the last item - header = [x + 1 for x in range(total_form_items)] + ['cut score'] + # fill the rows with the targets and cut score then the items + header = ['status'] + + for result in forms[0].tif_results: + header += [f'tif @ {round(result.theta, 2)}'] + + for result in forms[0].tcc_results: + header += [f'tcc @ {round(result.theta, 2)}'] + + header += ['cut score'] + [x + 1 for x in range(total_form_items)] wr.writerow(header) # add each form as row to processed csv for form in forms: + row = [form.status] + + for result in form.tif_results + form.tcc_results: + row += [f'value - {result.value}\nresult - {round(result.result, 2)}'] + # provide generated items and cut score - row = form.items + [form.cut_score] + row += [round(form.cut_score, 2)] + [item.id for item in form.items] wr.writerow(row) buff2 = io.BytesIO(buffer.getvalue().encode()) @@ -49,3 +63,15 @@ def solution_to_file(buffer, total_form_items, forms): def key_to_uuid(key): return re.split("_", key)[0] + +def solution_items(variables, solver_run): + form_items = [] + + for v in variables: + if v.varValue > 0: + item_id = v.name.replace('Item_', '') + item = solver_run.get_item(item_id) + # add item to list and then remove from master item list + form_items.append(item) + + return form_items diff --git a/app/helpers/solver_helper.py b/app/helpers/solver_helper.py new file mode 100644 index 0000000..f8a321e --- /dev/null +++ b/app/helpers/solver_helper.py @@ -0,0 +1,22 @@ +from pulp import lpSum + +def build_constraints(solver_run, problem, items): + total_form_items = solver_run.total_form_items + constraints = solver_run.constraints + + for constraint in constraints: + attribute = constraint.reference_attribute + min = constraint.minimum + max = constraint.maximum + + con = dict(zip([item.id for item in solver_run.items], + [item.attribute_exists(attribute) + for item in solver_run.items])) + problem += lpSum([con[item.id] + * items[item.id] + for item in solver_run.items]) >= round(total_form_items * (min / 100)), f'{attribute.id} - {attribute.value} - min' + problem += lpSum([con[item.id] + * items[item.id] + for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max' + + return problem diff --git a/app/lib/irt/item_information_function.py b/app/lib/irt/item_information_function.py new file mode 100644 index 0000000..ddb3cbb --- /dev/null +++ b/app/lib/irt/item_information_function.py @@ -0,0 +1,17 @@ +from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic + +class ItemInformationFunction(): + def __init__(self, irt_model): + self.model_data = irt_model + + # determines the amount of information for a given question at a given theta (ability level) + # further detailed on page 161, equation 4 here: + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf + def calculate(self, **kwargs): + if self.model_data.model == '3PL': + p = ThreeParameterLogistic(self.model_data, kwargs).result() + q = 1 - p + return self.model_data.a_param**2 * ((q / p) * ((p - (self.model_data.c_param**2)) / (1 - (self.model_data.c_param**2)))) + else: + # potentially error out + return None diff --git a/app/lib/irt/item_response_function.py b/app/lib/irt/item_response_function.py new file mode 100644 index 0000000..df66a0e --- /dev/null +++ b/app/lib/irt/item_response_function.py @@ -0,0 +1,12 @@ +from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic + +class ItemResponseFunction(): + def __init__(self, irt_model): + self.model_data = irt_model + + def calculate(self, **kwargs): + if self.model_data.model == '3PL': + return ThreeParameterLogistic(self.model_data, kwargs).result() + else: + # potentially error out + return None diff --git a/app/lib/irt/models/three_parameter_logistic.py b/app/lib/irt/models/three_parameter_logistic.py new file mode 100644 index 0000000..755331e --- /dev/null +++ b/app/lib/irt/models/three_parameter_logistic.py @@ -0,0 +1,16 @@ +class ThreeParameterLogistic: + def __init__(self, model_params, kwargs): + self.model_params = model_params + # check if exists, if not error out + self.b_param = kwargs['b_param'] + self.e = 2.71828 + self.theta = kwargs['theta'] + + # contains the primary 3pl function, determining the probably of an inidividual + # that an individual at a certain theta would get a particular question correct + # detailed further on page 161, equation 1 here: + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf + def result(self): + a = self.model_params.a_param + c = self.model_params.c_param + return c + (1 - c) * (1 / (1 + self.e**(-a * (self.theta - self.b_param)))) diff --git a/app/lib/irt/test_information_function.py b/app/lib/irt/test_information_function.py new file mode 100644 index 0000000..b91f9b5 --- /dev/null +++ b/app/lib/irt/test_information_function.py @@ -0,0 +1,19 @@ +from lib.irt.item_information_function import ItemInformationFunction + +class TestInformationFunction(): + def __init__(self, irt_model): + self.irt_model = irt_model + self.iif = ItemInformationFunction(irt_model) + + # determins the amount of information + # at a certain theta (ability level) of the sum of a question set correct + # detailed further on page 166, equation 4 here: + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf + def calculate(self, items, **kwargs): + sum = 0 + + for item in items: + result = self.iif.calculate(b_param=item.b_param, theta=kwargs['theta']) + sum += result + + return sum diff --git a/app/lib/irt/test_response_function.py b/app/lib/irt/test_response_function.py new file mode 100644 index 0000000..d06aa83 --- /dev/null +++ b/app/lib/irt/test_response_function.py @@ -0,0 +1,20 @@ +from lib.irt.item_response_function import ItemResponseFunction + +# otherwise known as the Test Characteristic Curve (TCC) +class TestResponseFunction(): + def __init__(self, irt_model): + self.irt_model = irt_model + self.irf = ItemResponseFunction(irt_model) + + # determins the probably of an inidividual + # at a certain theta (ability level) would get a sum of questions correct + # detailed further on page 166, equation 3 here: + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf + def calculate(self, items, **kwargs): + sum = 0 + + for item in items: + result = self.irf.calculate(b_param=item.b_param, theta=kwargs['theta']) + sum += result + + return sum diff --git a/app/main.py b/app/main.py index 748323b..2c601bc 100644 --- a/app/main.py +++ b/app/main.py @@ -19,7 +19,7 @@ class ServiceListener(SqsListener): logging.info('Process complete for %s', service.file_name) def main(): - logging.info('Starting Solver Service (v0.4.3)...') + logging.info('Starting Solver Service (v1.0.0)...') listener = ServiceListener( 'measure-development-solver-ingest', region_name=os.environ['AWS_REGION'], diff --git a/app/models/advanced_options.py b/app/models/advanced_options.py index 1a9fb34..1d7d420 100644 --- a/app/models/advanced_options.py +++ b/app/models/advanced_options.py @@ -2,8 +2,8 @@ from pydantic import BaseModel from typing import List, Optional, Dict class AdvancedOptions(BaseModel): - linearity_check: bool - show_progress: bool + linearity_check: Optional[bool] + show_progress: Optional[bool] max_solution_time: Optional[int] brand_bound_tolerance: Optional[float] max_forms: Optional[int] diff --git a/app/models/constraint.py b/app/models/constraint.py index 84eb1b2..28a3939 100644 --- a/app/models/constraint.py +++ b/app/models/constraint.py @@ -1,9 +1,8 @@ from pydantic import BaseModel -from typing import Optional from models.attribute import Attribute class Constraint(BaseModel): reference_attribute: Attribute - minimum: int - maximum: int + minimum: float + maximum: float diff --git a/app/models/form.py b/app/models/form.py index ea7844f..72863ae 100644 --- a/app/models/form.py +++ b/app/models/form.py @@ -1,8 +1,26 @@ from pydantic import BaseModel from typing import List +from helpers import irt_helper + from models.item import Item +from models.target import Target + +from lib.irt.test_response_function import TestResponseFunction class Form(BaseModel): - items: List[int] + items: List[Item] cut_score: float + tif_results: List[Target] + tcc_results: List[Target] + status: str = 'Not Optimized' + + @classmethod + def create(cls, items, solver_run, status): + return cls( + items=items, + cut_score=TestResponseFunction(solver_run.irt_model).calculate(items, theta=solver_run.theta_cut_score), + tif_results=irt_helper.generate_tif_results(items, solver_run), + tcc_results=irt_helper.generate_tcc_results(items, solver_run), + status=status + ) diff --git a/app/models/irt_model.py b/app/models/irt_model.py index 4751ba9..2596686 100644 --- a/app/models/irt_model.py +++ b/app/models/irt_model.py @@ -1,7 +1,8 @@ from pydantic import BaseModel +from typing import Dict class IRTModel(BaseModel): a_param: float - b_param: float + b_param: Dict = {"schema_bson_id": str, "field_bson_id": str} c_param: float model: str diff --git a/app/models/item.py b/app/models/item.py index 8ae3b70..9cb1d8d 100644 --- a/app/models/item.py +++ b/app/models/item.py @@ -3,7 +3,28 @@ from typing import List from models.attribute import Attribute +from lib.irt.item_response_function import ItemResponseFunction +from lib.irt.item_information_function import ItemInformationFunction + class Item(BaseModel): id: int attributes: List[Attribute] - b_param: int + b_param: float = 0.00 + + def iif(self, solver_run, theta): + return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) + + def irf(self, solver_run, theta): + return ItemResponseFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) + + def get_attribute(self, ref_attribute): + for attribute in self.attributes: + if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value: + return attribute.value + return False + + def attribute_exists(self, ref_attribute): + for attribute in self.attributes: + if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value: + return True + return False \ No newline at end of file diff --git a/app/models/solver_run.py b/app/models/solver_run.py index 1c0727b..e8c8543 100644 --- a/app/models/solver_run.py +++ b/app/models/solver_run.py @@ -13,6 +13,17 @@ class SolverRun(BaseModel): irt_model: IRTModel objective_function: ObjectiveFunction total_form_items: int + total_forms: int = 1 theta_cut_score: float = 0.00 advanced_options: Optional[AdvancedOptions] engine: str + + def get_item(self, item_id): + for item in self.items: + if str(item.id) == item_id: + return item + return False + + def remove_items(self, items): + self.items = [item for item in self.items if item not in items] + return True diff --git a/app/models/target.py b/app/models/target.py index 1383208..3999d83 100644 --- a/app/models/target.py +++ b/app/models/target.py @@ -1,5 +1,7 @@ from pydantic import BaseModel +from typing import Optional class Target(BaseModel): theta: float value: float + result: Optional[float] diff --git a/app/services/loft_service.py b/app/services/loft_service.py index ec0cb96..07bfe2a 100644 --- a/app/services/loft_service.py +++ b/app/services/loft_service.py @@ -1,6 +1,8 @@ import os, json, random, io, logging -from helpers import aws_helper, tar_helper, csv_helper, service_helper +from pulp import LpProblem, LpVariable, LpMinimize, LpStatus, lpSum + +from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper from models.solver_run import SolverRun from models.solution import Solution @@ -31,31 +33,72 @@ class LoftService(Base): items_csv_reader = csv_helper.file_stream_reader(items_csv) # add items to attributes dict - attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader) + attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader, attributes['irt_model']) logging.info('Processed Attributes...') return attributes def generate_solution(self): - logging.info('Processing Solution...') - # temporary data for mocks - form_count = 10 - - # items will be generated from real solver process, this is for mock purposes - # real solver will return N forms and process a cut score, this is for mock purposes - return Solution( - response_id=random.randint(100,5000), - forms=[ - Form( - items=[item.id for item in random.sample(self.solver_run.items, self.solver_run.total_form_items)], - cut_score=120 - ) for x in range(form_count) - ] + # unsolved solution + solution = Solution( + response_id=random.randint(100, 5000), + forms=[] ) + # counter for number of forms + f = 0 + + # iterate for number of forms that require creation + # currently creates distinc forms with no item overlap + while f < self.solver_run.total_forms: + # setup vars + items = LpVariable.dicts( + "Item", [item.id for item in self.solver_run.items], lowBound=1, upBound=1, cat='Binary') + problem_objection_functions = [] + + # create problem + problem = LpProblem("ata-form-generate", LpMinimize) + + # constraints + problem += lpSum([items[item.id] + for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items' + + # generic constraints + problem = solver_helper.build_constraints(self.solver_run, problem, items) + + # multi-objective functions and constraints + for target in self.solver_run.objective_function.tif_targets: + tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id] + for item in self.solver_run.items]) + problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id] + for item in self.solver_run.items]) <= target.value, f'min tif theta ({target.theta}) target value {target.value}' + problem_objection_functions.append(tif) + + for target in self.solver_run.objective_function.tcc_targets: + tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id] + for item in self.solver_run.items]) + problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id] + for item in self.solver_run.items]) <= target.value, f'min tcc theta ({target.theta}) target value {target.value}' + problem_objection_functions.append(tcc) + + # solve problem + problem.sequentialSolve(problem_objection_functions) + + # add return items and create as a form + form_items = service_helper.solution_items(problem.variables(), self.solver_run) + # remove items + self.solver_run.remove_items(form_items) + # add form to solution + solution.forms.append(Form.create(form_items, self.solver_run, LpStatus[problem.status])) + + # successfull form, increment + f += 1 + + return solution + def stream_to_s3_bucket(self): self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv' - logging.info('Streaming to %s s3 bucket %s', self.file_name, os.environ['S3_PROCESSED_BUCKET']) + logging.info('Streaming %s to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET']) # setup writer buffer and write processed forms to file buffer = io.StringIO() solution_file = service_helper.solution_to_file(buffer, self.solver_run.total_form_items, self.solution.forms)