Merge pull request #9 from yardstick/feature/QUANT-1196-solve-all-the-things
QUANT-1196: SOLVEALLTHETHINGS
This commit is contained in:
@ -1,18 +1,15 @@
|
||||
FROM python:3.9.6
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install coinor-cbc
|
||||
RUN python -m pip install pulp
|
||||
RUN svn checkout https://projects.coin-or.org/svn/Cbc/releases/2.9.8 Cbc-2.9.8
|
||||
RUN cd Cbc-2.9.8 && \
|
||||
./configure && \
|
||||
make && \
|
||||
make install
|
||||
RUN python -m pip install pydantic
|
||||
RUN python -m pip install pySqsListener
|
||||
RUN python -m pip install daemonize
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
|
||||
# Bundle app source
|
||||
COPY . /app
|
||||
|
||||
|
13
Dockerfile
13
Dockerfile
@ -1,18 +1,15 @@
|
||||
FROM python:3.9.6
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install coinor-cbc
|
||||
RUN python -m pip install pulp
|
||||
RUN svn checkout https://projects.coin-or.org/svn/Cbc/releases/2.9.8 Cbc-2.9.8
|
||||
RUN cd Cbc-2.9.8 && \
|
||||
./configure && \
|
||||
make && \
|
||||
make install
|
||||
RUN python -m pip install pydantic
|
||||
RUN python -m pip install pySqsListener
|
||||
RUN python -m pip install daemonize
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
|
||||
# Bundle app source
|
||||
COPY . /app
|
||||
|
||||
|
22
app/helpers/irt_helper.py
Normal file
22
app/helpers/irt_helper.py
Normal file
@ -0,0 +1,22 @@
|
||||
from lib.irt.test_response_function import TestResponseFunction
|
||||
from lib.irt.test_information_function import TestInformationFunction
|
||||
|
||||
from models.target import Target
|
||||
|
||||
def generate_tif_results(items, solver_run):
    """Evaluate the test information function at every TIF target.

    For each target in ``solver_run.objective_function.tif_targets`` a new
    ``Target`` is built that copies the target's ``theta`` and ``value`` and
    stores, as ``result``, whatever ``TestInformationFunction.calculate``
    returns for ``items`` at that theta.

    Returns the new ``Target`` objects as a list, in target order.
    """
    def evaluate(goal):
        # a fresh function object is built per target, using the run's IRT model
        information = TestInformationFunction(solver_run.irt_model).calculate(
            items, theta=goal.theta)
        return Target(theta=goal.theta, value=goal.value, result=information)

    return [evaluate(goal) for goal in solver_run.objective_function.tif_targets]
|
||||
|
||||
def generate_tcc_results(items, solver_run):
    """Evaluate the test response function (TCC) at every TCC target.

    For each target in ``solver_run.objective_function.tcc_targets`` a new
    ``Target`` is built that copies the target's ``theta`` and ``value`` and
    stores, as ``result``, whatever ``TestResponseFunction.calculate``
    returns for ``items`` at that theta.

    Returns the new ``Target`` objects as a list, in target order.
    """
    def evaluate(goal):
        # a fresh function object is built per target, using the run's IRT model
        expected_score = TestResponseFunction(solver_run.irt_model).calculate(
            items, theta=goal.theta)
        return Target(theta=goal.theta, value=goal.value, result=expected_score)

    return [evaluate(goal) for goal in solver_run.objective_function.tcc_targets]
|
@ -2,7 +2,7 @@ import csv
|
||||
import io
|
||||
import re
|
||||
|
||||
def items_csv_to_dict(items_csv_reader):
|
||||
def items_csv_to_dict(items_csv_reader, irt_model):
|
||||
items = []
|
||||
headers = []
|
||||
|
||||
@ -16,8 +16,9 @@ def items_csv_to_dict(items_csv_reader):
|
||||
for key, col in enumerate(headers):
|
||||
if key == 0:
|
||||
item[col] = row[key]
|
||||
elif col == 'b_param':
|
||||
item[col] = row[key]
|
||||
# b param - temp fix! use irt model b param for proper reference
|
||||
elif key == (1 - len(headers)):
|
||||
item['b_param'] = row[key]
|
||||
elif key > 1:
|
||||
item['attributes'].append({
|
||||
'id': col,
|
||||
@ -33,14 +34,27 @@ def solution_to_file(buffer, total_form_items, forms):
|
||||
wr = csv.writer(buffer, dialect='excel', delimiter=',')
|
||||
|
||||
# write header row for first row utilizing the total items all forms will have
|
||||
# and the cut score as the last item
|
||||
header = [x + 1 for x in range(total_form_items)] + ['cut score']
|
||||
# fill the rows with the targets and cut score then the items
|
||||
header = ['status']
|
||||
|
||||
for result in forms[0].tif_results:
|
||||
header += [f'tif @ {round(result.theta, 2)}']
|
||||
|
||||
for result in forms[0].tcc_results:
|
||||
header += [f'tcc @ {round(result.theta, 2)}']
|
||||
|
||||
header += ['cut score'] + [x + 1 for x in range(total_form_items)]
|
||||
wr.writerow(header)
|
||||
|
||||
# add each form as row to processed csv
|
||||
for form in forms:
|
||||
row = [form.status]
|
||||
|
||||
for result in form.tif_results + form.tcc_results:
|
||||
row += [f'value - {result.value}\nresult - {round(result.result, 2)}']
|
||||
|
||||
# provide generated items and cut score
|
||||
row = form.items + [form.cut_score]
|
||||
row += [round(form.cut_score, 2)] + [item.id for item in form.items]
|
||||
wr.writerow(row)
|
||||
|
||||
buff2 = io.BytesIO(buffer.getvalue().encode())
|
||||
@ -49,3 +63,15 @@ def solution_to_file(buffer, total_form_items, forms):
|
||||
|
||||
def key_to_uuid(key):
|
||||
return re.split("_", key)[0]
|
||||
|
||||
def solution_items(variables, solver_run):
    """Collect the items selected by the solver.

    Args:
        variables: solver variables; each must expose ``name`` and
            ``varValue``.
        solver_run: object whose ``get_item(item_id)`` resolves the id that
            remains after the ``'Item_'`` prefix is stripped from the
            variable name.

    Returns:
        A list with the ``get_item`` result for every variable whose value
        is greater than zero, in the order the variables were given.
        Nothing is removed from ``solver_run`` here; callers that need the
        selected items taken out of the pool must do so themselves.
    """
    form_items = []

    for variable in variables:
        value = variable.varValue
        # a variable the solver never assigned has varValue None; comparing
        # None with 0 would raise TypeError, so treat it as "not selected"
        if value is not None and value > 0:
            item_id = variable.name.replace('Item_', '')
            form_items.append(solver_run.get_item(item_id))

    return form_items
|
||||
|
22
app/helpers/solver_helper.py
Normal file
22
app/helpers/solver_helper.py
Normal file
@ -0,0 +1,22 @@
|
||||
from pulp import lpSum
|
||||
|
||||
def build_constraints(solver_run, problem, items):
    """Add a min and a max attribute constraint to ``problem`` per constraint.

    For every constraint in ``solver_run.constraints`` the items carrying the
    constraint's reference attribute are counted (via
    ``item.attribute_exists``) and that count is bounded below and above.
    ``minimum`` and ``maximum`` are divided by 100 and multiplied by
    ``solver_run.total_form_items``, then rounded, to obtain the bounds.

    Args:
        solver_run: provides ``total_form_items``, ``constraints`` and ``items``.
        problem: the problem object the constraints are added to with ``+=``.
        items: mapping from item id to that item's decision variable.

    Returns:
        The ``problem`` object after all constraints were added.
    """
    form_size = solver_run.total_form_items

    for constraint in solver_run.constraints:
        attribute = constraint.reference_attribute
        lower_pct = constraint.minimum
        upper_pct = constraint.maximum

        # item id -> True/False: does the item carry the reference attribute?
        has_attribute = {
            item.id: item.attribute_exists(attribute)
            for item in solver_run.items
        }

        def matching_items():
            # number of selected items that carry the attribute
            return lpSum([has_attribute[item.id] * items[item.id]
                          for item in solver_run.items])

        lower_bound = round(form_size * (lower_pct / 100))
        problem += matching_items() >= lower_bound, f'{attribute.id} - {attribute.value} - min'

        upper_bound = round(form_size * (upper_pct / 100))
        problem += matching_items() <= upper_bound, f'{attribute.id} - {attribute.value} - max'

    return problem
|
17
app/lib/irt/item_information_function.py
Normal file
17
app/lib/irt/item_information_function.py
Normal file
@ -0,0 +1,17 @@
|
||||
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
||||
|
||||
class ItemInformationFunction():
    """Item information function (IIF) for a configured IRT model.

    Only the ``'3PL'`` model is supported; any other ``model`` value makes
    ``calculate`` return ``None``.
    """

    def __init__(self, irt_model):
        # irt_model must expose ``model``, ``a_param`` and ``c_param``
        self.model_data = irt_model

    @staticmethod
    def _information_3pl(a, c, p):
        """Return the 3PL item information for discrimination ``a``,
        guessing parameter ``c`` and probability of a correct answer ``p``.

        I = a^2 * (q / p) * ((p - c) / (1 - c))^2, with q = 1 - p.
        The square applies to the whole ratio, not to ``c``; with c = 0 the
        expression reduces to the familiar a^2 * p * q.
        """
        q = 1 - p
        return a**2 * (q / p) * ((p - c) / (1 - c))**2

    # determines the amount of information for a given question at a given theta (ability level)
    # further detailed on page 161, equation 4 here:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
    def calculate(self, **kwargs):
        """Return the item information, or ``None`` for an unsupported model.

        ``kwargs`` is passed unchanged to ``ThreeParameterLogistic``, which
        reads ``b_param`` and ``theta`` from it.
        """
        if self.model_data.model == '3PL':
            p = ThreeParameterLogistic(self.model_data, kwargs).result()
            return self._information_3pl(
                self.model_data.a_param, self.model_data.c_param, p)
        else:
            # potentially error out
            return None
|
12
app/lib/irt/item_response_function.py
Normal file
12
app/lib/irt/item_response_function.py
Normal file
@ -0,0 +1,12 @@
|
||||
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
||||
|
||||
class ItemResponseFunction():
    """Item response function (IRF) for a configured IRT model.

    Only the ``'3PL'`` model is supported; any other ``model`` value makes
    ``calculate`` return ``None``.
    """

    def __init__(self, irt_model):
        # irt_model must expose ``model`` plus whatever the model class reads
        self.model_data = irt_model

    def calculate(self, **kwargs):
        """Return the 3PL result for ``kwargs``, or ``None`` when the
        configured model is not ``'3PL'``.

        ``kwargs`` is handed unchanged to ``ThreeParameterLogistic``.
        """
        if self.model_data.model != '3PL':
            # potentially error out
            return None

        logistic = ThreeParameterLogistic(self.model_data, kwargs)
        return logistic.result()
|
16
app/lib/irt/models/three_parameter_logistic.py
Normal file
16
app/lib/irt/models/three_parameter_logistic.py
Normal file
@ -0,0 +1,16 @@
|
||||
class ThreeParameterLogistic:
    """Three-parameter logistic (3PL) item response model.

    ``model_params`` supplies ``a_param`` and ``c_param``; the second
    argument is a dict that must contain ``b_param`` and ``theta``.
    """

    def __init__(self, model_params, kwargs):
        # imported locally so the module needs no new top-level dependency
        import math

        self.model_params = model_params
        # check if exists, if not error out (a missing key raises KeyError)
        self.b_param = kwargs['b_param']
        # full-precision Euler's number instead of the truncated 2.71828
        self.e = math.e
        self.theta = kwargs['theta']

    # contains the primary 3pl function, determining the probability
    # that an individual at a certain theta would get a particular question correct
    # detailed further on page 161, equation 1 here:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
    def result(self):
        """Return c + (1 - c) / (1 + e^(-a * (theta - b)))."""
        a = self.model_params.a_param
        c = self.model_params.c_param
        return c + (1 - c) * (1 / (1 + self.e**(-a * (self.theta - self.b_param))))
|
19
app/lib/irt/test_information_function.py
Normal file
19
app/lib/irt/test_information_function.py
Normal file
@ -0,0 +1,19 @@
|
||||
from lib.irt.item_information_function import ItemInformationFunction
|
||||
|
||||
class TestInformationFunction():
    """Test information function (TIF): item information summed over a set
    of items."""

    def __init__(self, irt_model):
        self.irt_model = irt_model
        self.iif = ItemInformationFunction(irt_model)

    # determines the amount of information
    # at a certain theta (ability level) of the sum of a question set correct
    # detailed further on page 166, equation 4 here:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
    def calculate(self, items, **kwargs):
        """Return the sum of ``self.iif.calculate`` over ``items``.

        Each item contributes the value computed from its ``b_param`` and
        ``kwargs['theta']``. An empty ``items`` yields ``0``.
        """
        total = 0

        for item in items:
            total += self.iif.calculate(b_param=item.b_param, theta=kwargs['theta'])

        return total
|
20
app/lib/irt/test_response_function.py
Normal file
20
app/lib/irt/test_response_function.py
Normal file
@ -0,0 +1,20 @@
|
||||
from lib.irt.item_response_function import ItemResponseFunction
|
||||
|
||||
# otherwise known as the Test Characteristic Curve (TCC)
|
||||
class TestResponseFunction():
    """Test response function: item response values summed over a set of
    items."""

    def __init__(self, irt_model):
        self.irt_model = irt_model
        self.irf = ItemResponseFunction(irt_model)

    # determines the probability that an individual
    # at a certain theta (ability level) would get a sum of questions correct
    # detailed further on page 166, equation 3 here:
    # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
    def calculate(self, items, **kwargs):
        """Return the sum of ``self.irf.calculate`` over ``items``.

        Each item contributes the value computed from its ``b_param`` and
        ``kwargs['theta']``. An empty ``items`` yields ``0``.
        """
        total = 0

        for item in items:
            total += self.irf.calculate(b_param=item.b_param, theta=kwargs['theta'])

        return total
|
@ -19,7 +19,7 @@ class ServiceListener(SqsListener):
|
||||
logging.info('Process complete for %s', service.file_name)
|
||||
|
||||
def main():
|
||||
logging.info('Starting Solver Service (v0.4.3)...')
|
||||
logging.info('Starting Solver Service (v1.0.0)...')
|
||||
listener = ServiceListener(
|
||||
'measure-development-solver-ingest',
|
||||
region_name=os.environ['AWS_REGION'],
|
||||
|
@ -2,8 +2,8 @@ from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict
|
||||
|
||||
class AdvancedOptions(BaseModel):
|
||||
linearity_check: bool
|
||||
show_progress: bool
|
||||
linearity_check: Optional[bool]
|
||||
show_progress: Optional[bool]
|
||||
max_solution_time: Optional[int]
|
||||
brand_bound_tolerance: Optional[float]
|
||||
max_forms: Optional[int]
|
||||
|
@ -1,9 +1,8 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
from models.attribute import Attribute
|
||||
|
||||
class Constraint(BaseModel):
|
||||
reference_attribute: Attribute
|
||||
minimum: int
|
||||
maximum: int
|
||||
minimum: float
|
||||
maximum: float
|
||||
|
@ -1,8 +1,26 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
from helpers import irt_helper
|
||||
|
||||
from models.item import Item
|
||||
from models.target import Target
|
||||
|
||||
from lib.irt.test_response_function import TestResponseFunction
|
||||
|
||||
class Form(BaseModel):
|
||||
items: List[int]
|
||||
items: List[Item]
|
||||
cut_score: float
|
||||
tif_results: List[Target]
|
||||
tcc_results: List[Target]
|
||||
status: str = 'Not Optimized'
|
||||
|
||||
@classmethod
def create(cls, items, solver_run, status):
    """Build a form from solved ``items``.

    The cut score is the test response function evaluated at
    ``solver_run.theta_cut_score``; TIF and TCC results come from
    ``irt_helper``. ``status`` is stored as given.
    """
    cut_score = TestResponseFunction(solver_run.irt_model).calculate(
        items, theta=solver_run.theta_cut_score)
    tif_results = irt_helper.generate_tif_results(items, solver_run)
    tcc_results = irt_helper.generate_tcc_results(items, solver_run)

    return cls(
        items=items,
        cut_score=cut_score,
        tif_results=tif_results,
        tcc_results=tcc_results,
        status=status,
    )
|
||||
|
@ -1,7 +1,8 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict
|
||||
|
||||
class IRTModel(BaseModel):
|
||||
a_param: float
|
||||
b_param: float
|
||||
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
|
||||
c_param: float
|
||||
model: str
|
||||
|
@ -3,7 +3,28 @@ from typing import List
|
||||
|
||||
from models.attribute import Attribute
|
||||
|
||||
from lib.irt.item_response_function import ItemResponseFunction
|
||||
from lib.irt.item_information_function import ItemInformationFunction
|
||||
|
||||
class Item(BaseModel):
|
||||
id: int
|
||||
attributes: List[Attribute]
|
||||
b_param: int
|
||||
b_param: float = 0.00
|
||||
|
||||
def iif(self, solver_run, theta):
    """Return this item's information at ``theta`` under the run's IRT model."""
    information_function = ItemInformationFunction(solver_run.irt_model)
    return information_function.calculate(b_param=self.b_param, theta=theta)
|
||||
|
||||
def irf(self, solver_run, theta):
    """Return this item's response value at ``theta`` under the run's IRT model."""
    response_function = ItemResponseFunction(solver_run.irt_model)
    return response_function.calculate(b_param=self.b_param, theta=theta)
|
||||
|
||||
def get_attribute(self, ref_attribute):
    """Return the value of the first attribute whose ``id`` and ``value``
    both equal those of ``ref_attribute``; ``False`` when none matches."""
    matching_values = (
        candidate.value
        for candidate in self.attributes
        if candidate.id == ref_attribute.id and candidate.value == ref_attribute.value
    )
    return next(matching_values, False)
|
||||
|
||||
def attribute_exists(self, ref_attribute):
    """Return ``True`` when some attribute has the same ``id`` and ``value``
    as ``ref_attribute``, otherwise ``False``."""
    return any(
        candidate.id == ref_attribute.id and candidate.value == ref_attribute.value
        for candidate in self.attributes
    )
|
@ -13,6 +13,17 @@ class SolverRun(BaseModel):
|
||||
irt_model: IRTModel
|
||||
objective_function: ObjectiveFunction
|
||||
total_form_items: int
|
||||
total_forms: int = 1
|
||||
theta_cut_score: float = 0.00
|
||||
advanced_options: Optional[AdvancedOptions]
|
||||
engine: str
|
||||
|
||||
def get_item(self, item_id):
    """Return the first item whose ``id``, converted to ``str``, equals
    ``item_id``; ``False`` when there is no such item."""
    candidates = (item for item in self.items if str(item.id) == item_id)
    return next(candidates, False)
|
||||
|
||||
def remove_items(self, items):
    """Rebind ``self.items`` to the items not contained in ``items``.

    Always returns ``True``.
    """
    remaining = []
    for item in self.items:
        if item in items:
            continue
        remaining.append(item)

    self.items = remaining
    return True
|
||||
|
@ -1,5 +1,7 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
class Target(BaseModel):
|
||||
theta: float
|
||||
value: float
|
||||
result: Optional[float]
|
||||
|
@ -1,6 +1,8 @@
|
||||
import os, json, random, io, logging
|
||||
|
||||
from helpers import aws_helper, tar_helper, csv_helper, service_helper
|
||||
from pulp import LpProblem, LpVariable, LpMinimize, LpStatus, lpSum
|
||||
|
||||
from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper
|
||||
|
||||
from models.solver_run import SolverRun
|
||||
from models.solution import Solution
|
||||
@ -31,31 +33,72 @@ class LoftService(Base):
|
||||
items_csv_reader = csv_helper.file_stream_reader(items_csv)
|
||||
|
||||
# add items to attributes dict
|
||||
attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader)
|
||||
attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader, attributes['irt_model'])
|
||||
logging.info('Processed Attributes...')
|
||||
|
||||
return attributes
|
||||
|
||||
def generate_solution(self):
|
||||
logging.info('Processing Solution...')
|
||||
# temporary data for mocks
|
||||
form_count = 10
|
||||
|
||||
# items will be generated from real solver process, this is for mock purposes
|
||||
# real solver will return N forms and process a cut score, this is for mock purposes
|
||||
return Solution(
|
||||
response_id=random.randint(100,5000),
|
||||
forms=[
|
||||
Form(
|
||||
items=[item.id for item in random.sample(self.solver_run.items, self.solver_run.total_form_items)],
|
||||
cut_score=120
|
||||
) for x in range(form_count)
|
||||
]
|
||||
# unsolved solution
|
||||
solution = Solution(
|
||||
response_id=random.randint(100, 5000),
|
||||
forms=[]
|
||||
)
|
||||
|
||||
# counter for number of forms
|
||||
f = 0
|
||||
|
||||
# iterate for number of forms that require creation
|
||||
# currently creates distinct forms with no item overlap
|
||||
while f < self.solver_run.total_forms:
|
||||
# setup vars
|
||||
items = LpVariable.dicts(
|
||||
"Item", [item.id for item in self.solver_run.items], lowBound=1, upBound=1, cat='Binary')
|
||||
problem_objection_functions = []
|
||||
|
||||
# create problem
|
||||
problem = LpProblem("ata-form-generate", LpMinimize)
|
||||
|
||||
# constraints
|
||||
problem += lpSum([items[item.id]
|
||||
for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
|
||||
|
||||
# generic constraints
|
||||
problem = solver_helper.build_constraints(self.solver_run, problem, items)
|
||||
|
||||
# multi-objective functions and constraints
|
||||
for target in self.solver_run.objective_function.tif_targets:
|
||||
tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
||||
for item in self.solver_run.items])
|
||||
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
||||
for item in self.solver_run.items]) <= target.value, f'min tif theta ({target.theta}) target value {target.value}'
|
||||
problem_objection_functions.append(tif)
|
||||
|
||||
for target in self.solver_run.objective_function.tcc_targets:
|
||||
tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
||||
for item in self.solver_run.items])
|
||||
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
||||
for item in self.solver_run.items]) <= target.value, f'min tcc theta ({target.theta}) target value {target.value}'
|
||||
problem_objection_functions.append(tcc)
|
||||
|
||||
# solve problem
|
||||
problem.sequentialSolve(problem_objection_functions)
|
||||
|
||||
# add return items and create as a form
|
||||
form_items = service_helper.solution_items(problem.variables(), self.solver_run)
|
||||
# remove items
|
||||
self.solver_run.remove_items(form_items)
|
||||
# add form to solution
|
||||
solution.forms.append(Form.create(form_items, self.solver_run, LpStatus[problem.status]))
|
||||
|
||||
# successful form, increment
|
||||
f += 1
|
||||
|
||||
return solution
|
||||
|
||||
def stream_to_s3_bucket(self):
|
||||
self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv'
|
||||
logging.info('Streaming to %s s3 bucket %s', self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
||||
logging.info('Streaming %s to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
||||
# setup writer buffer and write processed forms to file
|
||||
buffer = io.StringIO()
|
||||
solution_file = service_helper.solution_to_file(buffer, self.solver_run.total_form_items, self.solution.forms)
|
||||
|
Reference in New Issue
Block a user