Add TCC and TIF drift tolerances, make attribute value comparison case-insensitive, and improve the CSV-to-dict process

This commit is contained in:
Josh Burman 2021-12-22 22:39:46 +00:00
parent 13f0f383e0
commit 107abcb73a
7 changed files with 61 additions and 47 deletions

View File

@ -2,7 +2,7 @@ import csv
import io
import re
def items_csv_to_dict(items_csv_reader):
def items_csv_to_dict(items_csv_reader, solver_run):
items = []
headers = []
@ -16,21 +16,17 @@ def items_csv_to_dict(items_csv_reader):
# ensure that the b param is formatted correctly
if len(re.findall(".", row[len(headers) - 1])) >= 3:
for key, col in enumerate(headers):
if key == 0:
item[col] = row[key]
if key == 2:
# make sure passage id exists
if row[key]:
item['passage_id'] = row[key]
# b param - temp fix! use irt model b param for proper reference
elif key == len(headers) - 1:
if solver_run.irt_model.formatted_b_param() == col:
item['b_param'] = row[key]
elif key > 2 and key < len(headers) - 1:
item['attributes'].append({
'id': col,
'value': row[key],
'type': 'metadata'
})
elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle':
if row[key]:
item[solver_run.get_constraint(col).reference_attribute.id] = row[key]
elif solver_run.get_constraint(col):
constraint = solver_run.get_constraint(col)
item['attributes'].append(constraint.reference_attribute)
else:
if row[key]:
item[col] = row[key]
items.append(item)

View File

@ -26,29 +26,29 @@ def build_constraints(solver_run, problem, items):
for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
elif attribute.type == 'bundle':
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
total_bundles = randint(constraint.minimum, constraint.maximum)
selected_bundles = sample(solver_run.bundles, total_bundles)
total_bundle_items = 0
if solver_run.bundles != None:
total_bundles = randint(constraint.minimum, constraint.maximum)
selected_bundles = sample(solver_run.bundles, total_bundles)
total_bundle_items = 0
for bundle in selected_bundles:
for bundle in selected_bundles:
con = dict(zip([item.id for item in solver_run.items],
[(getattr(item, bundle.type, False) == bundle.id)
for item in solver_run.items]))
problem += lpSum([con[item.id]
* items[item.id]
for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
total_bundle_items += bundle.count
# make sure all other items added to the form
# are not a part of any bundle
# currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
con = dict(zip([item.id for item in solver_run.items],
[(getattr(item, bundle.type, False) == bundle.id)
for item in solver_run.items]))
[(getattr(item, attribute.id, None) == None)
for item in solver_run.items]))
problem += lpSum([con[item.id]
* items[item.id]
for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
total_bundle_items += bundle.count
# make sure all other items added to the form
# are not a part of any bundle
# currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
con = dict(zip([item.id for item in solver_run.items],
[(getattr(item, attribute.id, None) == None)
for item in solver_run.items]))
problem += lpSum([con[item.id]
* items[item.id]
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
return problem
except ValueError as error:

View File

@ -19,7 +19,7 @@ class ServiceListener(SqsListener):
logging.info('Process complete for %s', service.file_name)
def main():
logging.info('Starting Solver Service (v1.1.0)...')
logging.info('Starting Solver Service (v1.1.1)...')
listener = ServiceListener(
os.environ['SQS_QUEUE'],
region_name=os.environ['AWS_REGION'],

View File

@ -6,3 +6,7 @@ class IRTModel(BaseModel):
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
c_param: float
model: str
def formatted_b_param(self):
    """Return the b-param identifier as '<schema_bson_id>-<field_bson_id>'.

    Both parts are looked up in ``self.b_param`` and joined with a hyphen.
    """
    prefix = self.b_param['schema_bson_id'] + '-'
    return prefix + self.b_param['field_bson_id']

View File

@ -9,6 +9,7 @@ from lib.irt.item_information_function import ItemInformationFunction
class Item(BaseModel):
id: int
passage_id: Optional[int]
workflow_state: Optional[str]
attributes: List[Attribute]
b_param: float = 0.00
@ -20,12 +21,12 @@ class Item(BaseModel):
def get_attribute(self, ref_attribute):
# Scans self.attributes for an entry whose id and value match ref_attribute;
# yields the matching attribute's value, or False when nothing matches.
for attribute in self.attributes:
# NOTE(review): the two consecutive `if` lines below look like the before/after
# versions of the same condition in this diff view (exact match, then
# case-insensitive match) — confirm against the actual file.
if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value:
# NOTE(review): .lower() is called on both values, so this presumably assumes
# they are always strings — TODO confirm they can never be None.
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
return attribute.value
return False
def attribute_exists(self, ref_attribute):
# Reports whether self.attributes contains an entry whose id and value match
# ref_attribute: True on the first match, False otherwise.
for attribute in self.attributes:
# NOTE(review): the two consecutive `if` lines below look like the before/after
# versions of the same condition in this diff view (exact match, then
# case-insensitive match) — confirm against the actual file.
if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value:
# NOTE(review): .lower() is called on both values, so this presumably assumes
# they are always strings — TODO confirm they can never be None.
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
return True
return False

View File

@ -9,7 +9,7 @@ from models.objective_function import ObjectiveFunction
from models.advanced_options import AdvancedOptions
class SolverRun(BaseModel):
items: List[Item]
items: List[Item] = []
bundles: Optional[Bundle]
constraints: List[Constraint]
irt_model: IRTModel
@ -63,3 +63,6 @@ class SolverRun(BaseModel):
count=1,
type=type_attribute
)]
def get_constraint(self, name):
    """Return the first constraint whose reference attribute id equals ``name``.

    Constraints are checked in the order they appear in ``self.constraints``;
    ``None`` is returned when none of them matches.
    """
    for candidate in self.constraints:
        if candidate.reference_attribute.id == name:
            return candidate
    return None

View File

@ -8,13 +8,14 @@ from lib.errors.item_generation_error import ItemGenerationError
from models.solver_run import SolverRun
from models.solution import Solution
from models.form import Form
from models.item import Item
from services.base import Base
class LoftService(Base):
def process(self):
try:
self.solver_run = SolverRun.parse_obj(self.retreive_attributes_from_message())
self.solver_run = self.create_solver_run_from_attributes()
self.solver_run.generate_bundles()
self.solution = self.generate_solution()
self.result = self.stream_to_s3_bucket()
@ -24,7 +25,7 @@ class LoftService(Base):
logging.error(error)
self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results"))
def retreive_attributes_from_message(self):
def create_solver_run_from_attributes(self):
logging.info('Retrieving attributes from message...')
# get s3 object
self.key = aws_helper.get_key_from_message(self.source)
@ -36,15 +37,20 @@ class LoftService(Base):
# get attributes file and convert to dict
attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read())
# create solver run
solver_run = SolverRun.parse_obj(attributes)
# get items file and convert to dict
items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv')
items_csv_reader = csv_helper.file_stream_reader(items_csv)
# add items to attributes dict
attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader)
# add items to solver run
for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run):
solver_run.items.append(Item.parse_obj(item))
logging.info('Processed Attributes...')
return attributes
return solver_run
def generate_solution(self):
# unsolved solution
@ -80,12 +86,16 @@ class LoftService(Base):
# multi-objective constraints
for target in self.solver_run.objective_function.tif_targets:
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value, f'min tif theta ({target.theta}) target value {target.value}'
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) >= target.value - 5, f'max tif theta ({target.theta}) target value {target.value}'
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 5, f'min tif theta ({target.theta}) target value {target.value}'
for target in self.solver_run.objective_function.tcc_targets:
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value, f'min tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) >= target.value - 15, f'max tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 15, f'min tcc theta ({target.theta}) target value {target.value}'
# solve problem
problem.solve()