Add drift tolerance to TCC and TIF targets, compare attribute string values case-insensitively, and improve the CSV-to-dict item conversion

This commit is contained in:
Josh Burman 2021-12-22 22:39:46 +00:00
parent 13f0f383e0
commit 107abcb73a
7 changed files with 61 additions and 47 deletions

View File

@ -2,7 +2,7 @@ import csv
import io import io
import re import re
def items_csv_to_dict(items_csv_reader): def items_csv_to_dict(items_csv_reader, solver_run):
items = [] items = []
headers = [] headers = []
@ -16,21 +16,17 @@ def items_csv_to_dict(items_csv_reader):
# ensure that the b param is formatted correctly # ensure that the b param is formatted correctly
if len(re.findall(".", row[len(headers) - 1])) >= 3: if len(re.findall(".", row[len(headers) - 1])) >= 3:
for key, col in enumerate(headers): for key, col in enumerate(headers):
if key == 0: if solver_run.irt_model.formatted_b_param() == col:
item[col] = row[key]
if key == 2:
# make sure passage id exists
if row[key]:
item['passage_id'] = row[key]
# b param - temp fix! use irt model b param for proper reference
elif key == len(headers) - 1:
item['b_param'] = row[key] item['b_param'] = row[key]
elif key > 2 and key < len(headers) - 1: elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle':
item['attributes'].append({ if row[key]:
'id': col, item[solver_run.get_constraint(col).reference_attribute.id] = row[key]
'value': row[key], elif solver_run.get_constraint(col):
'type': 'metadata' constraint = solver_run.get_constraint(col)
}) item['attributes'].append(constraint.reference_attribute)
else:
if row[key]:
item[col] = row[key]
items.append(item) items.append(item)

View File

@ -26,6 +26,7 @@ def build_constraints(solver_run, problem, items):
for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max' for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
elif attribute.type == 'bundle': elif attribute.type == 'bundle':
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
if solver_run.bundles != None:
total_bundles = randint(constraint.minimum, constraint.maximum) total_bundles = randint(constraint.minimum, constraint.maximum)
selected_bundles = sample(solver_run.bundles, total_bundles) selected_bundles = sample(solver_run.bundles, total_bundles)
total_bundle_items = 0 total_bundle_items = 0
@ -49,7 +50,6 @@ def build_constraints(solver_run, problem, items):
* items[item.id] * items[item.id]
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type' for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
return problem return problem
except ValueError as error: except ValueError as error:
logging.error(error) logging.error(error)

View File

@ -19,7 +19,7 @@ class ServiceListener(SqsListener):
logging.info('Process complete for %s', service.file_name) logging.info('Process complete for %s', service.file_name)
def main(): def main():
logging.info('Starting Solver Service (v1.1.0)...') logging.info('Starting Solver Service (v1.1.1)...')
listener = ServiceListener( listener = ServiceListener(
os.environ['SQS_QUEUE'], os.environ['SQS_QUEUE'],
region_name=os.environ['AWS_REGION'], region_name=os.environ['AWS_REGION'],

View File

@ -6,3 +6,7 @@ class IRTModel(BaseModel):
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str} b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
c_param: float c_param: float
model: str model: str
def formatted_b_param(self):
return self.b_param['schema_bson_id'] + '-' + self.b_param['field_bson_id']

View File

@ -9,6 +9,7 @@ from lib.irt.item_information_function import ItemInformationFunction
class Item(BaseModel): class Item(BaseModel):
id: int id: int
passage_id: Optional[int] passage_id: Optional[int]
workflow_state: Optional[str]
attributes: List[Attribute] attributes: List[Attribute]
b_param: float = 0.00 b_param: float = 0.00
@ -20,12 +21,12 @@ class Item(BaseModel):
def get_attribute(self, ref_attribute): def get_attribute(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value: if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
return attribute.value return attribute.value
return False return False
def attribute_exists(self, ref_attribute): def attribute_exists(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value: if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
return True return True
return False return False

View File

@ -9,7 +9,7 @@ from models.objective_function import ObjectiveFunction
from models.advanced_options import AdvancedOptions from models.advanced_options import AdvancedOptions
class SolverRun(BaseModel): class SolverRun(BaseModel):
items: List[Item] items: List[Item] = []
bundles: Optional[Bundle] bundles: Optional[Bundle]
constraints: List[Constraint] constraints: List[Constraint]
irt_model: IRTModel irt_model: IRTModel
@ -63,3 +63,6 @@ class SolverRun(BaseModel):
count=1, count=1,
type=type_attribute type=type_attribute
)] )]
def get_constraint(self, name):
return next((constraint for constraint in self.constraints if constraint.reference_attribute.id == name), None)

View File

@ -8,13 +8,14 @@ from lib.errors.item_generation_error import ItemGenerationError
from models.solver_run import SolverRun from models.solver_run import SolverRun
from models.solution import Solution from models.solution import Solution
from models.form import Form from models.form import Form
from models.item import Item
from services.base import Base from services.base import Base
class LoftService(Base): class LoftService(Base):
def process(self): def process(self):
try: try:
self.solver_run = SolverRun.parse_obj(self.retreive_attributes_from_message()) self.solver_run = self.create_solver_run_from_attributes()
self.solver_run.generate_bundles() self.solver_run.generate_bundles()
self.solution = self.generate_solution() self.solution = self.generate_solution()
self.result = self.stream_to_s3_bucket() self.result = self.stream_to_s3_bucket()
@ -24,7 +25,7 @@ class LoftService(Base):
logging.error(error) logging.error(error)
self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results")) self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results"))
def retreive_attributes_from_message(self): def create_solver_run_from_attributes(self):
logging.info('Retrieving attributes from message...') logging.info('Retrieving attributes from message...')
# get s3 object # get s3 object
self.key = aws_helper.get_key_from_message(self.source) self.key = aws_helper.get_key_from_message(self.source)
@ -36,15 +37,20 @@ class LoftService(Base):
# get attributes file and convert to dict # get attributes file and convert to dict
attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read()) attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read())
# create solver run
solver_run = SolverRun.parse_obj(attributes)
# get items file and convert to dict # get items file and convert to dict
items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv') items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv')
items_csv_reader = csv_helper.file_stream_reader(items_csv) items_csv_reader = csv_helper.file_stream_reader(items_csv)
# add items to attributes dict # add items to solver run
attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader) for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run):
solver_run.items.append(Item.parse_obj(item))
logging.info('Processed Attributes...') logging.info('Processed Attributes...')
return attributes return solver_run
def generate_solution(self): def generate_solution(self):
# unsolved solution # unsolved solution
@ -81,11 +87,15 @@ class LoftService(Base):
# multi-objective constraints # multi-objective constraints
for target in self.solver_run.objective_function.tif_targets: for target in self.solver_run.objective_function.tif_targets:
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id] problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value, f'min tif theta ({target.theta}) target value {target.value}' for item in self.solver_run.items]) >= target.value - 5, f'max tif theta ({target.theta}) target value {target.value}'
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 5, f'min tif theta ({target.theta}) target value {target.value}'
for target in self.solver_run.objective_function.tcc_targets: for target in self.solver_run.objective_function.tcc_targets:
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id] problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value, f'min tcc theta ({target.theta}) target value {target.value}' for item in self.solver_run.items]) >= target.value - 15, f'max tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 15, f'min tcc theta ({target.theta}) target value {target.value}'
# solve problem # solve problem
problem.solve() problem.solve()