Add TCC and TIF drift tolerances, compensate for string value casing, and improve the CSV-to-dict process

2021-12-22 22:39:46 +00:00
parent 13f0f383e0
commit 107abcb73a
7 changed files with 61 additions and 47 deletions
--- a/app/helpers/service_helper.py
+++ b/app/helpers/service_helper.py
@ -2,7 +2,7 @@ import csv
 import io
 import re

-def items_csv_to_dict(items_csv_reader):
+def items_csv_to_dict(items_csv_reader, solver_run):
  items = []
  headers = []

@ -16,21 +16,17 @@ def items_csv_to_dict(items_csv_reader):
      # ensure that the b param is formatted correctly
      if len(re.findall(".", row[len(headers) - 1])) >= 3:
        for key, col in enumerate(headers):
-          if key == 0:
-            item[col] = row[key]
-          if key == 2:
-            # make sure passage id exists
-            if row[key]:
-              item['passage_id'] = row[key]
-          # b param - tmep fix! use irt model b param for proper reference
-          elif key == len(headers) - 1:
+          if solver_run.irt_model.formatted_b_param() == col:
            item['b_param'] = row[key]
-          elif key > 2 and key < len(headers) - 1:
-            item['attributes'].append({
-              'id': col,
-              'value': row[key],
-              'type': 'metadata'
-            })
+          elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle':
+            if row[key]:
+              item[solver_run.get_constraint(col).reference_attribute.id] = row[key]
+          elif solver_run.get_constraint(col):
+            constraint = solver_run.get_constraint(col)
+            item['attributes'].append(constraint.reference_attribute)
+          else:
+            if row[key]:
+              item[col] = row[key]

        items.append(item)

--- a/app/helpers/solver_helper.py
+++ b/app/helpers/solver_helper.py
@ -26,29 +26,29 @@ def build_constraints(solver_run, problem, items):
                        for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
      elif attribute.type == 'bundle':
        # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
-        total_bundles = randint(constraint.minimum, constraint.maximum)
-        selected_bundles = sample(solver_run.bundles, total_bundles)
-        total_bundle_items = 0
+        if solver_run.bundles != None:
+          total_bundles = randint(constraint.minimum, constraint.maximum)
+          selected_bundles = sample(solver_run.bundles, total_bundles)
+          total_bundle_items = 0

-        for bundle in selected_bundles:
+          for bundle in selected_bundles:
+            con = dict(zip([item.id for item in solver_run.items],
+                        [(getattr(item, bundle.type, False) == bundle.id)
+                        for item in solver_run.items]))
+            problem += lpSum([con[item.id]
+                            * items[item.id]
+                            for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
+            total_bundle_items += bundle.count
+
+          # make sure all other items added to the form
+          # are not a part of any bundle
+          # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
          con = dict(zip([item.id for item in solver_run.items],
-                      [(getattr(item, bundle.type, False) == bundle.id)
-                      for item in solver_run.items]))
+                    [(getattr(item, attribute.id, None) == None)
+                    for item in solver_run.items]))
          problem += lpSum([con[item.id]
                          * items[item.id]
-                          for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
-          total_bundle_items += bundle.count
-
-        # make sure all other items added to the form
-        # are not a part of any bundle
-        # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
-        con = dict(zip([item.id for item in solver_run.items],
-                  [(getattr(item, attribute.id, None) == None)
-                  for item in solver_run.items]))
-        problem += lpSum([con[item.id]
-                        * items[item.id]
-                        for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
-
+                          for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'

    return problem
  except ValueError as error:
--- a/app/main.py
+++ b/app/main.py
@ -19,7 +19,7 @@ class ServiceListener(SqsListener):
    logging.info('Process complete for %s', service.file_name)

 def main():
-  logging.info('Starting Solver Service (v1.1.0)...')
+  logging.info('Starting Solver Service (v1.1.1)...')
  listener = ServiceListener(
    os.environ['SQS_QUEUE'],
    region_name=os.environ['AWS_REGION'],
--- a/app/models/irt_model.py
+++ b/app/models/irt_model.py
@ -6,3 +6,7 @@ class IRTModel(BaseModel):
  b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
  c_param: float
  model: str
+
+
+  def formatted_b_param(self):
+    return self.b_param['schema_bson_id'] + '-' + self.b_param['field_bson_id']
--- a/app/models/item.py
+++ b/app/models/item.py
@ -9,6 +9,7 @@ from lib.irt.item_information_function import ItemInformationFunction
 class Item(BaseModel):
  id: int
  passage_id: Optional[int]
+  workflow_state: Optional[str]
  attributes: List[Attribute]
  b_param: float = 0.00

@ -20,12 +21,12 @@ class Item(BaseModel):

  def get_attribute(self, ref_attribute):
    for attribute in self.attributes:
-      if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value:
+      if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
        return attribute.value
    return False

  def attribute_exists(self, ref_attribute):
    for attribute in self.attributes:
-      if attribute.id == ref_attribute.id and attribute.value == ref_attribute.value:
+      if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
        return True
    return False
--- a/app/models/solver_run.py
+++ b/app/models/solver_run.py
@ -9,7 +9,7 @@ from models.objective_function import ObjectiveFunction
 from models.advanced_options import AdvancedOptions

 class SolverRun(BaseModel):
-  items: List[Item]
+  items: List[Item] = []
  bundles: Optional[Bundle]
  constraints: List[Constraint]
  irt_model: IRTModel
@ -63,3 +63,6 @@ class SolverRun(BaseModel):
              count=1,
              type=type_attribute
            )]
+
+  def get_constraint(self, name):
+    return next((constraint for constraint in self.constraints if constraint.reference_attribute.id == name), None)
--- a/app/services/loft_service.py
+++ b/app/services/loft_service.py
@ -8,13 +8,14 @@ from lib.errors.item_generation_error import ItemGenerationError
 from models.solver_run import SolverRun
 from models.solution import Solution
 from models.form import Form
+from models.item import Item

 from services.base import Base

 class LoftService(Base):
  def process(self):
    try:
-      self.solver_run = SolverRun.parse_obj(self.retreive_attributes_from_message())
+      self.solver_run = self.create_solver_run_from_attributes()
      self.solver_run.generate_bundles()
      self.solution = self.generate_solution()
      self.result = self.stream_to_s3_bucket()
@ -24,7 +25,7 @@ class LoftService(Base):
      logging.error(error)
      self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results"))

-  def retreive_attributes_from_message(self):
+  def create_solver_run_from_attributes(self):
    logging.info('Retrieving attributes from message...')
    # get s3 object
    self.key = aws_helper.get_key_from_message(self.source)
@ -36,15 +37,20 @@ class LoftService(Base):
    # get attributes file and convert to dict
    attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read())

+    # create solver run
+    solver_run = SolverRun.parse_obj(attributes)
+
    # get items file and convert to dict
    items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv')
    items_csv_reader = csv_helper.file_stream_reader(items_csv)

-    # add items to attributes dict
-    attributes['items'] = service_helper.items_csv_to_dict(items_csv_reader)
+    # add items to solver run
+    for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run):
+      solver_run.items.append(Item.parse_obj(item))
+
    logging.info('Processed Attributes...')

-    return attributes
+    return solver_run

  def generate_solution(self):
    # unsolved solution
@ -80,12 +86,16 @@ class LoftService(Base):

      # multi-objective constraints
      for target in self.solver_run.objective_function.tif_targets:
-          problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
-                         for item in self.solver_run.items]) <= target.value, f'min tif theta ({target.theta}) target value {target.value}'
+        problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
+                         for item in self.solver_run.items]) >= target.value - 5, f'max tif theta ({target.theta}) target value {target.value}'
+        problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
+                         for item in self.solver_run.items]) <= target.value + 5, f'min tif theta ({target.theta}) target value {target.value}'

      for target in self.solver_run.objective_function.tcc_targets:
-          problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
-                         for item in self.solver_run.items]) <= target.value, f'min tcc theta ({target.theta}) target value {target.value}'
+        problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
+                         for item in self.solver_run.items]) >= target.value - 15, f'max tcc theta ({target.theta}) target value {target.value}'
+        problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
+                         for item in self.solver_run.items]) <= target.value + 15, f'min tcc theta ({target.theta}) target value {target.value}'

      # solve problem
      problem.solve()