the big format

2022-02-10 20:29:50 -05:00
parent 19a37ab33a
commit deb6b9014e
25 changed files with 682 additions and 466 deletions
--- a/app/helpers/aws_helper.py
+++ b/app/helpers/aws_helper.py
@@ -3,37 +3,34 @@ import os
 import json

 session = boto3.Session(
-  aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
-  aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY']
-)
+    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
+    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])

 s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
 sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])

+
 def get_key_from_message(body):
-  return body['Records'][0]['s3']['object']['key']
+    return body['Records'][0]['s3']['object']['key']
+

 def get_bucket_from_message(body):
-  return body['Records'][0]['s3']['bucket']['name']
+    return body['Records'][0]['s3']['bucket']['name']
+

 def get_object(key, bucket):
-  return s3.Object(
-    bucket_name=bucket,
-    key=key
-  ).get()['Body'].read()
+    return s3.Object(bucket_name=bucket, key=key).get()['Body'].read()
+

 def file_stream_upload(buffer, name, bucket):
-  return s3.Bucket(bucket).upload_fileobj(buffer, name)
+    return s3.Bucket(bucket).upload_fileobj(buffer, name)
+

 def receive_message(queue, message_num=1, wait_time=1):
-  return sqs.receive_message(
-    QueueUrl=queue,
-    MaxNumberOfMessages=message_num,
-    WaitTimeSeconds=wait_time
-  )
+    return sqs.receive_message(QueueUrl=queue,
+                               MaxNumberOfMessages=message_num,
+                               WaitTimeSeconds=wait_time)
+

 def delete_message(queue, receipt):
-  return sqs.delete_message(
-    QueueUrl=queue,
-    ReceiptHandle=receipt
-  )
+    return sqs.delete_message(QueueUrl=queue, ReceiptHandle=receipt)
--- a/app/helpers/csv_helper.py
+++ b/app/helpers/csv_helper.py
@@ -1,5 +1,6 @@
 import csv
 import io

+
 def file_stream_reader(f):
-  return csv.reader(io.StringIO(f.read().decode('ascii')))
+    return csv.reader(io.StringIO(f.read().decode('ascii')))
--- a/app/helpers/service_helper.py
+++ b/app/helpers/service_helper.py
@@ -3,98 +3,120 @@ import io
 import re
 from tokenize import String

+
 def items_csv_to_dict(items_csv_reader, solver_run):
-  items = []
-  headers = []
+    items = []
+    headers = []

-  # get headers and items
-  for key, row in enumerate(items_csv_reader):
-    if key == 0:
-      headers = row
-    else:
-      item = { 'attributes': [] }
+    # get headers and items
+    for key, row in enumerate(items_csv_reader):
+        if key == 0:
+            headers = row
+        else:
+            item = {'attributes': []}

-      # ensure that the b param is formatted correctly
-      if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]):
-        for key, col in enumerate(headers):
-          if solver_run.irt_model.formatted_b_param() == col:
-            value = float(row[key])
-            item['b_param'] = value
-          elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle':
-            if row[key]:
-              item[solver_run.get_constraint(col).reference_attribute.id] = row[key]
-          elif solver_run.get_constraint(col):
-            constraint = solver_run.get_constraint(col)
-            item['attributes'].append({
-              'id': col,
-              'value': row[key],
-              'type': constraint.reference_attribute.type
-            })
-          else:
-            if row[key]:
-              item[col] = row[key]
+            # ensure that the b param is formatted correctly
+            if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]):
+                for key, col in enumerate(headers):
+                    if solver_run.irt_model.formatted_b_param() == col:
+                        value = float(row[key])
+                        item['b_param'] = value
+                    elif solver_run.get_constraint(
+                            col) and solver_run.get_constraint(
+                                col).reference_attribute.type == 'bundle':
+                        if row[key]:
+                            item[solver_run.get_constraint(
+                                col).reference_attribute.id] = row[key]
+                    elif solver_run.get_constraint(col):
+                        constraint = solver_run.get_constraint(col)
+                        item['attributes'].append({
+                            'id':
+                            col,
+                            'value':
+                            row[key],
+                            'type':
+                            constraint.reference_attribute.type
+                        })
+                    else:
+                        if row[key]:
+                            item[col] = row[key]

-        items.append(item)
+                items.append(item)
+
+    return items

-  return items

 def solution_to_file(buffer, total_form_items, forms):
-  wr = csv.writer(buffer, dialect='excel', delimiter=',')
+    wr = csv.writer(buffer, dialect='excel', delimiter=',')

-  # write header row for first row utilizing the total items all forms will have
-  # fill the rows with the targets and cut score then the items
-  header = ['status']
+    # write header row for first row utilizing the total items all forms will have
+    # fill the rows with the targets and cut score then the items
+    header = ['status']

-  for result in forms[0].tif_results:
-    header += [f'tif @ {round(result.theta, 2)}']
+    for result in forms[0].tif_results:
+        header += [f'tif @ {round(result.theta, 2)}']

-  for result in forms[0].tcc_results:
-    header += [f'tcc @ {round(result.theta, 2)}']
+    for result in forms[0].tcc_results:
+        header += [f'tcc @ {round(result.theta, 2)}']

-  header += ['cut score'] + [x + 1 for x in range(total_form_items)]
-  wr.writerow(header)
+    header += ['cut score'] + [x + 1 for x in range(total_form_items)]
+    wr.writerow(header)

-  # add each form as row to processed csv
-  for form in forms:
-    row = [form.status]
+    # add each form as row to processed csv
+    for form in forms:
+        row = [form.status]

-    for result in form.tif_results + form.tcc_results:
-      row += [f'target - {result.value}\nresult - {round(result.result, 2)}']
+        for result in form.tif_results + form.tcc_results:
+            row += [
+                f'target - {result.value}\nresult - {round(result.result, 2)}'
+            ]

-    # provide generated items and cut score
-    row += [round(form.cut_score, 2)] + [item.id for item in form.items]
-    wr.writerow(row)
+        # provide generated items and cut score
+        row += [round(form.cut_score, 2)] + [item.id for item in form.items]
+        wr.writerow(row)

-  buff2 = io.BytesIO(buffer.getvalue().encode())
+    buff2 = io.BytesIO(buffer.getvalue().encode())
+
+    return buff2

-  return buff2

 def error_to_file(buffer, error):
-  wr = csv.writer(buffer, dialect='excel', delimiter=',')
-  wr.writerow(['status'])
-  wr.writerow([error.args[0]])
+    wr = csv.writer(buffer, dialect='excel', delimiter=',')
+    wr.writerow(['status'])
+    wr.writerow([error.args[0]])
+
+    return io.BytesIO(buffer.getvalue().encode())

-  return io.BytesIO(buffer.getvalue().encode())

 def key_to_uuid(key):
-  return re.split("_", key)[0]
+    return re.split("_", key)[0]
+

 def solution_items(variables, solver_run):
-  form_items = []
+    form_items = []

-  for v in variables:
-    if v.varValue > 0 and 'Item_' in v.name:
-      item_id = v.name.replace('Item_', '')
-      item = solver_run.get_item(item_id)
-      # add item to list and then remove from master item list
-      form_items.append(item)
+    for v in variables:
+        if v.varValue > 0:
+            if 'Item_' in v.name:
+                item_id = v.name.replace('Item_', '')
+                item = solver_run.get_item(item_id)
+                # add item to list and then remove from master item list
+                if item: form_items.append(item)
+            elif 'Bundle_' in v.name:
+                bundle_id = v.name.replace('Bundle_', '')
+                bundle = solver_run.get_bundle(bundle_id)
+
+                if bundle:
+                    for item in bundle.items:
+                        if item: form_items.append(item)
+
+    return form_items

-  return form_items

 # probably a better place for this...
 def is_float(element: String) -> bool:
-  try:
-    float(element)
-    return True
-  except ValueError:
-    return False
+    try:
+        float(element)
+        return True
+    except ValueError:
+        return False
--- a/app/helpers/solver_helper.py
+++ b/app/helpers/solver_helper.py
@@ -9,75 +9,95 @@ from models.item import Item

 from lib.errors.item_generation_error import ItemGenerationError

-def build_constraints(solver_run: SolverRun, problem: LpProblem, items: list[Item]) -> LpProblem:
-  logging.info('Creating Constraints...')

-  try:
-    total_form_items = solver_run.total_form_items
-    constraints = solver_run.constraints
+def build_constraints(solver_run: SolverRun, problem: LpProblem,
+                      items: list[Item], bundles: list[Bundle]) -> LpProblem:
+    logging.info('Creating Constraints...')

-    for constraint in constraints:
-      attribute = constraint.reference_attribute
-      min = constraint.minimum
-      max = constraint.maximum
+    try:
+        total_form_items = solver_run.total_form_items
+        constraints = solver_run.constraints

-      if attribute.type == 'metadata':
-        logging.info('Metadata Constraint Generating...')
-        con = dict(zip([item.id for item in solver_run.items],
-                      [item.attribute_exists(attribute)
-                      for item in solver_run.items]))
-        problem += lpSum([con[item.id]
-                        * items[item.id]
-                        for item in solver_run.items]) >= round(total_form_items * (min / 100)), f'{attribute.id} - {attribute.value} - min'
-        problem += lpSum([con[item.id]
-                        * items[item.id]
-                        for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
-      elif attribute.type == 'bundle':
-        logging.info('Bundles Constraint Generating...')
-        # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
-        if solver_run.bundles != None:
-          total_bundle_items = 0
-          selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
+        for constraint in constraints:
+            attribute = constraint.reference_attribute
+            min = constraint.minimum
+            max = constraint.maximum

-          for bundle in selected_bundles:
-            con = dict(zip([item.id for item in solver_run.items],
-                        [(getattr(item, bundle.type, False) == bundle.id)
-                        for item in solver_run.items]))
-            problem += lpSum([con[item.id]
-                            * items[item.id]
-                            for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
-            total_bundle_items += bundle.count
+            if attribute.type == 'metadata':
+                logging.info('Metadata Constraint Generating...')
+                con = dict(
+                    zip([item.id for item in solver_run.items], [
+                        item.attribute_exists(attribute)
+                        for item in solver_run.items
+                    ]))
+                problem += lpSum([
+                    con[item.id] * items[item.id] for item in solver_run.items
+                ]) >= round(
+                    total_form_items *
+                    (min / 100)), f'{attribute.id} - {attribute.value} - min'
+                problem += lpSum([
+                    con[item.id] * items[item.id] for item in solver_run.items
+                ]) <= round(
+                    total_form_items *
+                    (max / 100)), f'{attribute.id} - {attribute.value} - max'
+            elif attribute.type == 'bundle':
+                logging.info('Bundles Constraint Generating...')
+                # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
+                if solver_run.bundles != None:
+                    # make sure the total bundles used in generated form is limited between min-max set
+                    problem += lpSum([
+                        bundles[bundle.id] for bundle in solver_run.bundles
+                    ]) == randint(int(constraint.minimum),
+                                  int(constraint.maximum))
+                    # total_bundle_items = 0
+                    # selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))

-          # make sure all other items added to the form
-          # are not a part of any bundle
-          # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
-          con = dict(zip([item.id for item in solver_run.items],
-                    [(getattr(item, attribute.id, None) == None)
-                    for item in solver_run.items]))
-          problem += lpSum([con[item.id]
-                          * items[item.id]
-                          for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
+                    # for bundle in selected_bundles:
+                    #   con = dict(zip([item.id for item in solver_run.items],
+                    #               [(getattr(item, bundle.type, False) == bundle.id)
+                    #               for item in solver_run.items]))
+                    #   problem += lpSum([con[item.id]
+                    #                   * items[item.id]
+                    #                   for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
+                    #   total_bundle_items += bundle.count

-    logging.info('Constraints Created...')
-    return problem
-  except ValueError as error:
-    logging.error(error)
-    raise ItemGenerationError("Bundle min and/or max larger than bundle amount provided", error.args[0])
+                    # # make sure all other items added to the form
+                    # # are not a part of any bundle
+                    # # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
+                    # con = dict(zip([item.id for item in solver_run.items],
+                    #           [(getattr(item, attribute.id, None) == None)
+                    #           for item in solver_run.items]))
+                    # problem += lpSum([con[item.id]
+                    #                 * items[item.id]
+                    #                 for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'

-def get_random_bundles(total_form_items: int, bundles: list[Bundle], min: int , max: int, found_bundles = False) -> list[Bundle]:
-  selected_bundles = None
-  total_bundle_items = 0
-  total_bundles = randint(min, max)
-  logging.info(f'Selecting Bundles (total of {total_bundles})...')
+        logging.info('Constraints Created...')
+        return problem
+    except ValueError as error:
+        logging.error(error)
+        raise ItemGenerationError(
+            "Bundle min and/or max larger than bundle amount provided",
+            error.args[0])

-  while found_bundles == False:
-    selected_bundles = sample(bundles, total_bundles)
-    total_bundle_items = sum(bundle.count for bundle in selected_bundles)

-    if total_bundle_items <= total_form_items:
-      found_bundles = True
+def get_random_bundles(total_form_items: int,
+                       bundles: list[Bundle],
+                       min: int,
+                       max: int,
+                       found_bundles=False) -> list[Bundle]:
+    selected_bundles = None
+    total_bundle_items = 0
+    total_bundles = randint(min, max)
+    logging.info(f'Selecting Bundles (total of {total_bundles})...')

-  if found_bundles == True:
-    return selected_bundles
-  else:
-    return get_random_bundles(total_form_items, total_bundles - 1, bundles)
+    while found_bundles == False:
+        selected_bundles = sample(bundles, total_bundles)
+        total_bundle_items = sum(bundle.count for bundle in selected_bundles)
+
+        if total_bundle_items <= total_form_items:
+            found_bundles = True
+
+    if found_bundles == True:
+        return selected_bundles
+    else:
+        return get_random_bundles(total_form_items, total_bundles - 1, bundles)
--- a/app/helpers/tar_helper.py
+++ b/app/helpers/tar_helper.py
@@ -1,9 +1,11 @@
 import io
 import tarfile

+
 def raw_to_tar(raw_object):
-  tarball = io.BytesIO(raw_object)
-  return tarfile.open(fileobj=tarball, mode='r:gz')
+    tarball = io.BytesIO(raw_object)
+    return tarfile.open(fileobj=tarball, mode='r:gz')
+

 def extract_file_from_tar(tar, file_name):
-  return tar.extractfile(tar.getmember(file_name))
+    return tar.extractfile(tar.getmember(file_name))