the big format

This commit is contained in:
Joshua Burman 2022-02-10 20:29:50 -05:00
parent 19a37ab33a
commit deb6b9014e
25 changed files with 682 additions and 466 deletions

View File

@ -4,36 +4,33 @@ import json
session = boto3.Session( session = boto3.Session(
aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'], aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'] aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])
)
s3 = session.resource('s3', region_name=os.environ['AWS_REGION']) s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
sqs = session.client('sqs', region_name=os.environ['AWS_REGION']) sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])
def get_key_from_message(body): def get_key_from_message(body):
return body['Records'][0]['s3']['object']['key'] return body['Records'][0]['s3']['object']['key']
def get_bucket_from_message(body): def get_bucket_from_message(body):
return body['Records'][0]['s3']['bucket']['name'] return body['Records'][0]['s3']['bucket']['name']
def get_object(key, bucket): def get_object(key, bucket):
return s3.Object( return s3.Object(bucket_name=bucket, key=key).get()['Body'].read()
bucket_name=bucket,
key=key
).get()['Body'].read()
def file_stream_upload(buffer, name, bucket): def file_stream_upload(buffer, name, bucket):
return s3.Bucket(bucket).upload_fileobj(buffer, name) return s3.Bucket(bucket).upload_fileobj(buffer, name)
def receive_message(queue, message_num=1, wait_time=1): def receive_message(queue, message_num=1, wait_time=1):
return sqs.receive_message( return sqs.receive_message(QueueUrl=queue,
QueueUrl=queue,
MaxNumberOfMessages=message_num, MaxNumberOfMessages=message_num,
WaitTimeSeconds=wait_time WaitTimeSeconds=wait_time)
)
def delete_message(queue, receipt): def delete_message(queue, receipt):
return sqs.delete_message( return sqs.delete_message(QueueUrl=queue, ReceiptHandle=receipt)
QueueUrl=queue,
ReceiptHandle=receipt
)

View File

@ -1,5 +1,6 @@
import csv import csv
import io import io
def file_stream_reader(f): def file_stream_reader(f):
return csv.reader(io.StringIO(f.read().decode('ascii'))) return csv.reader(io.StringIO(f.read().decode('ascii')))

View File

@ -3,6 +3,7 @@ import io
import re import re
from tokenize import String from tokenize import String
def items_csv_to_dict(items_csv_reader, solver_run): def items_csv_to_dict(items_csv_reader, solver_run):
items = [] items = []
headers = [] headers = []
@ -20,15 +21,21 @@ def items_csv_to_dict(items_csv_reader, solver_run):
if solver_run.irt_model.formatted_b_param() == col: if solver_run.irt_model.formatted_b_param() == col:
value = float(row[key]) value = float(row[key])
item['b_param'] = value item['b_param'] = value
elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle': elif solver_run.get_constraint(
col) and solver_run.get_constraint(
col).reference_attribute.type == 'bundle':
if row[key]: if row[key]:
item[solver_run.get_constraint(col).reference_attribute.id] = row[key] item[solver_run.get_constraint(
col).reference_attribute.id] = row[key]
elif solver_run.get_constraint(col): elif solver_run.get_constraint(col):
constraint = solver_run.get_constraint(col) constraint = solver_run.get_constraint(col)
item['attributes'].append({ item['attributes'].append({
'id': col, 'id':
'value': row[key], col,
'type': constraint.reference_attribute.type 'value':
row[key],
'type':
constraint.reference_attribute.type
}) })
else: else:
if row[key]: if row[key]:
@ -38,6 +45,7 @@ def items_csv_to_dict(items_csv_reader, solver_run):
return items return items
def solution_to_file(buffer, total_form_items, forms): def solution_to_file(buffer, total_form_items, forms):
wr = csv.writer(buffer, dialect='excel', delimiter=',') wr = csv.writer(buffer, dialect='excel', delimiter=',')
@ -59,7 +67,9 @@ def solution_to_file(buffer, total_form_items, forms):
row = [form.status] row = [form.status]
for result in form.tif_results + form.tcc_results: for result in form.tif_results + form.tcc_results:
row += [f'target - {result.value}\nresult - {round(result.result, 2)}'] row += [
f'target - {result.value}\nresult - {round(result.result, 2)}'
]
# provide generated items and cut score # provide generated items and cut score
row += [round(form.cut_score, 2)] + [item.id for item in form.items] row += [round(form.cut_score, 2)] + [item.id for item in form.items]
@ -69,6 +79,7 @@ def solution_to_file(buffer, total_form_items, forms):
return buff2 return buff2
def error_to_file(buffer, error): def error_to_file(buffer, error):
wr = csv.writer(buffer, dialect='excel', delimiter=',') wr = csv.writer(buffer, dialect='excel', delimiter=',')
wr.writerow(['status']) wr.writerow(['status'])
@ -76,21 +87,32 @@ def error_to_file(buffer, error):
return io.BytesIO(buffer.getvalue().encode()) return io.BytesIO(buffer.getvalue().encode())
def key_to_uuid(key): def key_to_uuid(key):
return re.split("_", key)[0] return re.split("_", key)[0]
def solution_items(variables, solver_run): def solution_items(variables, solver_run):
form_items = [] form_items = []
for v in variables: for v in variables:
if v.varValue > 0 and 'Item_' in v.name: if v.varValue > 0:
if 'Item_' in v.name:
item_id = v.name.replace('Item_', '') item_id = v.name.replace('Item_', '')
item = solver_run.get_item(item_id) item = solver_run.get_item(item_id)
# add item to list and then remove from master item list # add item to list and then remove from master item list
form_items.append(item) if item: form_items.append(item)
elif 'Bundle_' in v.name:
bundle_id = v.name.replace('Bundle_', '')
bundle = solver_run.get_bundle(bundle_id)
if bundle:
for item in bundle.items:
if item: form_items.append(item)
return form_items return form_items
# probably a better place for this... # probably a better place for this...
def is_float(element: String) -> bool: def is_float(element: String) -> bool:
try: try:

View File

@ -9,7 +9,9 @@ from models.item import Item
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
def build_constraints(solver_run: SolverRun, problem: LpProblem, items: list[Item]) -> LpProblem:
def build_constraints(solver_run: SolverRun, problem: LpProblem,
items: list[Item], bundles: list[Bundle]) -> LpProblem:
logging.info('Creating Constraints...') logging.info('Creating Constraints...')
try: try:
@ -23,48 +25,66 @@ def build_constraints(solver_run: SolverRun, problem: LpProblem, items: list[Ite
if attribute.type == 'metadata': if attribute.type == 'metadata':
logging.info('Metadata Constraint Generating...') logging.info('Metadata Constraint Generating...')
con = dict(zip([item.id for item in solver_run.items], con = dict(
[item.attribute_exists(attribute) zip([item.id for item in solver_run.items], [
for item in solver_run.items])) item.attribute_exists(attribute)
problem += lpSum([con[item.id] for item in solver_run.items
* items[item.id] ]))
for item in solver_run.items]) >= round(total_form_items * (min / 100)), f'{attribute.id} - {attribute.value} - min' problem += lpSum([
problem += lpSum([con[item.id] con[item.id] * items[item.id] for item in solver_run.items
* items[item.id] ]) >= round(
for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max' total_form_items *
(min / 100)), f'{attribute.id} - {attribute.value} - min'
problem += lpSum([
con[item.id] * items[item.id] for item in solver_run.items
]) <= round(
total_form_items *
(max / 100)), f'{attribute.id} - {attribute.value} - max'
elif attribute.type == 'bundle': elif attribute.type == 'bundle':
logging.info('Bundles Constraint Generating...') logging.info('Bundles Constraint Generating...')
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
if solver_run.bundles != None: if solver_run.bundles != None:
total_bundle_items = 0 # make sure the total bundles used in generated form is limited between min-max set
selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum)) problem += lpSum([
bundles[bundle.id] for bundle in solver_run.bundles
]) == randint(int(constraint.minimum),
int(constraint.maximum))
# total_bundle_items = 0
# selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
for bundle in selected_bundles: # for bundle in selected_bundles:
con = dict(zip([item.id for item in solver_run.items], # con = dict(zip([item.id for item in solver_run.items],
[(getattr(item, bundle.type, False) == bundle.id) # [(getattr(item, bundle.type, False) == bundle.id)
for item in solver_run.items])) # for item in solver_run.items]))
problem += lpSum([con[item.id] # problem += lpSum([con[item.id]
* items[item.id] # * items[item.id]
for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})' # for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
total_bundle_items += bundle.count # total_bundle_items += bundle.count
# make sure all other items added to the form # # make sure all other items added to the form
# are not a part of any bundle # # are not a part of any bundle
# currently only supports single bundle constraints, will need refactoring for multiple bundle constraints # # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
con = dict(zip([item.id for item in solver_run.items], # con = dict(zip([item.id for item in solver_run.items],
[(getattr(item, attribute.id, None) == None) # [(getattr(item, attribute.id, None) == None)
for item in solver_run.items])) # for item in solver_run.items]))
problem += lpSum([con[item.id] # problem += lpSum([con[item.id]
* items[item.id] # * items[item.id]
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type' # for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
logging.info('Constraints Created...') logging.info('Constraints Created...')
return problem return problem
except ValueError as error: except ValueError as error:
logging.error(error) logging.error(error)
raise ItemGenerationError("Bundle min and/or max larger than bundle amount provided", error.args[0]) raise ItemGenerationError(
"Bundle min and/or max larger than bundle amount provided",
error.args[0])
def get_random_bundles(total_form_items: int, bundles: list[Bundle], min: int , max: int, found_bundles = False) -> list[Bundle]:
def get_random_bundles(total_form_items: int,
bundles: list[Bundle],
min: int,
max: int,
found_bundles=False) -> list[Bundle]:
selected_bundles = None selected_bundles = None
total_bundle_items = 0 total_bundle_items = 0
total_bundles = randint(min, max) total_bundles = randint(min, max)

View File

@ -1,9 +1,11 @@
import io import io
import tarfile import tarfile
def raw_to_tar(raw_object): def raw_to_tar(raw_object):
tarball = io.BytesIO(raw_object) tarball = io.BytesIO(raw_object)
return tarfile.open(fileobj=tarball, mode='r:gz') return tarfile.open(fileobj=tarball, mode='r:gz')
def extract_file_from_tar(tar, file_name): def extract_file_from_tar(tar, file_name):
return tar.extractfile(tar.getmember(file_name)) return tar.extractfile(tar.getmember(file_name))

View File

@ -3,7 +3,9 @@ import logging
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
class ItemInformationFunction(): class ItemInformationFunction():
def __init__(self, irt_model): def __init__(self, irt_model):
self.model_data = irt_model self.model_data = irt_model
@ -15,10 +17,14 @@ class ItemInformationFunction():
if self.model_data.model == '3PL': if self.model_data.model == '3PL':
p = ThreeParameterLogistic(self.model_data, kwargs).result() p = ThreeParameterLogistic(self.model_data, kwargs).result()
q = 1 - p q = 1 - p
return (self.model_data.a_param * q * (p - self.model_data.c_param)**2) / (p * ((1 - self.model_data.c_param)**2)) return (self.model_data.a_param * q *
(p - self.model_data.c_param)**2) / (p * (
(1 - self.model_data.c_param)**2))
else: else:
# potentially error out # potentially error out
raise ItemGenerationError("irt model not supported or provided") raise ItemGenerationError(
"irt model not supported or provided")
except ZeroDivisionError as error: except ZeroDivisionError as error:
logging.error(error) logging.error(error)
raise ItemGenerationError("params not well formatted", error.args[0]) raise ItemGenerationError("params not well formatted",
error.args[0])

View File

@ -1,7 +1,9 @@
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
class ItemResponseFunction(): class ItemResponseFunction():
def __init__(self, irt_model): def __init__(self, irt_model):
self.model_data = irt_model self.model_data = irt_model

View File

@ -1,4 +1,5 @@
class ThreeParameterLogistic: class ThreeParameterLogistic:
def __init__(self, model_params, kwargs): def __init__(self, model_params, kwargs):
self.model_params = model_params self.model_params = model_params
# check if exists, if not error out # check if exists, if not error out
@ -13,4 +14,5 @@ class ThreeParameterLogistic:
def result(self): def result(self):
a = self.model_params.a_param a = self.model_params.a_param
c = self.model_params.c_param c = self.model_params.c_param
return c + (1 - c) * (1 / (1 + self.e**(-a * (self.theta - self.b_param)))) return c + (1 - c) * (1 / (1 + self.e**(-a *
(self.theta - self.b_param))))

View File

@ -1,6 +1,8 @@
from lib.irt.item_information_function import ItemInformationFunction from lib.irt.item_information_function import ItemInformationFunction
class TestInformationFunction(): class TestInformationFunction():
def __init__(self, irt_model): def __init__(self, irt_model):
self.irt_model = irt_model self.irt_model = irt_model
self.iif = ItemInformationFunction(irt_model) self.iif = ItemInformationFunction(irt_model)
@ -13,7 +15,8 @@ class TestInformationFunction():
sum = 0 sum = 0
for item in items: for item in items:
result = self.iif.calculate(b_param=item.b_param, theta=kwargs['theta']) result = self.iif.calculate(b_param=item.b_param,
theta=kwargs['theta'])
sum += result sum += result
return sum return sum

View File

@ -1,7 +1,9 @@
from lib.irt.item_response_function import ItemResponseFunction from lib.irt.item_response_function import ItemResponseFunction
# otherwise known as the Test Characteristic Curve (TCC) # otherwise known as the Test Characteristic Curve (TCC)
class TestResponseFunction(): class TestResponseFunction():
def __init__(self, irt_model): def __init__(self, irt_model):
self.irt_model = irt_model self.irt_model = irt_model
self.irf = ItemResponseFunction(irt_model) self.irf = ItemResponseFunction(irt_model)
@ -14,7 +16,8 @@ class TestResponseFunction():
sum = 0 sum = 0
for item in items: for item in items:
result = self.irf.calculate(b_param=item.b_param, theta=kwargs['theta']) result = self.irf.calculate(b_param=item.b_param,
theta=kwargs['theta'])
sum += result sum += result
return sum return sum

View File

@ -6,9 +6,13 @@ from helpers import aws_helper
from daemonize import Daemonize from daemonize import Daemonize
from sqs_listener import SqsListener from sqs_listener import SqsListener
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s %(asctime)s - %(message)s") logging.basicConfig(stream=sys.stdout,
level=logging.INFO,
format="%(levelname)s %(asctime)s - %(message)s")
class ServiceListener(SqsListener): class ServiceListener(SqsListener):
def handle_message(self, body, attributes, messages_attributes): def handle_message(self, body, attributes, messages_attributes):
# gather/manage/process data based on the particular needs # gather/manage/process data based on the particular needs
logging.info('Incoming message: %s', body) logging.info('Incoming message: %s', body)
@ -18,6 +22,7 @@ class ServiceListener(SqsListener):
logging.info('Process complete for %s', service.file_name) logging.info('Process complete for %s', service.file_name)
def main(): def main():
logging.info('Starting Solver Service (v1.1.2)...') logging.info('Starting Solver Service (v1.1.2)...')
listener = ServiceListener( listener = ServiceListener(
@ -25,10 +30,10 @@ def main():
region_name=os.environ['AWS_REGION'], region_name=os.environ['AWS_REGION'],
aws_access_key=os.environ['AWS_ACCESS_KEY_ID'], aws_access_key=os.environ['AWS_ACCESS_KEY_ID'],
aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'], aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
queue_url=os.environ['SQS_QUEUE'] queue_url=os.environ['SQS_QUEUE'])
)
listener.listen() listener.listen()
if __name__ == '__main__': if __name__ == '__main__':
myname = os.path.basename(sys.argv[0]) myname = os.path.basename(sys.argv[0])
pidfile = '/tmp/%s' % myname pidfile = '/tmp/%s' % myname

View File

@ -1,6 +1,7 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import List, Optional, Dict from typing import List, Optional, Dict
class AdvancedOptions(BaseModel): class AdvancedOptions(BaseModel):
linearity_check: Optional[bool] linearity_check: Optional[bool]
show_progress: Optional[bool] show_progress: Optional[bool]

View File

@ -1,6 +1,7 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
class Attribute(BaseModel): class Attribute(BaseModel):
value: Optional[str] value: Optional[str]
type: Optional[str] type: Optional[str]

View File

@ -1,6 +1,23 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import List
from lib.irt.test_information_function import TestInformationFunction
from lib.irt.test_response_function import TestResponseFunction
from models.item import Item
from models.irt_model import IRTModel
class Bundle(BaseModel): class Bundle(BaseModel):
id: int id: int
count: int count: int
items: List[Item]
type: str type: str
def tif(self, irt_model: IRTModel, theta: float) -> float:
return TestInformationFunction(irt_model).calculate(self.items,
theta=theta)
def trf(self, irt_model: IRTModel, theta: float) -> float:
return TestResponseFunction(irt_model).calculate(self.items,
theta=theta)

View File

@ -2,6 +2,7 @@ from pydantic import BaseModel
from models.attribute import Attribute from models.attribute import Attribute
class Constraint(BaseModel): class Constraint(BaseModel):
reference_attribute: Attribute reference_attribute: Attribute
minimum: float minimum: float

View File

@ -8,6 +8,7 @@ from models.target import Target
from lib.irt.test_response_function import TestResponseFunction from lib.irt.test_response_function import TestResponseFunction
class Form(BaseModel): class Form(BaseModel):
items: List[Item] items: List[Item]
cut_score: float cut_score: float
@ -19,8 +20,8 @@ class Form(BaseModel):
def create(cls, items, solver_run, status): def create(cls, items, solver_run, status):
return cls( return cls(
items=items, items=items,
cut_score=TestResponseFunction(solver_run.irt_model).calculate(items, theta=solver_run.theta_cut_score), cut_score=TestResponseFunction(solver_run.irt_model).calculate(
items, theta=solver_run.theta_cut_score),
tif_results=irt_helper.generate_tif_results(items, solver_run), tif_results=irt_helper.generate_tif_results(items, solver_run),
tcc_results=irt_helper.generate_tcc_results(items, solver_run), tcc_results=irt_helper.generate_tcc_results(items, solver_run),
status=status status=status)
)

View File

@ -1,12 +1,13 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Dict from typing import Dict
class IRTModel(BaseModel): class IRTModel(BaseModel):
a_param: float a_param: float
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str} b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
c_param: float c_param: float
model: str model: str
def formatted_b_param(self): def formatted_b_param(self):
return self.b_param['schema_bson_id'] + '-' + self.b_param['field_bson_id'] return self.b_param['schema_bson_id'] + '-' + self.b_param[
'field_bson_id']

View File

@ -6,6 +6,7 @@ from models.attribute import Attribute
from lib.irt.item_response_function import ItemResponseFunction from lib.irt.item_response_function import ItemResponseFunction
from lib.irt.item_information_function import ItemInformationFunction from lib.irt.item_information_function import ItemInformationFunction
class Item(BaseModel): class Item(BaseModel):
id: int id: int
passage_id: Optional[int] passage_id: Optional[int]
@ -14,19 +15,23 @@ class Item(BaseModel):
b_param: float = 0.00 b_param: float = 0.00
def iif(self, solver_run, theta): def iif(self, solver_run, theta):
return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) return ItemInformationFunction(solver_run.irt_model).calculate(
b_param=self.b_param, theta=theta)
def irf(self, solver_run, theta): def irf(self, solver_run, theta):
return ItemResponseFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) return ItemResponseFunction(solver_run.irt_model).calculate(
b_param=self.b_param, theta=theta)
def get_attribute(self, ref_attribute): def get_attribute(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower(): if attribute.id == ref_attribute.id and attribute.value.lower(
) == ref_attribute.value.lower():
return attribute.value return attribute.value
return False return False
def attribute_exists(self, ref_attribute): def attribute_exists(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower(): if attribute.id == ref_attribute.id and attribute.value.lower(
) == ref_attribute.value.lower():
return True return True
return False return False

View File

@ -3,6 +3,7 @@ from typing import Dict, List, AnyStr
from models.target import Target from models.target import Target
class ObjectiveFunction(BaseModel): class ObjectiveFunction(BaseModel):
# minimizing tif/tcc target value is only option currently # minimizing tif/tcc target value is only option currently
# as we add more we can build this out to be more dynamic # as we add more we can build this out to be more dynamic

View File

@ -3,6 +3,7 @@ from typing import List
from models.form import Form from models.form import Form
class Solution(BaseModel): class Solution(BaseModel):
response_id: int response_id: int
forms: List[Form] forms: List[Form]

View File

@ -10,9 +10,10 @@ from models.bundle import Bundle
from models.objective_function import ObjectiveFunction from models.objective_function import ObjectiveFunction
from models.advanced_options import AdvancedOptions from models.advanced_options import AdvancedOptions
class SolverRun(BaseModel): class SolverRun(BaseModel):
items: List[Item] = [] items: List[Item] = []
bundles: Optional[Bundle] bundles: list[Bundle] = []
constraints: List[Constraint] constraints: List[Constraint]
irt_model: IRTModel irt_model: IRTModel
objective_function: ObjectiveFunction objective_function: ObjectiveFunction
@ -22,19 +23,31 @@ class SolverRun(BaseModel):
advanced_options: Optional[AdvancedOptions] advanced_options: Optional[AdvancedOptions]
engine: str engine: str
def get_item(self, item_id): def get_item(self, item_id: int) -> Item or None:
for item in self.items: for item in self.items:
if str(item.id) == item_id: if str(item.id) == item_id:
return item return item
return False
def remove_items(self, items): def get_bundle(self, bundle_id: int) -> Bundle or None:
for bundle in self.bundles:
if str(bundle.id) == bundle_id:
return bundle
def get_constraint_by_type(self, type: str) -> Constraint or None:
for constraint in self.constraints:
if type == constraint.reference_attribute.type:
return constraint
def remove_items(self, items: list[Item]) -> bool:
self.items = [item for item in self.items if item not in items] self.items = [item for item in self.items if item not in items]
return True return True
def generate_bundles(self): def generate_bundles(self):
logging.info('Generating Bundles...') logging.info('Generating Bundles...')
bundle_constraints = (constraint.reference_attribute for constraint in self.constraints if constraint.reference_attribute.type == 'bundle') # confirms bundle constraints exists
bundle_constraints = (
constraint.reference_attribute for constraint in self.constraints
if constraint.reference_attribute.type == 'bundle')
for bundle_constraint in bundle_constraints: for bundle_constraint in bundle_constraints:
type_attribute = bundle_constraint.id type_attribute = bundle_constraint.id
@ -48,26 +61,36 @@ class SolverRun(BaseModel):
# else create array with new bundle # else create array with new bundle
if self.bundles != None: if self.bundles != None:
# get index of the bundle in the bundles list if exists or None if it doesn't # get index of the bundle in the bundles list if exists or None if it doesn't
bundle_index = next((index for (index, bundle) in enumerate(self.bundles) if bundle.id == attribute_id and bundle.type == type_attribute), None) bundle_index = next(
(index
for (index, bundle) in enumerate(self.bundles)
if bundle.id == attribute_id
and bundle.type == type_attribute), None)
# if the index doesn't exist add the new bundle of whatever type # if the index doesn't exist add the new bundle of whatever type
# else increment the count of the current bundle # else increment the count of the current bundle
if bundle_index == None: if bundle_index == None:
self.bundles.append(Bundle( self.bundles.append(
id=attribute_id, Bundle(id=attribute_id,
count=1, count=1,
type=type_attribute items=[item],
)) type=type_attribute))
else: else:
self.bundles[bundle_index].count += 1 self.bundles[bundle_index].count += 1
self.bundles[bundle_index].items.append(item)
else: else:
self.bundles = [Bundle( self.bundles = [
id=attribute_id, Bundle(id=attribute_id,
count=1, count=1,
type=type_attribute items=[item],
)] type=type_attribute)
]
logging.info('Bundles Generated...') logging.info('Bundles Generated...')
def get_constraint(self, name): def get_constraint(self, name: str) -> Constraint:
return next((constraint for constraint in self.constraints if constraint.reference_attribute.id == name), None) return next((constraint for constraint in self.constraints
if constraint.reference_attribute.id == name), None)
def unbundled_items(self) -> list:
return [item for item in self.items if item.passage_id == None]

View File

@ -1,6 +1,7 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
class Target(BaseModel): class Target(BaseModel):
theta: float theta: float
value: float value: float

View File

@ -1,4 +1,5 @@
class Base: class Base:
def __init__(self, source, ingest_type='message'): def __init__(self, source, ingest_type='message'):
self.ingest_type = ingest_type self.ingest_type = ingest_type
self.source = source self.source = source

View File

@ -1,6 +1,6 @@
import os, json, random, io, logging import os, json, random, io, logging
from pulp import LpProblem, LpVariable, LpMinimize, LpStatus, lpSum from pulp import LpProblem, LpVariable, LpMinimize, LpMaximize, LpStatus, lpSum
from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
@ -12,30 +12,87 @@ from models.item import Item
from services.base import Base from services.base import Base
class LoftService(Base): class LoftService(Base):
def process(self): def process(self):
try: try:
self.solver_run = self.create_solver_run_from_attributes() self.solver_run = self.create_solver_run_from_attributes()
self.solver_run.generate_bundles() self.solver_run.generate_bundles()
self.solution = self.generate_solution() self.solution = self.generate_solution()
# self.solution = self.generate_test_solution()
self.result = self.stream_to_s3_bucket() self.result = self.stream_to_s3_bucket()
except ItemGenerationError as error: except ItemGenerationError as error:
self.result = self.stream_to_s3_bucket(error) self.result = self.stream_to_s3_bucket(error)
except TypeError as error: except TypeError as error:
logging.error(error) logging.error(error)
self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results")) self.result = self.stream_to_s3_bucket(
ItemGenerationError(
"Provided params causing error in calculation results"))
def generate_test_solution(self) -> Solution:
solution = Solution(response_id=random.randint(100, 5000), forms=[])
problem = LpProblem("ata-form-generate-with-bundles", LpMinimize)
bundles = LpVariable.dicts(
"Bundle", [bundle.id for bundle in self.solver_run.bundles],
lowBound=1,
upBound=1,
cat='Binary')
items = LpVariable.dicts("Item",
[item.id for item in self.solver_run.items],
lowBound=1,
upBound=1,
cat='Binary')
problem += lpSum(
[bundles[bundle.id] for bundle in self.solver_run.bundles])
# problem += lpSum([items[item.id] for item in self.solver_run.items])
# problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) <= 3, 'max total bundles used'
# problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) >= 1, 'min total bundles used'
problem += lpSum(
[bundles[bundle.id] for bundle in self.solver_run.bundles]) == 3
problem += lpSum(
[
bundle.count * bundles[bundle.id]
for bundle in self.solver_run.bundles
] +
[1 * items[item.id] for item in self.solver_run.unbundled_items()]
) == self.solver_run.total_form_items, 'Total bundle form items for form'
problem.solve()
# for v in problem.variables():
# print(f'{v.name}: {v.varValue}')
# add return items and create as a form
form_items = service_helper.solution_items(problem.variables(),
self.solver_run)
# add form to solution
solution.forms.append(
Form.create(form_items, self.solver_run, LpStatus[problem.status]))
logging.info('Form generated and added to solution...')
return solution
def create_solver_run_from_attributes(self) -> SolverRun: def create_solver_run_from_attributes(self) -> SolverRun:
logging.info('Retrieving attributes from message...') logging.info('Retrieving attributes from message...')
# get s3 object # get s3 object
self.key = aws_helper.get_key_from_message(self.source) self.key = aws_helper.get_key_from_message(self.source)
s3_object = aws_helper.get_object(self.key, aws_helper.get_bucket_from_message(self.source)) s3_object = aws_helper.get_object(
self.key, aws_helper.get_bucket_from_message(self.source))
# convert to tar # convert to tar
self.tar = tar_helper.raw_to_tar(s3_object) self.tar = tar_helper.raw_to_tar(s3_object)
# get attributes file and convert to dict # get attributes file and convert to dict
attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read()) attributes = json.loads(
tar_helper.extract_file_from_tar(
self.tar, 'solver_run_attributes.json').read())
# create solver run # create solver run
solver_run = SolverRun.parse_obj(attributes) solver_run = SolverRun.parse_obj(attributes)
@ -45,7 +102,8 @@ class LoftService(Base):
items_csv_reader = csv_helper.file_stream_reader(items_csv) items_csv_reader = csv_helper.file_stream_reader(items_csv)
# add items to solver run # add items to solver run
for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run): for item in service_helper.items_csv_to_dict(items_csv_reader,
solver_run):
solver_run.items.append(Item.parse_obj(item)) solver_run.items.append(Item.parse_obj(item))
logging.info('Processed Attributes...') logging.info('Processed Attributes...')
@ -56,10 +114,7 @@ class LoftService(Base):
logging.info('Generating Solution...') logging.info('Generating Solution...')
# unsolved solution # unsolved solution
solution = Solution( solution = Solution(response_id=random.randint(100, 5000), forms=[])
response_id=random.randint(100, 5000),
forms=[]
)
# counter for number of forms # counter for number of forms
f = 0 f = 0
@ -69,57 +124,97 @@ class LoftService(Base):
while f < self.solver_run.total_forms: while f < self.solver_run.total_forms:
# setup vars # setup vars
items = LpVariable.dicts( items = LpVariable.dicts(
"Item", [item.id for item in self.solver_run.items], lowBound=1, upBound=1, cat='Binary') "Item", [item.id for item in self.solver_run.items],
# bundles = LpVariable.dicts( lowBound=1,
# "Bundle", [bundle.id for bundle in self.solver_run.bundles], lowBound=1, upBound=1, cat='Binary') upBound=1,
cat='Binary')
bundles = LpVariable.dicts(
"Bundle", [bundle.id for bundle in self.solver_run.bundles],
lowBound=1,
upBound=1,
cat='Binary')
problem_objection_functions = [] # problem_objection_functions = []
# create problem # create problem
problem = LpProblem("ata-form-generate", LpMinimize) problem = LpProblem("ata-form-generate", LpMinimize)
# dummy objective function, because it just makes things easier™ # dummy objective function, because it just makes things easier™
# problem += lpSum([items[item.id] problem += lpSum(
# for item in self.solver_run.items]) [items[item.id] for item in self.solver_run.items])
# constraints # constraints
problem += lpSum([items[item.id] # problem += lpSum([items[item.id]
for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items' # for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
problem += lpSum(
[
bundle.count * bundles[bundle.id]
for bundle in self.solver_run.bundles
] + [
1 * items[item.id]
for item in self.solver_run.unbundled_items()
]
) == self.solver_run.total_form_items, 'Total bundle form items for form'
# dynamic constraints # dynamic constraints
problem = solver_helper.build_constraints(self.solver_run, problem, items) problem = solver_helper.build_constraints(self.solver_run, problem,
items, bundles)
# multi-objective constraints # multi-objective constraints
logging.info('Creating TIF and TCC constraints') logging.info('Creating TIF and TCC constraints')
for target in self.solver_run.objective_function.tif_targets: for target in self.solver_run.objective_function.tif_targets:
tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) # tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
problem_objection_functions.append(tif) # for item in self.solver_run.items])
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id] # problem_objection_functions.append(tif)
for item in self.solver_run.items]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}' problem += lpSum([
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id] bundle.tif(self.solver_run.irt_model, target.theta) *
for item in self.solver_run.items]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}' bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.iif(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}'
problem += lpSum([
bundle.tif(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.iif(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}'
for target in self.solver_run.objective_function.tcc_targets: for target in self.solver_run.objective_function.tcc_targets:
tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id] # tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) # for item in self.solver_run.items])
problem_objection_functions.append(tcc) # problem_objection_functions.append(tcc)
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id] problem += lpSum([
for item in self.solver_run.items]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}' bundle.trf(self.solver_run.irt_model, target.theta) *
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id] bundles[bundle.id] for bundle in self.solver_run.bundles
for item in self.solver_run.items]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}' ] + [
item.irf(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([
bundle.trf(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.irf(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}'
# solve problem # solve problem
logging.info('Solving...') logging.info('Solving...')
# problem.solve() problem.solve()
problem.sequentialSolve(problem_objection_functions) # problem.sequentialSolve(problem_objection_functions)
logging.info('Solved...generating form and adding to solution') logging.info('Solved...generating form and adding to solution')
# add return items and create as a form # add return items and create as a form
form_items = service_helper.solution_items(problem.variables(), self.solver_run) form_items = service_helper.solution_items(problem.variables(),
self.solver_run)
# add form to solution # add form to solution
solution.forms.append(Form.create(form_items, self.solver_run, LpStatus[problem.status])) solution.forms.append(
Form.create(form_items, self.solver_run,
LpStatus[problem.status]))
logging.info('Form generated and added to solution...') logging.info('Form generated and added to solution...')
# successful form, increment # successful form, increment
@ -135,11 +230,15 @@ class LoftService(Base):
buffer = io.StringIO() buffer = io.StringIO()
if error: if error:
logging.info('Streaming %s error response to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET']) logging.info('Streaming %s error response to s3 bucket - %s',
self.file_name, os.environ['S3_PROCESSED_BUCKET'])
solution_file = service_helper.error_to_file(buffer, error) solution_file = service_helper.error_to_file(buffer, error)
else: else:
logging.info('Streaming %s to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET']) logging.info('Streaming %s to s3 bucket - %s', self.file_name,
solution_file = service_helper.solution_to_file(buffer, self.solver_run.total_form_items, self.solution.forms) os.environ['S3_PROCESSED_BUCKET'])
solution_file = service_helper.solution_to_file(
buffer, self.solver_run.total_form_items, self.solution.forms)
# upload generated file to s3 and return result # upload generated file to s3 and return result
return aws_helper.file_stream_upload(solution_file, self.file_name, os.environ['S3_PROCESSED_BUCKET']) return aws_helper.file_stream_upload(solution_file, self.file_name,
os.environ['S3_PROCESSED_BUCKET'])