the big format

This commit is contained in:
Joshua Burman 2022-02-10 20:29:50 -05:00
parent 19a37ab33a
commit deb6b9014e
25 changed files with 682 additions and 466 deletions

View File

@ -3,37 +3,34 @@ import os
import json import json
session = boto3.Session( session = boto3.Session(
aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'], aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'] aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])
)
s3 = session.resource('s3', region_name=os.environ['AWS_REGION']) s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
sqs = session.client('sqs', region_name=os.environ['AWS_REGION']) sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])
def get_key_from_message(body): def get_key_from_message(body):
return body['Records'][0]['s3']['object']['key'] return body['Records'][0]['s3']['object']['key']
def get_bucket_from_message(body): def get_bucket_from_message(body):
return body['Records'][0]['s3']['bucket']['name'] return body['Records'][0]['s3']['bucket']['name']
def get_object(key, bucket): def get_object(key, bucket):
return s3.Object( return s3.Object(bucket_name=bucket, key=key).get()['Body'].read()
bucket_name=bucket,
key=key
).get()['Body'].read()
def file_stream_upload(buffer, name, bucket): def file_stream_upload(buffer, name, bucket):
return s3.Bucket(bucket).upload_fileobj(buffer, name) return s3.Bucket(bucket).upload_fileobj(buffer, name)
def receive_message(queue, message_num=1, wait_time=1): def receive_message(queue, message_num=1, wait_time=1):
return sqs.receive_message( return sqs.receive_message(QueueUrl=queue,
QueueUrl=queue, MaxNumberOfMessages=message_num,
MaxNumberOfMessages=message_num, WaitTimeSeconds=wait_time)
WaitTimeSeconds=wait_time
)
def delete_message(queue, receipt): def delete_message(queue, receipt):
return sqs.delete_message( return sqs.delete_message(QueueUrl=queue, ReceiptHandle=receipt)
QueueUrl=queue,
ReceiptHandle=receipt
)

View File

@ -1,5 +1,6 @@
import csv import csv
import io import io
def file_stream_reader(f): def file_stream_reader(f):
return csv.reader(io.StringIO(f.read().decode('ascii'))) return csv.reader(io.StringIO(f.read().decode('ascii')))

View File

@ -3,98 +3,120 @@ import io
import re import re
from tokenize import String from tokenize import String
def items_csv_to_dict(items_csv_reader, solver_run): def items_csv_to_dict(items_csv_reader, solver_run):
items = [] items = []
headers = [] headers = []
# get headers and items # get headers and items
for key, row in enumerate(items_csv_reader): for key, row in enumerate(items_csv_reader):
if key == 0: if key == 0:
headers = row headers = row
else: else:
item = { 'attributes': [] } item = {'attributes': []}
# ensure that the b param is formatted correctly # ensure that the b param is formatted correctly
if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]): if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]):
for key, col in enumerate(headers): for key, col in enumerate(headers):
if solver_run.irt_model.formatted_b_param() == col: if solver_run.irt_model.formatted_b_param() == col:
value = float(row[key]) value = float(row[key])
item['b_param'] = value item['b_param'] = value
elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle': elif solver_run.get_constraint(
if row[key]: col) and solver_run.get_constraint(
item[solver_run.get_constraint(col).reference_attribute.id] = row[key] col).reference_attribute.type == 'bundle':
elif solver_run.get_constraint(col): if row[key]:
constraint = solver_run.get_constraint(col) item[solver_run.get_constraint(
item['attributes'].append({ col).reference_attribute.id] = row[key]
'id': col, elif solver_run.get_constraint(col):
'value': row[key], constraint = solver_run.get_constraint(col)
'type': constraint.reference_attribute.type item['attributes'].append({
}) 'id':
else: col,
if row[key]: 'value':
item[col] = row[key] row[key],
'type':
constraint.reference_attribute.type
})
else:
if row[key]:
item[col] = row[key]
items.append(item) items.append(item)
return items
return items
def solution_to_file(buffer, total_form_items, forms): def solution_to_file(buffer, total_form_items, forms):
wr = csv.writer(buffer, dialect='excel', delimiter=',') wr = csv.writer(buffer, dialect='excel', delimiter=',')
# write header row for first row utilizing the total items all forms will have # write header row for first row utilizing the total items all forms will have
# fill the rows with the targets and cut score then the items # fill the rows with the targets and cut score then the items
header = ['status'] header = ['status']
for result in forms[0].tif_results: for result in forms[0].tif_results:
header += [f'tif @ {round(result.theta, 2)}'] header += [f'tif @ {round(result.theta, 2)}']
for result in forms[0].tcc_results: for result in forms[0].tcc_results:
header += [f'tcc @ {round(result.theta, 2)}'] header += [f'tcc @ {round(result.theta, 2)}']
header += ['cut score'] + [x + 1 for x in range(total_form_items)] header += ['cut score'] + [x + 1 for x in range(total_form_items)]
wr.writerow(header) wr.writerow(header)
# add each form as row to processed csv # add each form as row to processed csv
for form in forms: for form in forms:
row = [form.status] row = [form.status]
for result in form.tif_results + form.tcc_results: for result in form.tif_results + form.tcc_results:
row += [f'target - {result.value}\nresult - {round(result.result, 2)}'] row += [
f'target - {result.value}\nresult - {round(result.result, 2)}'
]
# provide generated items and cut score # provide generated items and cut score
row += [round(form.cut_score, 2)] + [item.id for item in form.items] row += [round(form.cut_score, 2)] + [item.id for item in form.items]
wr.writerow(row) wr.writerow(row)
buff2 = io.BytesIO(buffer.getvalue().encode()) buff2 = io.BytesIO(buffer.getvalue().encode())
return buff2
return buff2
def error_to_file(buffer, error): def error_to_file(buffer, error):
wr = csv.writer(buffer, dialect='excel', delimiter=',') wr = csv.writer(buffer, dialect='excel', delimiter=',')
wr.writerow(['status']) wr.writerow(['status'])
wr.writerow([error.args[0]]) wr.writerow([error.args[0]])
return io.BytesIO(buffer.getvalue().encode())
return io.BytesIO(buffer.getvalue().encode())
def key_to_uuid(key): def key_to_uuid(key):
return re.split("_", key)[0] return re.split("_", key)[0]
def solution_items(variables, solver_run): def solution_items(variables, solver_run):
form_items = [] form_items = []
for v in variables: for v in variables:
if v.varValue > 0 and 'Item_' in v.name: if v.varValue > 0:
item_id = v.name.replace('Item_', '') if 'Item_' in v.name:
item = solver_run.get_item(item_id) item_id = v.name.replace('Item_', '')
# add item to list and then remove from master item list item = solver_run.get_item(item_id)
form_items.append(item) # add item to list and then remove from master item list
if item: form_items.append(item)
elif 'Bundle_' in v.name:
bundle_id = v.name.replace('Bundle_', '')
bundle = solver_run.get_bundle(bundle_id)
if bundle:
for item in bundle.items:
if item: form_items.append(item)
return form_items
return form_items
# probably a better place for this... # probably a better place for this...
def is_float(element: String) -> bool: def is_float(element: String) -> bool:
try: try:
float(element) float(element)
return True return True
except ValueError: except ValueError:
return False return False

View File

@ -9,75 +9,95 @@ from models.item import Item
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
def build_constraints(solver_run: SolverRun, problem: LpProblem, items: list[Item]) -> LpProblem:
logging.info('Creating Constraints...')
try: def build_constraints(solver_run: SolverRun, problem: LpProblem,
total_form_items = solver_run.total_form_items items: list[Item], bundles: list[Bundle]) -> LpProblem:
constraints = solver_run.constraints logging.info('Creating Constraints...')
for constraint in constraints: try:
attribute = constraint.reference_attribute total_form_items = solver_run.total_form_items
min = constraint.minimum constraints = solver_run.constraints
max = constraint.maximum
if attribute.type == 'metadata': for constraint in constraints:
logging.info('Metadata Constraint Generating...') attribute = constraint.reference_attribute
con = dict(zip([item.id for item in solver_run.items], min = constraint.minimum
[item.attribute_exists(attribute) max = constraint.maximum
for item in solver_run.items]))
problem += lpSum([con[item.id]
* items[item.id]
for item in solver_run.items]) >= round(total_form_items * (min / 100)), f'{attribute.id} - {attribute.value} - min'
problem += lpSum([con[item.id]
* items[item.id]
for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
elif attribute.type == 'bundle':
logging.info('Bundles Constraint Generating...')
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
if solver_run.bundles != None:
total_bundle_items = 0
selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
for bundle in selected_bundles: if attribute.type == 'metadata':
con = dict(zip([item.id for item in solver_run.items], logging.info('Metadata Constraint Generating...')
[(getattr(item, bundle.type, False) == bundle.id) con = dict(
for item in solver_run.items])) zip([item.id for item in solver_run.items], [
problem += lpSum([con[item.id] item.attribute_exists(attribute)
* items[item.id] for item in solver_run.items
for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})' ]))
total_bundle_items += bundle.count problem += lpSum([
con[item.id] * items[item.id] for item in solver_run.items
]) >= round(
total_form_items *
(min / 100)), f'{attribute.id} - {attribute.value} - min'
problem += lpSum([
con[item.id] * items[item.id] for item in solver_run.items
]) <= round(
total_form_items *
(max / 100)), f'{attribute.id} - {attribute.value} - max'
elif attribute.type == 'bundle':
logging.info('Bundles Constraint Generating...')
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
if solver_run.bundles != None:
# make sure the total bundles used in generated form is limited between min-max set
problem += lpSum([
bundles[bundle.id] for bundle in solver_run.bundles
]) == randint(int(constraint.minimum),
int(constraint.maximum))
# total_bundle_items = 0
# selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
# make sure all other items added to the form # for bundle in selected_bundles:
# are not a part of any bundle # con = dict(zip([item.id for item in solver_run.items],
# currently only supports single bundle constraints, will need refactoring for multiple bundle constraints # [(getattr(item, bundle.type, False) == bundle.id)
con = dict(zip([item.id for item in solver_run.items], # for item in solver_run.items]))
[(getattr(item, attribute.id, None) == None) # problem += lpSum([con[item.id]
for item in solver_run.items])) # * items[item.id]
problem += lpSum([con[item.id] # for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
* items[item.id] # total_bundle_items += bundle.count
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
logging.info('Constraints Created...') # # make sure all other items added to the form
return problem # # are not a part of any bundle
except ValueError as error: # # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
logging.error(error) # con = dict(zip([item.id for item in solver_run.items],
raise ItemGenerationError("Bundle min and/or max larger than bundle amount provided", error.args[0]) # [(getattr(item, attribute.id, None) == None)
# for item in solver_run.items]))
# problem += lpSum([con[item.id]
# * items[item.id]
# for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
def get_random_bundles(total_form_items: int, bundles: list[Bundle], min: int , max: int, found_bundles = False) -> list[Bundle]: logging.info('Constraints Created...')
selected_bundles = None return problem
total_bundle_items = 0 except ValueError as error:
total_bundles = randint(min, max) logging.error(error)
logging.info(f'Selecting Bundles (total of {total_bundles})...') raise ItemGenerationError(
"Bundle min and/or max larger than bundle amount provided",
error.args[0])
while found_bundles == False:
selected_bundles = sample(bundles, total_bundles)
total_bundle_items = sum(bundle.count for bundle in selected_bundles)
if total_bundle_items <= total_form_items: def get_random_bundles(total_form_items: int,
found_bundles = True bundles: list[Bundle],
min: int,
max: int,
found_bundles=False) -> list[Bundle]:
selected_bundles = None
total_bundle_items = 0
total_bundles = randint(min, max)
logging.info(f'Selecting Bundles (total of {total_bundles})...')
if found_bundles == True: while found_bundles == False:
return selected_bundles selected_bundles = sample(bundles, total_bundles)
else: total_bundle_items = sum(bundle.count for bundle in selected_bundles)
return get_random_bundles(total_form_items, total_bundles - 1, bundles)
if total_bundle_items <= total_form_items:
found_bundles = True
if found_bundles == True:
return selected_bundles
else:
return get_random_bundles(total_form_items, total_bundles - 1, bundles)

View File

@ -1,9 +1,11 @@
import io import io
import tarfile import tarfile
def raw_to_tar(raw_object): def raw_to_tar(raw_object):
tarball = io.BytesIO(raw_object) tarball = io.BytesIO(raw_object)
return tarfile.open(fileobj=tarball, mode='r:gz') return tarfile.open(fileobj=tarball, mode='r:gz')
def extract_file_from_tar(tar, file_name): def extract_file_from_tar(tar, file_name):
return tar.extractfile(tar.getmember(file_name)) return tar.extractfile(tar.getmember(file_name))

View File

@ -1,2 +1,2 @@
class ItemGenerationError(Exception): class ItemGenerationError(Exception):
pass pass

View File

@ -3,22 +3,28 @@ import logging
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
class ItemInformationFunction():
def __init__(self, irt_model):
self.model_data = irt_model
# determines the amount of information for a given question at a given theta (ability level) class ItemInformationFunction():
# further detailed on page 161, equation 4 here:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf def __init__(self, irt_model):
def calculate(self, **kwargs): self.model_data = irt_model
try:
if self.model_data.model == '3PL': # determines the amount of information for a given question at a given theta (ability level)
p = ThreeParameterLogistic(self.model_data, kwargs).result() # further detailed on page 161, equation 4 here:
q = 1 - p # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
return (self.model_data.a_param * q * (p - self.model_data.c_param)**2) / (p * ((1 - self.model_data.c_param)**2)) def calculate(self, **kwargs):
else: try:
# potentially error out if self.model_data.model == '3PL':
raise ItemGenerationError("irt model not supported or provided") p = ThreeParameterLogistic(self.model_data, kwargs).result()
except ZeroDivisionError as error: q = 1 - p
logging.error(error) return (self.model_data.a_param * q *
raise ItemGenerationError("params not well formatted", error.args[0]) (p - self.model_data.c_param)**2) / (p * (
(1 - self.model_data.c_param)**2))
else:
# potentially error out
raise ItemGenerationError(
"irt model not supported or provided")
except ZeroDivisionError as error:
logging.error(error)
raise ItemGenerationError("params not well formatted",
error.args[0])

View File

@ -1,12 +1,14 @@
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
class ItemResponseFunction():
def __init__(self, irt_model):
self.model_data = irt_model
def calculate(self, **kwargs): class ItemResponseFunction():
if self.model_data.model == '3PL':
return ThreeParameterLogistic(self.model_data, kwargs).result() def __init__(self, irt_model):
else: self.model_data = irt_model
raise ItemGenerationError("irt model not supported or provided")
def calculate(self, **kwargs):
if self.model_data.model == '3PL':
return ThreeParameterLogistic(self.model_data, kwargs).result()
else:
raise ItemGenerationError("irt model not supported or provided")

View File

@ -1,16 +1,18 @@
class ThreeParameterLogistic: class ThreeParameterLogistic:
def __init__(self, model_params, kwargs):
self.model_params = model_params
# check if exists, if not error out
self.b_param = kwargs['b_param']
self.e = 2.71828
self.theta = kwargs['theta']
# contains the primary 3pl function, determining the probably of an inidividual def __init__(self, model_params, kwargs):
# that an individual at a certain theta would get a particular question correct self.model_params = model_params
# detailed further on page 161, equation 1 here: # check if exists, if not error out
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf self.b_param = kwargs['b_param']
def result(self): self.e = 2.71828
a = self.model_params.a_param self.theta = kwargs['theta']
c = self.model_params.c_param
return c + (1 - c) * (1 / (1 + self.e**(-a * (self.theta - self.b_param)))) # contains the primary 3pl function, determining the probably of an inidividual
# that an individual at a certain theta would get a particular question correct
# detailed further on page 161, equation 1 here:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
def result(self):
a = self.model_params.a_param
c = self.model_params.c_param
return c + (1 - c) * (1 / (1 + self.e**(-a *
(self.theta - self.b_param))))

View File

@ -1,19 +1,22 @@
from lib.irt.item_information_function import ItemInformationFunction from lib.irt.item_information_function import ItemInformationFunction
class TestInformationFunction(): class TestInformationFunction():
def __init__(self, irt_model):
self.irt_model = irt_model
self.iif = ItemInformationFunction(irt_model)
# determins the amount of information def __init__(self, irt_model):
# at a certain theta (ability level) of the sum of a question set correct self.irt_model = irt_model
# detailed further on page 166, equation 4 here: self.iif = ItemInformationFunction(irt_model)
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
def calculate(self, items, **kwargs):
sum = 0
for item in items: # determins the amount of information
result = self.iif.calculate(b_param=item.b_param, theta=kwargs['theta']) # at a certain theta (ability level) of the sum of a question set correct
sum += result # detailed further on page 166, equation 4 here:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
def calculate(self, items, **kwargs):
sum = 0
return sum for item in items:
result = self.iif.calculate(b_param=item.b_param,
theta=kwargs['theta'])
sum += result
return sum

View File

@ -1,20 +1,23 @@
from lib.irt.item_response_function import ItemResponseFunction from lib.irt.item_response_function import ItemResponseFunction
# otherwise known as the Test Characteristic Curve (TCC) # otherwise known as the Test Characteristic Curve (TCC)
class TestResponseFunction(): class TestResponseFunction():
def __init__(self, irt_model):
self.irt_model = irt_model
self.irf = ItemResponseFunction(irt_model)
# determins the probably of an inidividual def __init__(self, irt_model):
# at a certain theta (ability level) would get a sum of questions correct self.irt_model = irt_model
# detailed further on page 166, equation 3 here: self.irf = ItemResponseFunction(irt_model)
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
def calculate(self, items, **kwargs):
sum = 0
for item in items: # determins the probably of an inidividual
result = self.irf.calculate(b_param=item.b_param, theta=kwargs['theta']) # at a certain theta (ability level) would get a sum of questions correct
sum += result # detailed further on page 166, equation 3 here:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
def calculate(self, items, **kwargs):
sum = 0
return sum for item in items:
result = self.irf.calculate(b_param=item.b_param,
theta=kwargs['theta'])
sum += result
return sum

View File

@ -6,31 +6,36 @@ from helpers import aws_helper
from daemonize import Daemonize from daemonize import Daemonize
from sqs_listener import SqsListener from sqs_listener import SqsListener
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s %(asctime)s - %(message)s") logging.basicConfig(stream=sys.stdout,
level=logging.INFO,
format="%(levelname)s %(asctime)s - %(message)s")
class ServiceListener(SqsListener): class ServiceListener(SqsListener):
def handle_message(self, body, attributes, messages_attributes):
# gather/manage/process data based on the particular needs
logging.info('Incoming message: %s', body)
service = LoftService(body) def handle_message(self, body, attributes, messages_attributes):
service.process() # gather/manage/process data based on the particular needs
logging.info('Incoming message: %s', body)
service = LoftService(body)
service.process()
logging.info('Process complete for %s', service.file_name)
logging.info('Process complete for %s', service.file_name)
def main(): def main():
logging.info('Starting Solver Service (v1.1.2)...') logging.info('Starting Solver Service (v1.1.2)...')
listener = ServiceListener( listener = ServiceListener(
os.environ['SQS_QUEUE'], os.environ['SQS_QUEUE'],
region_name=os.environ['AWS_REGION'], region_name=os.environ['AWS_REGION'],
aws_access_key=os.environ['AWS_ACCESS_KEY_ID'], aws_access_key=os.environ['AWS_ACCESS_KEY_ID'],
aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'], aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
queue_url=os.environ['SQS_QUEUE'] queue_url=os.environ['SQS_QUEUE'])
) listener.listen()
listener.listen()
if __name__ == '__main__': if __name__ == '__main__':
myname=os.path.basename(sys.argv[0]) myname = os.path.basename(sys.argv[0])
pidfile='/tmp/%s' % myname pidfile = '/tmp/%s' % myname
daemon = Daemonize(app=myname,pid=pidfile, action=main, foreground=True) daemon = Daemonize(app=myname, pid=pidfile, action=main, foreground=True)
daemon.start() daemon.start()

View File

@ -1,11 +1,12 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import List, Optional, Dict from typing import List, Optional, Dict
class AdvancedOptions(BaseModel): class AdvancedOptions(BaseModel):
linearity_check: Optional[bool] linearity_check: Optional[bool]
show_progress: Optional[bool] show_progress: Optional[bool]
max_solution_time: Optional[int] max_solution_time: Optional[int]
brand_bound_tolerance: Optional[float] brand_bound_tolerance: Optional[float]
max_forms: Optional[int] max_forms: Optional[int]
precision: Optional[float] precision: Optional[float]
extra_param_range: Optional[List[Dict]] extra_param_range: Optional[List[Dict]]

View File

@ -1,7 +1,8 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
class Attribute(BaseModel): class Attribute(BaseModel):
value: Optional[str] value: Optional[str]
type: Optional[str] type: Optional[str]
id: str id: str

View File

@ -1,6 +1,23 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import List
from lib.irt.test_information_function import TestInformationFunction
from lib.irt.test_response_function import TestResponseFunction
from models.item import Item
from models.irt_model import IRTModel
class Bundle(BaseModel): class Bundle(BaseModel):
id: int id: int
count: int count: int
type: str items: List[Item]
type: str
def tif(self, irt_model: IRTModel, theta: float) -> float:
return TestInformationFunction(irt_model).calculate(self.items,
theta=theta)
def trf(self, irt_model: IRTModel, theta: float) -> float:
return TestResponseFunction(irt_model).calculate(self.items,
theta=theta)

View File

@ -2,7 +2,8 @@ from pydantic import BaseModel
from models.attribute import Attribute from models.attribute import Attribute
class Constraint(BaseModel): class Constraint(BaseModel):
reference_attribute: Attribute reference_attribute: Attribute
minimum: float minimum: float
maximum: float maximum: float

View File

@ -8,19 +8,20 @@ from models.target import Target
from lib.irt.test_response_function import TestResponseFunction from lib.irt.test_response_function import TestResponseFunction
class Form(BaseModel):
items: List[Item]
cut_score: float
tif_results: List[Target]
tcc_results: List[Target]
status: str = 'Not Optimized'
@classmethod class Form(BaseModel):
def create(cls, items, solver_run, status): items: List[Item]
return cls( cut_score: float
items=items, tif_results: List[Target]
cut_score=TestResponseFunction(solver_run.irt_model).calculate(items, theta=solver_run.theta_cut_score), tcc_results: List[Target]
tif_results=irt_helper.generate_tif_results(items, solver_run), status: str = 'Not Optimized'
tcc_results=irt_helper.generate_tcc_results(items, solver_run),
status=status @classmethod
) def create(cls, items, solver_run, status):
return cls(
items=items,
cut_score=TestResponseFunction(solver_run.irt_model).calculate(
items, theta=solver_run.theta_cut_score),
tif_results=irt_helper.generate_tif_results(items, solver_run),
tcc_results=irt_helper.generate_tcc_results(items, solver_run),
status=status)

View File

@ -1,12 +1,13 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Dict from typing import Dict
class IRTModel(BaseModel): class IRTModel(BaseModel):
a_param: float a_param: float
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str} b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
c_param: float c_param: float
model: str model: str
def formatted_b_param(self):
def formatted_b_param(self): return self.b_param['schema_bson_id'] + '-' + self.b_param[
return self.b_param['schema_bson_id'] + '-' + self.b_param['field_bson_id'] 'field_bson_id']

View File

@ -6,27 +6,32 @@ from models.attribute import Attribute
from lib.irt.item_response_function import ItemResponseFunction from lib.irt.item_response_function import ItemResponseFunction
from lib.irt.item_information_function import ItemInformationFunction from lib.irt.item_information_function import ItemInformationFunction
class Item(BaseModel): class Item(BaseModel):
id: int id: int
passage_id: Optional[int] passage_id: Optional[int]
workflow_state: Optional[str] workflow_state: Optional[str]
attributes: List[Attribute] attributes: List[Attribute]
b_param: float = 0.00 b_param: float = 0.00
def iif(self, solver_run, theta): def iif(self, solver_run, theta):
return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) return ItemInformationFunction(solver_run.irt_model).calculate(
b_param=self.b_param, theta=theta)
def irf(self, solver_run, theta): def irf(self, solver_run, theta):
return ItemResponseFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta) return ItemResponseFunction(solver_run.irt_model).calculate(
b_param=self.b_param, theta=theta)
def get_attribute(self, ref_attribute): def get_attribute(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower(): if attribute.id == ref_attribute.id and attribute.value.lower(
return attribute.value ) == ref_attribute.value.lower():
return False return attribute.value
return False
def attribute_exists(self, ref_attribute): def attribute_exists(self, ref_attribute):
for attribute in self.attributes: for attribute in self.attributes:
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower(): if attribute.id == ref_attribute.id and attribute.value.lower(
return True ) == ref_attribute.value.lower():
return False return True
return False

View File

@ -3,11 +3,12 @@ from typing import Dict, List, AnyStr
from models.target import Target from models.target import Target
class ObjectiveFunction(BaseModel): class ObjectiveFunction(BaseModel):
# minimizing tif/tcc target value is only option currently # minimizing tif/tcc target value is only option currently
# as we add more we can build this out to be more dynamic # as we add more we can build this out to be more dynamic
# likely with models representing each objective function type # likely with models representing each objective function type
tif_targets: List[Target] tif_targets: List[Target]
tcc_targets: List[Target] tcc_targets: List[Target]
objective: AnyStr = "minimize" objective: AnyStr = "minimize"
weight: Dict = {'tif': 1, 'tcc': 1} weight: Dict = {'tif': 1, 'tcc': 1}

View File

@ -3,6 +3,7 @@ from typing import List
from models.form import Form from models.form import Form
class Solution(BaseModel): class Solution(BaseModel):
response_id: int response_id: int
forms: List[Form] forms: List[Form]

View File

@ -10,64 +10,87 @@ from models.bundle import Bundle
from models.objective_function import ObjectiveFunction from models.objective_function import ObjectiveFunction
from models.advanced_options import AdvancedOptions from models.advanced_options import AdvancedOptions
class SolverRun(BaseModel): class SolverRun(BaseModel):
items: List[Item] = [] items: List[Item] = []
bundles: Optional[Bundle] bundles: list[Bundle] = []
constraints: List[Constraint] constraints: List[Constraint]
irt_model: IRTModel irt_model: IRTModel
objective_function: ObjectiveFunction objective_function: ObjectiveFunction
total_form_items: int total_form_items: int
total_forms: int = 1 total_forms: int = 1
theta_cut_score: float = 0.00 theta_cut_score: float = 0.00
advanced_options: Optional[AdvancedOptions] advanced_options: Optional[AdvancedOptions]
engine: str engine: str
def get_item(self, item_id): def get_item(self, item_id: int) -> Item or None:
for item in self.items: for item in self.items:
if str(item.id) == item_id: if str(item.id) == item_id:
return item return item
return False
def remove_items(self, items): def get_bundle(self, bundle_id: int) -> Bundle or None:
self.items = [item for item in self.items if item not in items] for bundle in self.bundles:
return True if str(bundle.id) == bundle_id:
return bundle
def generate_bundles(self): def get_constraint_by_type(self, type: str) -> Constraint or None:
logging.info('Generating Bundles...') for constraint in self.constraints:
bundle_constraints = (constraint.reference_attribute for constraint in self.constraints if constraint.reference_attribute.type == 'bundle') if type == constraint.reference_attribute.type:
return constraint
for bundle_constraint in bundle_constraints: def remove_items(self, items: list[Item]) -> bool:
type_attribute = bundle_constraint.id self.items = [item for item in self.items if item not in items]
return True
for item in self.items: def generate_bundles(self):
attribute_id = getattr(item, type_attribute, None) logging.info('Generating Bundles...')
# confirms bundle constraints exists
bundle_constraints = (
constraint.reference_attribute for constraint in self.constraints
if constraint.reference_attribute.type == 'bundle')
# make sure the item has said attribute for bundle_constraint in bundle_constraints:
if attribute_id != None: type_attribute = bundle_constraint.id
# if there are pre-existing bundles, add new or increment existing
# else create array with new bundle
if self.bundles != None:
# get index of the bundle in the bundles list if exists or None if it doesn't
bundle_index = next((index for (index, bundle) in enumerate(self.bundles) if bundle.id == attribute_id and bundle.type == type_attribute), None)
# if the index doesn't exist add the new bundle of whatever type for item in self.items:
# else increment the count of the current bundle attribute_id = getattr(item, type_attribute, None)
if bundle_index == None:
self.bundles.append(Bundle(
id=attribute_id,
count=1,
type=type_attribute
))
else:
self.bundles[bundle_index].count += 1
else:
self.bundles = [Bundle(
id=attribute_id,
count=1,
type=type_attribute
)]
logging.info('Bundles Generated...') # make sure the item has said attribute
if attribute_id != None:
# if there are pre-existing bundles, add new or increment existing
# else create array with new bundle
if self.bundles != None:
# get index of the bundle in the bundles list if exists or None if it doesn't
bundle_index = next(
(index
for (index, bundle) in enumerate(self.bundles)
if bundle.id == attribute_id
and bundle.type == type_attribute), None)
def get_constraint(self, name): # if the index doesn't exist add the new bundle of whatever type
return next((constraint for constraint in self.constraints if constraint.reference_attribute.id == name), None) # else increment the count of the current bundle
if bundle_index == None:
self.bundles.append(
Bundle(id=attribute_id,
count=1,
items=[item],
type=type_attribute))
else:
self.bundles[bundle_index].count += 1
self.bundles[bundle_index].items.append(item)
else:
self.bundles = [
Bundle(id=attribute_id,
count=1,
items=[item],
type=type_attribute)
]
logging.info('Bundles Generated...')
def get_constraint(self, name: str) -> Constraint:
return next((constraint for constraint in self.constraints
if constraint.reference_attribute.id == name), None)
def unbundled_items(self) -> list:
return [item for item in self.items if item.passage_id == None]

View File

@ -1,7 +1,8 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
class Target(BaseModel): class Target(BaseModel):
theta: float theta: float
value: float value: float
result: Optional[float] result: Optional[float]

View File

@ -1,4 +1,5 @@
class Base: class Base:
def __init__(self, source, ingest_type='message'):
self.ingest_type = ingest_type def __init__(self, source, ingest_type='message'):
self.source = source self.ingest_type = ingest_type
self.source = source

View File

@ -1,6 +1,6 @@
import os, json, random, io, logging import os, json, random, io, logging
from pulp import LpProblem, LpVariable, LpMinimize, LpStatus, lpSum from pulp import LpProblem, LpVariable, LpMinimize, LpMaximize, LpStatus, lpSum
from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper
from lib.errors.item_generation_error import ItemGenerationError from lib.errors.item_generation_error import ItemGenerationError
@ -12,134 +12,233 @@ from models.item import Item
from services.base import Base from services.base import Base
class LoftService(Base): class LoftService(Base):
def process(self):
try:
self.solver_run = self.create_solver_run_from_attributes()
self.solver_run.generate_bundles()
self.solution = self.generate_solution()
self.result = self.stream_to_s3_bucket()
except ItemGenerationError as error:
self.result = self.stream_to_s3_bucket(error)
except TypeError as error:
logging.error(error)
self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results"))
def create_solver_run_from_attributes(self) -> SolverRun: def process(self):
logging.info('Retrieving attributes from message...') try:
# get s3 object self.solver_run = self.create_solver_run_from_attributes()
self.key = aws_helper.get_key_from_message(self.source) self.solver_run.generate_bundles()
s3_object = aws_helper.get_object(self.key, aws_helper.get_bucket_from_message(self.source)) self.solution = self.generate_solution()
# self.solution = self.generate_test_solution()
self.result = self.stream_to_s3_bucket()
except ItemGenerationError as error:
self.result = self.stream_to_s3_bucket(error)
except TypeError as error:
logging.error(error)
self.result = self.stream_to_s3_bucket(
ItemGenerationError(
"Provided params causing error in calculation results"))
# convert to tar def generate_test_solution(self) -> Solution:
self.tar = tar_helper.raw_to_tar(s3_object) solution = Solution(response_id=random.randint(100, 5000), forms=[])
# get attributes file and convert to dict problem = LpProblem("ata-form-generate-with-bundles", LpMinimize)
attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read())
# create solver run bundles = LpVariable.dicts(
solver_run = SolverRun.parse_obj(attributes) "Bundle", [bundle.id for bundle in self.solver_run.bundles],
lowBound=1,
upBound=1,
cat='Binary')
items = LpVariable.dicts("Item",
[item.id for item in self.solver_run.items],
lowBound=1,
upBound=1,
cat='Binary')
# get items file and convert to dict problem += lpSum(
items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv') [bundles[bundle.id] for bundle in self.solver_run.bundles])
items_csv_reader = csv_helper.file_stream_reader(items_csv) # problem += lpSum([items[item.id] for item in self.solver_run.items])
# add items to solver run # problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) <= 3, 'max total bundles used'
for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run): # problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) >= 1, 'min total bundles used'
solver_run.items.append(Item.parse_obj(item)) problem += lpSum(
[bundles[bundle.id] for bundle in self.solver_run.bundles]) == 3
logging.info('Processed Attributes...') problem += lpSum(
[
bundle.count * bundles[bundle.id]
for bundle in self.solver_run.bundles
] +
[1 * items[item.id] for item in self.solver_run.unbundled_items()]
) == self.solver_run.total_form_items, 'Total bundle form items for form'
return solver_run problem.solve()
def generate_solution(self) -> Solution: # for v in problem.variables():
logging.info('Generating Solution...') # print(f'{v.name}: {v.varValue}')
# unsolved solution # add return items and create as a form
solution = Solution( form_items = service_helper.solution_items(problem.variables(),
response_id=random.randint(100, 5000), self.solver_run)
forms=[]
)
# counter for number of forms # add form to solution
f = 0 solution.forms.append(
Form.create(form_items, self.solver_run, LpStatus[problem.status]))
logging.info('Form generated and added to solution...')
# iterate for number of forms that require creation return solution
# currently creates distinc forms with no item overlap
while f < self.solver_run.total_forms:
# setup vars
items = LpVariable.dicts(
"Item", [item.id for item in self.solver_run.items], lowBound=1, upBound=1, cat='Binary')
# bundles = LpVariable.dicts(
# "Bundle", [bundle.id for bundle in self.solver_run.bundles], lowBound=1, upBound=1, cat='Binary')
problem_objection_functions = [] def create_solver_run_from_attributes(self) -> SolverRun:
logging.info('Retrieving attributes from message...')
# get s3 object
self.key = aws_helper.get_key_from_message(self.source)
s3_object = aws_helper.get_object(
self.key, aws_helper.get_bucket_from_message(self.source))
# create problem # convert to tar
problem = LpProblem("ata-form-generate", LpMinimize) self.tar = tar_helper.raw_to_tar(s3_object)
# dummy objective function, because it just makes things easier™ # get attributes file and convert to dict
# problem += lpSum([items[item.id] attributes = json.loads(
# for item in self.solver_run.items]) tar_helper.extract_file_from_tar(
self.tar, 'solver_run_attributes.json').read())
# constraints # create solver run
problem += lpSum([items[item.id] solver_run = SolverRun.parse_obj(attributes)
for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
# dynamic constraints # get items file and convert to dict
problem = solver_helper.build_constraints(self.solver_run, problem, items) items_csv = tar_helper.extract_file_from_tar(self.tar, 'items.csv')
items_csv_reader = csv_helper.file_stream_reader(items_csv)
# multi-objective constraints # add items to solver run
logging.info('Creating TIF and TCC constraints') for item in service_helper.items_csv_to_dict(items_csv_reader,
for target in self.solver_run.objective_function.tif_targets: solver_run):
tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id] solver_run.items.append(Item.parse_obj(item))
for item in self.solver_run.items])
problem_objection_functions.append(tif)
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}'
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}'
for target in self.solver_run.objective_function.tcc_targets: logging.info('Processed Attributes...')
tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items])
problem_objection_functions.append(tcc)
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
for item in self.solver_run.items]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}'
# solve problem return solver_run
logging.info('Solving...')
# problem.solve()
problem.sequentialSolve(problem_objection_functions)
logging.info('Solved...generating form and adding to solution')
# add return items and create as a form def generate_solution(self) -> Solution:
form_items = service_helper.solution_items(problem.variables(), self.solver_run) logging.info('Generating Solution...')
# add form to solution # unsolved solution
solution.forms.append(Form.create(form_items, self.solver_run, LpStatus[problem.status])) solution = Solution(response_id=random.randint(100, 5000), forms=[])
logging.info('Form generated and added to solution...')
# successfull form, increment # counter for number of forms
f += 1 f = 0
logging.info('Solution Generated.') # iterate for number of forms that require creation
return solution # currently creates distinc forms with no item overlap
while f < self.solver_run.total_forms:
# setup vars
items = LpVariable.dicts(
"Item", [item.id for item in self.solver_run.items],
lowBound=1,
upBound=1,
cat='Binary')
bundles = LpVariable.dicts(
"Bundle", [bundle.id for bundle in self.solver_run.bundles],
lowBound=1,
upBound=1,
cat='Binary')
def stream_to_s3_bucket(self, error = None): # problem_objection_functions = []
self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv'
solution_file = None
# setup writer buffer and write processed forms to file
buffer = io.StringIO()
if error: # create problem
logging.info('Streaming %s error response to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET']) problem = LpProblem("ata-form-generate", LpMinimize)
solution_file = service_helper.error_to_file(buffer, error)
else:
logging.info('Streaming %s to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET'])
solution_file = service_helper.solution_to_file(buffer, self.solver_run.total_form_items, self.solution.forms)
# upload generated file to s3 and return result # dummy objective function, because it just makes things easier™
return aws_helper.file_stream_upload(solution_file, self.file_name, os.environ['S3_PROCESSED_BUCKET']) problem += lpSum(
[items[item.id] for item in self.solver_run.items])
# constraints
# problem += lpSum([items[item.id]
# for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
problem += lpSum(
[
bundle.count * bundles[bundle.id]
for bundle in self.solver_run.bundles
] + [
1 * items[item.id]
for item in self.solver_run.unbundled_items()
]
) == self.solver_run.total_form_items, 'Total bundle form items for form'
# dynamic constraints
problem = solver_helper.build_constraints(self.solver_run, problem,
items, bundles)
# multi-objective constraints
logging.info('Creating TIF and TCC constraints')
for target in self.solver_run.objective_function.tif_targets:
# tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
# for item in self.solver_run.items])
# problem_objection_functions.append(tif)
problem += lpSum([
bundle.tif(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.iif(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}'
problem += lpSum([
bundle.tif(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.iif(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}'
for target in self.solver_run.objective_function.tcc_targets:
# tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
# for item in self.solver_run.items])
# problem_objection_functions.append(tcc)
problem += lpSum([
bundle.trf(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.irf(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}'
problem += lpSum([
bundle.trf(self.solver_run.irt_model, target.theta) *
bundles[bundle.id] for bundle in self.solver_run.bundles
] + [
item.irf(self.solver_run, target.theta) * items[item.id]
for item in self.solver_run.items
]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}'
# solve problem
logging.info('Solving...')
problem.solve()
# problem.sequentialSolve(problem_objection_functions)
logging.info('Solved...generating form and adding to solution')
# add return items and create as a form
form_items = service_helper.solution_items(problem.variables(),
self.solver_run)
# add form to solution
solution.forms.append(
Form.create(form_items, self.solver_run,
LpStatus[problem.status]))
logging.info('Form generated and added to solution...')
# successfull form, increment
f += 1
logging.info('Solution Generated.')
return solution
def stream_to_s3_bucket(self, error=None):
self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv'
solution_file = None
# setup writer buffer and write processed forms to file
buffer = io.StringIO()
if error:
logging.info('Streaming %s error response to s3 bucket - %s',
self.file_name, os.environ['S3_PROCESSED_BUCKET'])
solution_file = service_helper.error_to_file(buffer, error)
else:
logging.info('Streaming %s to s3 bucket - %s', self.file_name,
os.environ['S3_PROCESSED_BUCKET'])
solution_file = service_helper.solution_to_file(
buffer, self.solver_run.total_form_items, self.solution.forms)
# upload generated file to s3 and return result
return aws_helper.file_stream_upload(solution_file, self.file_name,
os.environ['S3_PROCESSED_BUCKET'])