the big format
This commit is contained in:
parent
19a37ab33a
commit
deb6b9014e
@ -3,37 +3,34 @@ import os
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
session = boto3.Session(
|
session = boto3.Session(
|
||||||
aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
|
aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
|
||||||
aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY']
|
aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])
|
||||||
)
|
|
||||||
|
|
||||||
s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
|
s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
|
||||||
sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])
|
sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])
|
||||||
|
|
||||||
|
|
||||||
def get_key_from_message(body):
|
def get_key_from_message(body):
|
||||||
return body['Records'][0]['s3']['object']['key']
|
return body['Records'][0]['s3']['object']['key']
|
||||||
|
|
||||||
|
|
||||||
def get_bucket_from_message(body):
|
def get_bucket_from_message(body):
|
||||||
return body['Records'][0]['s3']['bucket']['name']
|
return body['Records'][0]['s3']['bucket']['name']
|
||||||
|
|
||||||
|
|
||||||
def get_object(key, bucket):
|
def get_object(key, bucket):
|
||||||
return s3.Object(
|
return s3.Object(bucket_name=bucket, key=key).get()['Body'].read()
|
||||||
bucket_name=bucket,
|
|
||||||
key=key
|
|
||||||
).get()['Body'].read()
|
|
||||||
|
|
||||||
def file_stream_upload(buffer, name, bucket):
|
def file_stream_upload(buffer, name, bucket):
|
||||||
return s3.Bucket(bucket).upload_fileobj(buffer, name)
|
return s3.Bucket(bucket).upload_fileobj(buffer, name)
|
||||||
|
|
||||||
|
|
||||||
def receive_message(queue, message_num=1, wait_time=1):
|
def receive_message(queue, message_num=1, wait_time=1):
|
||||||
return sqs.receive_message(
|
return sqs.receive_message(QueueUrl=queue,
|
||||||
QueueUrl=queue,
|
MaxNumberOfMessages=message_num,
|
||||||
MaxNumberOfMessages=message_num,
|
WaitTimeSeconds=wait_time)
|
||||||
WaitTimeSeconds=wait_time
|
|
||||||
)
|
|
||||||
|
|
||||||
def delete_message(queue, receipt):
|
def delete_message(queue, receipt):
|
||||||
return sqs.delete_message(
|
return sqs.delete_message(QueueUrl=queue, ReceiptHandle=receipt)
|
||||||
QueueUrl=queue,
|
|
||||||
ReceiptHandle=receipt
|
|
||||||
)
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
|
|
||||||
|
|
||||||
def file_stream_reader(f):
|
def file_stream_reader(f):
|
||||||
return csv.reader(io.StringIO(f.read().decode('ascii')))
|
return csv.reader(io.StringIO(f.read().decode('ascii')))
|
||||||
|
@ -3,98 +3,120 @@ import io
|
|||||||
import re
|
import re
|
||||||
from tokenize import String
|
from tokenize import String
|
||||||
|
|
||||||
|
|
||||||
def items_csv_to_dict(items_csv_reader, solver_run):
|
def items_csv_to_dict(items_csv_reader, solver_run):
|
||||||
items = []
|
items = []
|
||||||
headers = []
|
headers = []
|
||||||
|
|
||||||
# get headers and items
|
# get headers and items
|
||||||
for key, row in enumerate(items_csv_reader):
|
for key, row in enumerate(items_csv_reader):
|
||||||
if key == 0:
|
if key == 0:
|
||||||
headers = row
|
headers = row
|
||||||
else:
|
else:
|
||||||
item = { 'attributes': [] }
|
item = {'attributes': []}
|
||||||
|
|
||||||
# ensure that the b param is formatted correctly
|
# ensure that the b param is formatted correctly
|
||||||
if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]):
|
if row[len(headers) - 1] != '' and is_float(row[len(headers) - 1]):
|
||||||
for key, col in enumerate(headers):
|
for key, col in enumerate(headers):
|
||||||
if solver_run.irt_model.formatted_b_param() == col:
|
if solver_run.irt_model.formatted_b_param() == col:
|
||||||
value = float(row[key])
|
value = float(row[key])
|
||||||
item['b_param'] = value
|
item['b_param'] = value
|
||||||
elif solver_run.get_constraint(col) and solver_run.get_constraint(col).reference_attribute.type == 'bundle':
|
elif solver_run.get_constraint(
|
||||||
if row[key]:
|
col) and solver_run.get_constraint(
|
||||||
item[solver_run.get_constraint(col).reference_attribute.id] = row[key]
|
col).reference_attribute.type == 'bundle':
|
||||||
elif solver_run.get_constraint(col):
|
if row[key]:
|
||||||
constraint = solver_run.get_constraint(col)
|
item[solver_run.get_constraint(
|
||||||
item['attributes'].append({
|
col).reference_attribute.id] = row[key]
|
||||||
'id': col,
|
elif solver_run.get_constraint(col):
|
||||||
'value': row[key],
|
constraint = solver_run.get_constraint(col)
|
||||||
'type': constraint.reference_attribute.type
|
item['attributes'].append({
|
||||||
})
|
'id':
|
||||||
else:
|
col,
|
||||||
if row[key]:
|
'value':
|
||||||
item[col] = row[key]
|
row[key],
|
||||||
|
'type':
|
||||||
|
constraint.reference_attribute.type
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
if row[key]:
|
||||||
|
item[col] = row[key]
|
||||||
|
|
||||||
items.append(item)
|
items.append(item)
|
||||||
|
|
||||||
|
return items
|
||||||
|
|
||||||
return items
|
|
||||||
|
|
||||||
def solution_to_file(buffer, total_form_items, forms):
|
def solution_to_file(buffer, total_form_items, forms):
|
||||||
wr = csv.writer(buffer, dialect='excel', delimiter=',')
|
wr = csv.writer(buffer, dialect='excel', delimiter=',')
|
||||||
|
|
||||||
# write header row for first row utilizing the total items all forms will have
|
# write header row for first row utilizing the total items all forms will have
|
||||||
# fill the rows with the targets and cut score then the items
|
# fill the rows with the targets and cut score then the items
|
||||||
header = ['status']
|
header = ['status']
|
||||||
|
|
||||||
for result in forms[0].tif_results:
|
for result in forms[0].tif_results:
|
||||||
header += [f'tif @ {round(result.theta, 2)}']
|
header += [f'tif @ {round(result.theta, 2)}']
|
||||||
|
|
||||||
for result in forms[0].tcc_results:
|
for result in forms[0].tcc_results:
|
||||||
header += [f'tcc @ {round(result.theta, 2)}']
|
header += [f'tcc @ {round(result.theta, 2)}']
|
||||||
|
|
||||||
header += ['cut score'] + [x + 1 for x in range(total_form_items)]
|
header += ['cut score'] + [x + 1 for x in range(total_form_items)]
|
||||||
wr.writerow(header)
|
wr.writerow(header)
|
||||||
|
|
||||||
# add each form as row to processed csv
|
# add each form as row to processed csv
|
||||||
for form in forms:
|
for form in forms:
|
||||||
row = [form.status]
|
row = [form.status]
|
||||||
|
|
||||||
for result in form.tif_results + form.tcc_results:
|
for result in form.tif_results + form.tcc_results:
|
||||||
row += [f'target - {result.value}\nresult - {round(result.result, 2)}']
|
row += [
|
||||||
|
f'target - {result.value}\nresult - {round(result.result, 2)}'
|
||||||
|
]
|
||||||
|
|
||||||
# provide generated items and cut score
|
# provide generated items and cut score
|
||||||
row += [round(form.cut_score, 2)] + [item.id for item in form.items]
|
row += [round(form.cut_score, 2)] + [item.id for item in form.items]
|
||||||
wr.writerow(row)
|
wr.writerow(row)
|
||||||
|
|
||||||
buff2 = io.BytesIO(buffer.getvalue().encode())
|
buff2 = io.BytesIO(buffer.getvalue().encode())
|
||||||
|
|
||||||
|
return buff2
|
||||||
|
|
||||||
return buff2
|
|
||||||
|
|
||||||
def error_to_file(buffer, error):
|
def error_to_file(buffer, error):
|
||||||
wr = csv.writer(buffer, dialect='excel', delimiter=',')
|
wr = csv.writer(buffer, dialect='excel', delimiter=',')
|
||||||
wr.writerow(['status'])
|
wr.writerow(['status'])
|
||||||
wr.writerow([error.args[0]])
|
wr.writerow([error.args[0]])
|
||||||
|
|
||||||
|
return io.BytesIO(buffer.getvalue().encode())
|
||||||
|
|
||||||
return io.BytesIO(buffer.getvalue().encode())
|
|
||||||
|
|
||||||
def key_to_uuid(key):
|
def key_to_uuid(key):
|
||||||
return re.split("_", key)[0]
|
return re.split("_", key)[0]
|
||||||
|
|
||||||
|
|
||||||
def solution_items(variables, solver_run):
|
def solution_items(variables, solver_run):
|
||||||
form_items = []
|
form_items = []
|
||||||
|
|
||||||
for v in variables:
|
for v in variables:
|
||||||
if v.varValue > 0 and 'Item_' in v.name:
|
if v.varValue > 0:
|
||||||
item_id = v.name.replace('Item_', '')
|
if 'Item_' in v.name:
|
||||||
item = solver_run.get_item(item_id)
|
item_id = v.name.replace('Item_', '')
|
||||||
# add item to list and then remove from master item list
|
item = solver_run.get_item(item_id)
|
||||||
form_items.append(item)
|
# add item to list and then remove from master item list
|
||||||
|
if item: form_items.append(item)
|
||||||
|
elif 'Bundle_' in v.name:
|
||||||
|
bundle_id = v.name.replace('Bundle_', '')
|
||||||
|
bundle = solver_run.get_bundle(bundle_id)
|
||||||
|
|
||||||
|
if bundle:
|
||||||
|
for item in bundle.items:
|
||||||
|
if item: form_items.append(item)
|
||||||
|
|
||||||
|
return form_items
|
||||||
|
|
||||||
return form_items
|
|
||||||
|
|
||||||
# probably a better place for this...
|
# probably a better place for this...
|
||||||
def is_float(element: String) -> bool:
|
def is_float(element: String) -> bool:
|
||||||
try:
|
try:
|
||||||
float(element)
|
float(element)
|
||||||
return True
|
return True
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
|
@ -9,75 +9,95 @@ from models.item import Item
|
|||||||
|
|
||||||
from lib.errors.item_generation_error import ItemGenerationError
|
from lib.errors.item_generation_error import ItemGenerationError
|
||||||
|
|
||||||
def build_constraints(solver_run: SolverRun, problem: LpProblem, items: list[Item]) -> LpProblem:
|
|
||||||
logging.info('Creating Constraints...')
|
|
||||||
|
|
||||||
try:
|
def build_constraints(solver_run: SolverRun, problem: LpProblem,
|
||||||
total_form_items = solver_run.total_form_items
|
items: list[Item], bundles: list[Bundle]) -> LpProblem:
|
||||||
constraints = solver_run.constraints
|
logging.info('Creating Constraints...')
|
||||||
|
|
||||||
for constraint in constraints:
|
try:
|
||||||
attribute = constraint.reference_attribute
|
total_form_items = solver_run.total_form_items
|
||||||
min = constraint.minimum
|
constraints = solver_run.constraints
|
||||||
max = constraint.maximum
|
|
||||||
|
|
||||||
if attribute.type == 'metadata':
|
for constraint in constraints:
|
||||||
logging.info('Metadata Constraint Generating...')
|
attribute = constraint.reference_attribute
|
||||||
con = dict(zip([item.id for item in solver_run.items],
|
min = constraint.minimum
|
||||||
[item.attribute_exists(attribute)
|
max = constraint.maximum
|
||||||
for item in solver_run.items]))
|
|
||||||
problem += lpSum([con[item.id]
|
|
||||||
* items[item.id]
|
|
||||||
for item in solver_run.items]) >= round(total_form_items * (min / 100)), f'{attribute.id} - {attribute.value} - min'
|
|
||||||
problem += lpSum([con[item.id]
|
|
||||||
* items[item.id]
|
|
||||||
for item in solver_run.items]) <= round(total_form_items * (max / 100)), f'{attribute.id} - {attribute.value} - max'
|
|
||||||
elif attribute.type == 'bundle':
|
|
||||||
logging.info('Bundles Constraint Generating...')
|
|
||||||
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
|
|
||||||
if solver_run.bundles != None:
|
|
||||||
total_bundle_items = 0
|
|
||||||
selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
|
|
||||||
|
|
||||||
for bundle in selected_bundles:
|
if attribute.type == 'metadata':
|
||||||
con = dict(zip([item.id for item in solver_run.items],
|
logging.info('Metadata Constraint Generating...')
|
||||||
[(getattr(item, bundle.type, False) == bundle.id)
|
con = dict(
|
||||||
for item in solver_run.items]))
|
zip([item.id for item in solver_run.items], [
|
||||||
problem += lpSum([con[item.id]
|
item.attribute_exists(attribute)
|
||||||
* items[item.id]
|
for item in solver_run.items
|
||||||
for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
|
]))
|
||||||
total_bundle_items += bundle.count
|
problem += lpSum([
|
||||||
|
con[item.id] * items[item.id] for item in solver_run.items
|
||||||
|
]) >= round(
|
||||||
|
total_form_items *
|
||||||
|
(min / 100)), f'{attribute.id} - {attribute.value} - min'
|
||||||
|
problem += lpSum([
|
||||||
|
con[item.id] * items[item.id] for item in solver_run.items
|
||||||
|
]) <= round(
|
||||||
|
total_form_items *
|
||||||
|
(max / 100)), f'{attribute.id} - {attribute.value} - max'
|
||||||
|
elif attribute.type == 'bundle':
|
||||||
|
logging.info('Bundles Constraint Generating...')
|
||||||
|
# TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
|
||||||
|
if solver_run.bundles != None:
|
||||||
|
# make sure the total bundles used in generated form is limited between min-max set
|
||||||
|
problem += lpSum([
|
||||||
|
bundles[bundle.id] for bundle in solver_run.bundles
|
||||||
|
]) == randint(int(constraint.minimum),
|
||||||
|
int(constraint.maximum))
|
||||||
|
# total_bundle_items = 0
|
||||||
|
# selected_bundles = get_random_bundles(solver_run.total_form_items, solver_run.bundles, int(constraint.minimum), int(constraint.maximum))
|
||||||
|
|
||||||
# make sure all other items added to the form
|
# for bundle in selected_bundles:
|
||||||
# are not a part of any bundle
|
# con = dict(zip([item.id for item in solver_run.items],
|
||||||
# currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
|
# [(getattr(item, bundle.type, False) == bundle.id)
|
||||||
con = dict(zip([item.id for item in solver_run.items],
|
# for item in solver_run.items]))
|
||||||
[(getattr(item, attribute.id, None) == None)
|
# problem += lpSum([con[item.id]
|
||||||
for item in solver_run.items]))
|
# * items[item.id]
|
||||||
problem += lpSum([con[item.id]
|
# for item in solver_run.items]) == bundle.count, f'Bundle constraint for {bundle.type} ({bundle.id})'
|
||||||
* items[item.id]
|
# total_bundle_items += bundle.count
|
||||||
for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
|
|
||||||
|
|
||||||
logging.info('Constraints Created...')
|
# # make sure all other items added to the form
|
||||||
return problem
|
# # are not a part of any bundle
|
||||||
except ValueError as error:
|
# # currently only supports single bundle constraints, will need refactoring for multiple bundle constraints
|
||||||
logging.error(error)
|
# con = dict(zip([item.id for item in solver_run.items],
|
||||||
raise ItemGenerationError("Bundle min and/or max larger than bundle amount provided", error.args[0])
|
# [(getattr(item, attribute.id, None) == None)
|
||||||
|
# for item in solver_run.items]))
|
||||||
|
# problem += lpSum([con[item.id]
|
||||||
|
# * items[item.id]
|
||||||
|
# for item in solver_run.items]) == solver_run.total_form_items - total_bundle_items, f'Remaining items are not of a bundle type'
|
||||||
|
|
||||||
def get_random_bundles(total_form_items: int, bundles: list[Bundle], min: int , max: int, found_bundles = False) -> list[Bundle]:
|
logging.info('Constraints Created...')
|
||||||
selected_bundles = None
|
return problem
|
||||||
total_bundle_items = 0
|
except ValueError as error:
|
||||||
total_bundles = randint(min, max)
|
logging.error(error)
|
||||||
logging.info(f'Selecting Bundles (total of {total_bundles})...')
|
raise ItemGenerationError(
|
||||||
|
"Bundle min and/or max larger than bundle amount provided",
|
||||||
|
error.args[0])
|
||||||
|
|
||||||
while found_bundles == False:
|
|
||||||
selected_bundles = sample(bundles, total_bundles)
|
|
||||||
total_bundle_items = sum(bundle.count for bundle in selected_bundles)
|
|
||||||
|
|
||||||
if total_bundle_items <= total_form_items:
|
def get_random_bundles(total_form_items: int,
|
||||||
found_bundles = True
|
bundles: list[Bundle],
|
||||||
|
min: int,
|
||||||
|
max: int,
|
||||||
|
found_bundles=False) -> list[Bundle]:
|
||||||
|
selected_bundles = None
|
||||||
|
total_bundle_items = 0
|
||||||
|
total_bundles = randint(min, max)
|
||||||
|
logging.info(f'Selecting Bundles (total of {total_bundles})...')
|
||||||
|
|
||||||
if found_bundles == True:
|
while found_bundles == False:
|
||||||
return selected_bundles
|
selected_bundles = sample(bundles, total_bundles)
|
||||||
else:
|
total_bundle_items = sum(bundle.count for bundle in selected_bundles)
|
||||||
return get_random_bundles(total_form_items, total_bundles - 1, bundles)
|
|
||||||
|
if total_bundle_items <= total_form_items:
|
||||||
|
found_bundles = True
|
||||||
|
|
||||||
|
if found_bundles == True:
|
||||||
|
return selected_bundles
|
||||||
|
else:
|
||||||
|
return get_random_bundles(total_form_items, total_bundles - 1, bundles)
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
import io
|
import io
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
|
|
||||||
def raw_to_tar(raw_object):
|
def raw_to_tar(raw_object):
|
||||||
tarball = io.BytesIO(raw_object)
|
tarball = io.BytesIO(raw_object)
|
||||||
return tarfile.open(fileobj=tarball, mode='r:gz')
|
return tarfile.open(fileobj=tarball, mode='r:gz')
|
||||||
|
|
||||||
|
|
||||||
def extract_file_from_tar(tar, file_name):
|
def extract_file_from_tar(tar, file_name):
|
||||||
return tar.extractfile(tar.getmember(file_name))
|
return tar.extractfile(tar.getmember(file_name))
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
class ItemGenerationError(Exception):
|
class ItemGenerationError(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -3,22 +3,28 @@ import logging
|
|||||||
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
||||||
from lib.errors.item_generation_error import ItemGenerationError
|
from lib.errors.item_generation_error import ItemGenerationError
|
||||||
|
|
||||||
class ItemInformationFunction():
|
|
||||||
def __init__(self, irt_model):
|
|
||||||
self.model_data = irt_model
|
|
||||||
|
|
||||||
# determines the amount of information for a given question at a given theta (ability level)
|
class ItemInformationFunction():
|
||||||
# further detailed on page 161, equation 4 here:
|
|
||||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
def __init__(self, irt_model):
|
||||||
def calculate(self, **kwargs):
|
self.model_data = irt_model
|
||||||
try:
|
|
||||||
if self.model_data.model == '3PL':
|
# determines the amount of information for a given question at a given theta (ability level)
|
||||||
p = ThreeParameterLogistic(self.model_data, kwargs).result()
|
# further detailed on page 161, equation 4 here:
|
||||||
q = 1 - p
|
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
||||||
return (self.model_data.a_param * q * (p - self.model_data.c_param)**2) / (p * ((1 - self.model_data.c_param)**2))
|
def calculate(self, **kwargs):
|
||||||
else:
|
try:
|
||||||
# potentially error out
|
if self.model_data.model == '3PL':
|
||||||
raise ItemGenerationError("irt model not supported or provided")
|
p = ThreeParameterLogistic(self.model_data, kwargs).result()
|
||||||
except ZeroDivisionError as error:
|
q = 1 - p
|
||||||
logging.error(error)
|
return (self.model_data.a_param * q *
|
||||||
raise ItemGenerationError("params not well formatted", error.args[0])
|
(p - self.model_data.c_param)**2) / (p * (
|
||||||
|
(1 - self.model_data.c_param)**2))
|
||||||
|
else:
|
||||||
|
# potentially error out
|
||||||
|
raise ItemGenerationError(
|
||||||
|
"irt model not supported or provided")
|
||||||
|
except ZeroDivisionError as error:
|
||||||
|
logging.error(error)
|
||||||
|
raise ItemGenerationError("params not well formatted",
|
||||||
|
error.args[0])
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
|
||||||
from lib.errors.item_generation_error import ItemGenerationError
|
from lib.errors.item_generation_error import ItemGenerationError
|
||||||
|
|
||||||
class ItemResponseFunction():
|
|
||||||
def __init__(self, irt_model):
|
|
||||||
self.model_data = irt_model
|
|
||||||
|
|
||||||
def calculate(self, **kwargs):
|
class ItemResponseFunction():
|
||||||
if self.model_data.model == '3PL':
|
|
||||||
return ThreeParameterLogistic(self.model_data, kwargs).result()
|
def __init__(self, irt_model):
|
||||||
else:
|
self.model_data = irt_model
|
||||||
raise ItemGenerationError("irt model not supported or provided")
|
|
||||||
|
def calculate(self, **kwargs):
|
||||||
|
if self.model_data.model == '3PL':
|
||||||
|
return ThreeParameterLogistic(self.model_data, kwargs).result()
|
||||||
|
else:
|
||||||
|
raise ItemGenerationError("irt model not supported or provided")
|
||||||
|
@ -1,16 +1,18 @@
|
|||||||
class ThreeParameterLogistic:
|
class ThreeParameterLogistic:
|
||||||
def __init__(self, model_params, kwargs):
|
|
||||||
self.model_params = model_params
|
|
||||||
# check if exists, if not error out
|
|
||||||
self.b_param = kwargs['b_param']
|
|
||||||
self.e = 2.71828
|
|
||||||
self.theta = kwargs['theta']
|
|
||||||
|
|
||||||
# contains the primary 3pl function, determining the probably of an inidividual
|
def __init__(self, model_params, kwargs):
|
||||||
# that an individual at a certain theta would get a particular question correct
|
self.model_params = model_params
|
||||||
# detailed further on page 161, equation 1 here:
|
# check if exists, if not error out
|
||||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
self.b_param = kwargs['b_param']
|
||||||
def result(self):
|
self.e = 2.71828
|
||||||
a = self.model_params.a_param
|
self.theta = kwargs['theta']
|
||||||
c = self.model_params.c_param
|
|
||||||
return c + (1 - c) * (1 / (1 + self.e**(-a * (self.theta - self.b_param))))
|
# contains the primary 3pl function, determining the probably of an inidividual
|
||||||
|
# that an individual at a certain theta would get a particular question correct
|
||||||
|
# detailed further on page 161, equation 1 here:
|
||||||
|
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
||||||
|
def result(self):
|
||||||
|
a = self.model_params.a_param
|
||||||
|
c = self.model_params.c_param
|
||||||
|
return c + (1 - c) * (1 / (1 + self.e**(-a *
|
||||||
|
(self.theta - self.b_param))))
|
||||||
|
@ -1,19 +1,22 @@
|
|||||||
from lib.irt.item_information_function import ItemInformationFunction
|
from lib.irt.item_information_function import ItemInformationFunction
|
||||||
|
|
||||||
|
|
||||||
class TestInformationFunction():
|
class TestInformationFunction():
|
||||||
def __init__(self, irt_model):
|
|
||||||
self.irt_model = irt_model
|
|
||||||
self.iif = ItemInformationFunction(irt_model)
|
|
||||||
|
|
||||||
# determins the amount of information
|
def __init__(self, irt_model):
|
||||||
# at a certain theta (ability level) of the sum of a question set correct
|
self.irt_model = irt_model
|
||||||
# detailed further on page 166, equation 4 here:
|
self.iif = ItemInformationFunction(irt_model)
|
||||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
|
||||||
def calculate(self, items, **kwargs):
|
|
||||||
sum = 0
|
|
||||||
|
|
||||||
for item in items:
|
# determins the amount of information
|
||||||
result = self.iif.calculate(b_param=item.b_param, theta=kwargs['theta'])
|
# at a certain theta (ability level) of the sum of a question set correct
|
||||||
sum += result
|
# detailed further on page 166, equation 4 here:
|
||||||
|
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
||||||
|
def calculate(self, items, **kwargs):
|
||||||
|
sum = 0
|
||||||
|
|
||||||
return sum
|
for item in items:
|
||||||
|
result = self.iif.calculate(b_param=item.b_param,
|
||||||
|
theta=kwargs['theta'])
|
||||||
|
sum += result
|
||||||
|
|
||||||
|
return sum
|
||||||
|
@ -1,20 +1,23 @@
|
|||||||
from lib.irt.item_response_function import ItemResponseFunction
|
from lib.irt.item_response_function import ItemResponseFunction
|
||||||
|
|
||||||
|
|
||||||
# otherwise known as the Test Characteristic Curve (TCC)
|
# otherwise known as the Test Characteristic Curve (TCC)
|
||||||
class TestResponseFunction():
|
class TestResponseFunction():
|
||||||
def __init__(self, irt_model):
|
|
||||||
self.irt_model = irt_model
|
|
||||||
self.irf = ItemResponseFunction(irt_model)
|
|
||||||
|
|
||||||
# determins the probably of an inidividual
|
def __init__(self, irt_model):
|
||||||
# at a certain theta (ability level) would get a sum of questions correct
|
self.irt_model = irt_model
|
||||||
# detailed further on page 166, equation 3 here:
|
self.irf = ItemResponseFunction(irt_model)
|
||||||
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
|
||||||
def calculate(self, items, **kwargs):
|
|
||||||
sum = 0
|
|
||||||
|
|
||||||
for item in items:
|
# determins the probably of an inidividual
|
||||||
result = self.irf.calculate(b_param=item.b_param, theta=kwargs['theta'])
|
# at a certain theta (ability level) would get a sum of questions correct
|
||||||
sum += result
|
# detailed further on page 166, equation 3 here:
|
||||||
|
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5978482/pdf/10.1177_0146621615613308.pdf
|
||||||
|
def calculate(self, items, **kwargs):
|
||||||
|
sum = 0
|
||||||
|
|
||||||
return sum
|
for item in items:
|
||||||
|
result = self.irf.calculate(b_param=item.b_param,
|
||||||
|
theta=kwargs['theta'])
|
||||||
|
sum += result
|
||||||
|
|
||||||
|
return sum
|
||||||
|
45
app/main.py
45
app/main.py
@ -6,31 +6,36 @@ from helpers import aws_helper
|
|||||||
from daemonize import Daemonize
|
from daemonize import Daemonize
|
||||||
from sqs_listener import SqsListener
|
from sqs_listener import SqsListener
|
||||||
|
|
||||||
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s %(asctime)s - %(message)s")
|
logging.basicConfig(stream=sys.stdout,
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(levelname)s %(asctime)s - %(message)s")
|
||||||
|
|
||||||
|
|
||||||
class ServiceListener(SqsListener):
|
class ServiceListener(SqsListener):
|
||||||
def handle_message(self, body, attributes, messages_attributes):
|
|
||||||
# gather/manage/process data based on the particular needs
|
|
||||||
logging.info('Incoming message: %s', body)
|
|
||||||
|
|
||||||
service = LoftService(body)
|
def handle_message(self, body, attributes, messages_attributes):
|
||||||
service.process()
|
# gather/manage/process data based on the particular needs
|
||||||
|
logging.info('Incoming message: %s', body)
|
||||||
|
|
||||||
|
service = LoftService(body)
|
||||||
|
service.process()
|
||||||
|
|
||||||
|
logging.info('Process complete for %s', service.file_name)
|
||||||
|
|
||||||
logging.info('Process complete for %s', service.file_name)
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.info('Starting Solver Service (v1.1.2)...')
|
logging.info('Starting Solver Service (v1.1.2)...')
|
||||||
listener = ServiceListener(
|
listener = ServiceListener(
|
||||||
os.environ['SQS_QUEUE'],
|
os.environ['SQS_QUEUE'],
|
||||||
region_name=os.environ['AWS_REGION'],
|
region_name=os.environ['AWS_REGION'],
|
||||||
aws_access_key=os.environ['AWS_ACCESS_KEY_ID'],
|
aws_access_key=os.environ['AWS_ACCESS_KEY_ID'],
|
||||||
aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
|
aws_secret_key=os.environ['AWS_SECRET_ACCESS_KEY'],
|
||||||
queue_url=os.environ['SQS_QUEUE']
|
queue_url=os.environ['SQS_QUEUE'])
|
||||||
)
|
listener.listen()
|
||||||
listener.listen()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
myname=os.path.basename(sys.argv[0])
|
myname = os.path.basename(sys.argv[0])
|
||||||
pidfile='/tmp/%s' % myname
|
pidfile = '/tmp/%s' % myname
|
||||||
daemon = Daemonize(app=myname,pid=pidfile, action=main, foreground=True)
|
daemon = Daemonize(app=myname, pid=pidfile, action=main, foreground=True)
|
||||||
daemon.start()
|
daemon.start()
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing import List, Optional, Dict
|
from typing import List, Optional, Dict
|
||||||
|
|
||||||
|
|
||||||
class AdvancedOptions(BaseModel):
|
class AdvancedOptions(BaseModel):
|
||||||
linearity_check: Optional[bool]
|
linearity_check: Optional[bool]
|
||||||
show_progress: Optional[bool]
|
show_progress: Optional[bool]
|
||||||
max_solution_time: Optional[int]
|
max_solution_time: Optional[int]
|
||||||
brand_bound_tolerance: Optional[float]
|
brand_bound_tolerance: Optional[float]
|
||||||
max_forms: Optional[int]
|
max_forms: Optional[int]
|
||||||
precision: Optional[float]
|
precision: Optional[float]
|
||||||
extra_param_range: Optional[List[Dict]]
|
extra_param_range: Optional[List[Dict]]
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class Attribute(BaseModel):
|
class Attribute(BaseModel):
|
||||||
value: Optional[str]
|
value: Optional[str]
|
||||||
type: Optional[str]
|
type: Optional[str]
|
||||||
id: str
|
id: str
|
||||||
|
@ -1,6 +1,23 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from lib.irt.test_information_function import TestInformationFunction
|
||||||
|
from lib.irt.test_response_function import TestResponseFunction
|
||||||
|
|
||||||
|
from models.item import Item
|
||||||
|
from models.irt_model import IRTModel
|
||||||
|
|
||||||
|
|
||||||
class Bundle(BaseModel):
|
class Bundle(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
count: int
|
count: int
|
||||||
type: str
|
items: List[Item]
|
||||||
|
type: str
|
||||||
|
|
||||||
|
def tif(self, irt_model: IRTModel, theta: float) -> float:
|
||||||
|
return TestInformationFunction(irt_model).calculate(self.items,
|
||||||
|
theta=theta)
|
||||||
|
|
||||||
|
def trf(self, irt_model: IRTModel, theta: float) -> float:
|
||||||
|
return TestResponseFunction(irt_model).calculate(self.items,
|
||||||
|
theta=theta)
|
||||||
|
@ -2,7 +2,8 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from models.attribute import Attribute
|
from models.attribute import Attribute
|
||||||
|
|
||||||
|
|
||||||
class Constraint(BaseModel):
|
class Constraint(BaseModel):
|
||||||
reference_attribute: Attribute
|
reference_attribute: Attribute
|
||||||
minimum: float
|
minimum: float
|
||||||
maximum: float
|
maximum: float
|
||||||
|
@ -8,19 +8,20 @@ from models.target import Target
|
|||||||
|
|
||||||
from lib.irt.test_response_function import TestResponseFunction
|
from lib.irt.test_response_function import TestResponseFunction
|
||||||
|
|
||||||
class Form(BaseModel):
|
|
||||||
items: List[Item]
|
|
||||||
cut_score: float
|
|
||||||
tif_results: List[Target]
|
|
||||||
tcc_results: List[Target]
|
|
||||||
status: str = 'Not Optimized'
|
|
||||||
|
|
||||||
@classmethod
|
class Form(BaseModel):
|
||||||
def create(cls, items, solver_run, status):
|
items: List[Item]
|
||||||
return cls(
|
cut_score: float
|
||||||
items=items,
|
tif_results: List[Target]
|
||||||
cut_score=TestResponseFunction(solver_run.irt_model).calculate(items, theta=solver_run.theta_cut_score),
|
tcc_results: List[Target]
|
||||||
tif_results=irt_helper.generate_tif_results(items, solver_run),
|
status: str = 'Not Optimized'
|
||||||
tcc_results=irt_helper.generate_tcc_results(items, solver_run),
|
|
||||||
status=status
|
@classmethod
|
||||||
)
|
def create(cls, items, solver_run, status):
|
||||||
|
return cls(
|
||||||
|
items=items,
|
||||||
|
cut_score=TestResponseFunction(solver_run.irt_model).calculate(
|
||||||
|
items, theta=solver_run.theta_cut_score),
|
||||||
|
tif_results=irt_helper.generate_tif_results(items, solver_run),
|
||||||
|
tcc_results=irt_helper.generate_tcc_results(items, solver_run),
|
||||||
|
status=status)
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
class IRTModel(BaseModel):
|
class IRTModel(BaseModel):
|
||||||
a_param: float
|
a_param: float
|
||||||
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
|
b_param: Dict = {"schema_bson_id": str, "field_bson_id": str}
|
||||||
c_param: float
|
c_param: float
|
||||||
model: str
|
model: str
|
||||||
|
|
||||||
|
def formatted_b_param(self):
|
||||||
def formatted_b_param(self):
|
return self.b_param['schema_bson_id'] + '-' + self.b_param[
|
||||||
return self.b_param['schema_bson_id'] + '-' + self.b_param['field_bson_id']
|
'field_bson_id']
|
||||||
|
@ -6,27 +6,32 @@ from models.attribute import Attribute
|
|||||||
from lib.irt.item_response_function import ItemResponseFunction
|
from lib.irt.item_response_function import ItemResponseFunction
|
||||||
from lib.irt.item_information_function import ItemInformationFunction
|
from lib.irt.item_information_function import ItemInformationFunction
|
||||||
|
|
||||||
|
|
||||||
class Item(BaseModel):
|
class Item(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
passage_id: Optional[int]
|
passage_id: Optional[int]
|
||||||
workflow_state: Optional[str]
|
workflow_state: Optional[str]
|
||||||
attributes: List[Attribute]
|
attributes: List[Attribute]
|
||||||
b_param: float = 0.00
|
b_param: float = 0.00
|
||||||
|
|
||||||
def iif(self, solver_run, theta):
|
def iif(self, solver_run, theta):
|
||||||
return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta)
|
return ItemInformationFunction(solver_run.irt_model).calculate(
|
||||||
|
b_param=self.b_param, theta=theta)
|
||||||
|
|
||||||
def irf(self, solver_run, theta):
|
def irf(self, solver_run, theta):
|
||||||
return ItemResponseFunction(solver_run.irt_model).calculate(b_param=self.b_param,theta=theta)
|
return ItemResponseFunction(solver_run.irt_model).calculate(
|
||||||
|
b_param=self.b_param, theta=theta)
|
||||||
|
|
||||||
def get_attribute(self, ref_attribute):
|
def get_attribute(self, ref_attribute):
|
||||||
for attribute in self.attributes:
|
for attribute in self.attributes:
|
||||||
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
|
if attribute.id == ref_attribute.id and attribute.value.lower(
|
||||||
return attribute.value
|
) == ref_attribute.value.lower():
|
||||||
return False
|
return attribute.value
|
||||||
|
return False
|
||||||
|
|
||||||
def attribute_exists(self, ref_attribute):
|
def attribute_exists(self, ref_attribute):
|
||||||
for attribute in self.attributes:
|
for attribute in self.attributes:
|
||||||
if attribute.id == ref_attribute.id and attribute.value.lower() == ref_attribute.value.lower():
|
if attribute.id == ref_attribute.id and attribute.value.lower(
|
||||||
return True
|
) == ref_attribute.value.lower():
|
||||||
return False
|
return True
|
||||||
|
return False
|
||||||
|
@ -3,11 +3,12 @@ from typing import Dict, List, AnyStr
|
|||||||
|
|
||||||
from models.target import Target
|
from models.target import Target
|
||||||
|
|
||||||
|
|
||||||
class ObjectiveFunction(BaseModel):
|
class ObjectiveFunction(BaseModel):
|
||||||
# minimizing tif/tcc target value is only option currently
|
# minimizing tif/tcc target value is only option currently
|
||||||
# as we add more we can build this out to be more dynamic
|
# as we add more we can build this out to be more dynamic
|
||||||
# likely with models representing each objective function type
|
# likely with models representing each objective function type
|
||||||
tif_targets: List[Target]
|
tif_targets: List[Target]
|
||||||
tcc_targets: List[Target]
|
tcc_targets: List[Target]
|
||||||
objective: AnyStr = "minimize"
|
objective: AnyStr = "minimize"
|
||||||
weight: Dict = {'tif': 1, 'tcc': 1}
|
weight: Dict = {'tif': 1, 'tcc': 1}
|
||||||
|
@ -3,6 +3,7 @@ from typing import List
|
|||||||
|
|
||||||
from models.form import Form
|
from models.form import Form
|
||||||
|
|
||||||
|
|
||||||
class Solution(BaseModel):
|
class Solution(BaseModel):
|
||||||
response_id: int
|
response_id: int
|
||||||
forms: List[Form]
|
forms: List[Form]
|
||||||
|
@ -10,64 +10,87 @@ from models.bundle import Bundle
|
|||||||
from models.objective_function import ObjectiveFunction
|
from models.objective_function import ObjectiveFunction
|
||||||
from models.advanced_options import AdvancedOptions
|
from models.advanced_options import AdvancedOptions
|
||||||
|
|
||||||
|
|
||||||
class SolverRun(BaseModel):
|
class SolverRun(BaseModel):
|
||||||
items: List[Item] = []
|
items: List[Item] = []
|
||||||
bundles: Optional[Bundle]
|
bundles: list[Bundle] = []
|
||||||
constraints: List[Constraint]
|
constraints: List[Constraint]
|
||||||
irt_model: IRTModel
|
irt_model: IRTModel
|
||||||
objective_function: ObjectiveFunction
|
objective_function: ObjectiveFunction
|
||||||
total_form_items: int
|
total_form_items: int
|
||||||
total_forms: int = 1
|
total_forms: int = 1
|
||||||
theta_cut_score: float = 0.00
|
theta_cut_score: float = 0.00
|
||||||
advanced_options: Optional[AdvancedOptions]
|
advanced_options: Optional[AdvancedOptions]
|
||||||
engine: str
|
engine: str
|
||||||
|
|
||||||
def get_item(self, item_id):
|
def get_item(self, item_id: int) -> Item or None:
|
||||||
for item in self.items:
|
for item in self.items:
|
||||||
if str(item.id) == item_id:
|
if str(item.id) == item_id:
|
||||||
return item
|
return item
|
||||||
return False
|
|
||||||
|
|
||||||
def remove_items(self, items):
|
def get_bundle(self, bundle_id: int) -> Bundle or None:
|
||||||
self.items = [item for item in self.items if item not in items]
|
for bundle in self.bundles:
|
||||||
return True
|
if str(bundle.id) == bundle_id:
|
||||||
|
return bundle
|
||||||
|
|
||||||
def generate_bundles(self):
|
def get_constraint_by_type(self, type: str) -> Constraint or None:
|
||||||
logging.info('Generating Bundles...')
|
for constraint in self.constraints:
|
||||||
bundle_constraints = (constraint.reference_attribute for constraint in self.constraints if constraint.reference_attribute.type == 'bundle')
|
if type == constraint.reference_attribute.type:
|
||||||
|
return constraint
|
||||||
|
|
||||||
for bundle_constraint in bundle_constraints:
|
def remove_items(self, items: list[Item]) -> bool:
|
||||||
type_attribute = bundle_constraint.id
|
self.items = [item for item in self.items if item not in items]
|
||||||
|
return True
|
||||||
|
|
||||||
for item in self.items:
|
def generate_bundles(self):
|
||||||
attribute_id = getattr(item, type_attribute, None)
|
logging.info('Generating Bundles...')
|
||||||
|
# confirms bundle constraints exists
|
||||||
|
bundle_constraints = (
|
||||||
|
constraint.reference_attribute for constraint in self.constraints
|
||||||
|
if constraint.reference_attribute.type == 'bundle')
|
||||||
|
|
||||||
# make sure the item has said attribute
|
for bundle_constraint in bundle_constraints:
|
||||||
if attribute_id != None:
|
type_attribute = bundle_constraint.id
|
||||||
# if there are pre-existing bundles, add new or increment existing
|
|
||||||
# else create array with new bundle
|
|
||||||
if self.bundles != None:
|
|
||||||
# get index of the bundle in the bundles list if exists or None if it doesn't
|
|
||||||
bundle_index = next((index for (index, bundle) in enumerate(self.bundles) if bundle.id == attribute_id and bundle.type == type_attribute), None)
|
|
||||||
|
|
||||||
# if the index doesn't exist add the new bundle of whatever type
|
for item in self.items:
|
||||||
# else increment the count of the current bundle
|
attribute_id = getattr(item, type_attribute, None)
|
||||||
if bundle_index == None:
|
|
||||||
self.bundles.append(Bundle(
|
|
||||||
id=attribute_id,
|
|
||||||
count=1,
|
|
||||||
type=type_attribute
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
self.bundles[bundle_index].count += 1
|
|
||||||
else:
|
|
||||||
self.bundles = [Bundle(
|
|
||||||
id=attribute_id,
|
|
||||||
count=1,
|
|
||||||
type=type_attribute
|
|
||||||
)]
|
|
||||||
|
|
||||||
logging.info('Bundles Generated...')
|
# make sure the item has said attribute
|
||||||
|
if attribute_id != None:
|
||||||
|
# if there are pre-existing bundles, add new or increment existing
|
||||||
|
# else create array with new bundle
|
||||||
|
if self.bundles != None:
|
||||||
|
# get index of the bundle in the bundles list if exists or None if it doesn't
|
||||||
|
bundle_index = next(
|
||||||
|
(index
|
||||||
|
for (index, bundle) in enumerate(self.bundles)
|
||||||
|
if bundle.id == attribute_id
|
||||||
|
and bundle.type == type_attribute), None)
|
||||||
|
|
||||||
def get_constraint(self, name):
|
# if the index doesn't exist add the new bundle of whatever type
|
||||||
return next((constraint for constraint in self.constraints if constraint.reference_attribute.id == name), None)
|
# else increment the count of the current bundle
|
||||||
|
if bundle_index == None:
|
||||||
|
self.bundles.append(
|
||||||
|
Bundle(id=attribute_id,
|
||||||
|
count=1,
|
||||||
|
items=[item],
|
||||||
|
type=type_attribute))
|
||||||
|
else:
|
||||||
|
self.bundles[bundle_index].count += 1
|
||||||
|
self.bundles[bundle_index].items.append(item)
|
||||||
|
else:
|
||||||
|
self.bundles = [
|
||||||
|
Bundle(id=attribute_id,
|
||||||
|
count=1,
|
||||||
|
items=[item],
|
||||||
|
type=type_attribute)
|
||||||
|
]
|
||||||
|
|
||||||
|
logging.info('Bundles Generated...')
|
||||||
|
|
||||||
|
def get_constraint(self, name: str) -> Constraint:
|
||||||
|
return next((constraint for constraint in self.constraints
|
||||||
|
if constraint.reference_attribute.id == name), None)
|
||||||
|
|
||||||
|
def unbundled_items(self) -> list:
|
||||||
|
return [item for item in self.items if item.passage_id == None]
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class Target(BaseModel):
|
class Target(BaseModel):
|
||||||
theta: float
|
theta: float
|
||||||
value: float
|
value: float
|
||||||
result: Optional[float]
|
result: Optional[float]
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
class Base:
|
class Base:
|
||||||
def __init__(self, source, ingest_type='message'):
|
|
||||||
self.ingest_type = ingest_type
|
def __init__(self, source, ingest_type='message'):
|
||||||
self.source = source
|
self.ingest_type = ingest_type
|
||||||
|
self.source = source
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os, json, random, io, logging
|
import os, json, random, io, logging
|
||||||
|
|
||||||
from pulp import LpProblem, LpVariable, LpMinimize, LpStatus, lpSum
|
from pulp import LpProblem, LpVariable, LpMinimize, LpMaximize, LpStatus, lpSum
|
||||||
|
|
||||||
from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper
|
from helpers import aws_helper, tar_helper, csv_helper, service_helper, solver_helper
|
||||||
from lib.errors.item_generation_error import ItemGenerationError
|
from lib.errors.item_generation_error import ItemGenerationError
|
||||||
@ -12,134 +12,233 @@ from models.item import Item
|
|||||||
|
|
||||||
from services.base import Base
|
from services.base import Base
|
||||||
|
|
||||||
|
|
||||||
class LoftService(Base):
|
class LoftService(Base):
|
||||||
def process(self):
|
|
||||||
try:
|
|
||||||
self.solver_run = self.create_solver_run_from_attributes()
|
|
||||||
self.solver_run.generate_bundles()
|
|
||||||
self.solution = self.generate_solution()
|
|
||||||
self.result = self.stream_to_s3_bucket()
|
|
||||||
except ItemGenerationError as error:
|
|
||||||
self.result = self.stream_to_s3_bucket(error)
|
|
||||||
except TypeError as error:
|
|
||||||
logging.error(error)
|
|
||||||
self.result = self.stream_to_s3_bucket(ItemGenerationError("Provided params causing error in calculation results"))
|
|
||||||
|
|
||||||
def create_solver_run_from_attributes(self) -> SolverRun:
|
def process(self):
|
||||||
logging.info('Retrieving attributes from message...')
|
try:
|
||||||
# get s3 object
|
self.solver_run = self.create_solver_run_from_attributes()
|
||||||
self.key = aws_helper.get_key_from_message(self.source)
|
self.solver_run.generate_bundles()
|
||||||
s3_object = aws_helper.get_object(self.key, aws_helper.get_bucket_from_message(self.source))
|
self.solution = self.generate_solution()
|
||||||
|
# self.solution = self.generate_test_solution()
|
||||||
|
self.result = self.stream_to_s3_bucket()
|
||||||
|
except ItemGenerationError as error:
|
||||||
|
self.result = self.stream_to_s3_bucket(error)
|
||||||
|
except TypeError as error:
|
||||||
|
logging.error(error)
|
||||||
|
self.result = self.stream_to_s3_bucket(
|
||||||
|
ItemGenerationError(
|
||||||
|
"Provided params causing error in calculation results"))
|
||||||
|
|
||||||
# convert to tar
|
def generate_test_solution(self) -> Solution:
|
||||||
self.tar = tar_helper.raw_to_tar(s3_object)
|
solution = Solution(response_id=random.randint(100, 5000), forms=[])
|
||||||
|
|
||||||
# get attributes file and convert to dict
|
problem = LpProblem("ata-form-generate-with-bundles", LpMinimize)
|
||||||
attributes = json.loads(tar_helper.extract_file_from_tar(self.tar , 'solver_run_attributes.json').read())
|
|
||||||
|
|
||||||
# create solver run
|
bundles = LpVariable.dicts(
|
||||||
solver_run = SolverRun.parse_obj(attributes)
|
"Bundle", [bundle.id for bundle in self.solver_run.bundles],
|
||||||
|
lowBound=1,
|
||||||
|
upBound=1,
|
||||||
|
cat='Binary')
|
||||||
|
items = LpVariable.dicts("Item",
|
||||||
|
[item.id for item in self.solver_run.items],
|
||||||
|
lowBound=1,
|
||||||
|
upBound=1,
|
||||||
|
cat='Binary')
|
||||||
|
|
||||||
# get items file and convert to dict
|
problem += lpSum(
|
||||||
items_csv = tar_helper.extract_file_from_tar(self.tar , 'items.csv')
|
[bundles[bundle.id] for bundle in self.solver_run.bundles])
|
||||||
items_csv_reader = csv_helper.file_stream_reader(items_csv)
|
# problem += lpSum([items[item.id] for item in self.solver_run.items])
|
||||||
|
|
||||||
# add items to solver run
|
# problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) <= 3, 'max total bundles used'
|
||||||
for item in service_helper.items_csv_to_dict(items_csv_reader, solver_run):
|
# problem += lpSum([bundles[bundle.id] for bundle in self.solver_run.bundles]) >= 1, 'min total bundles used'
|
||||||
solver_run.items.append(Item.parse_obj(item))
|
problem += lpSum(
|
||||||
|
[bundles[bundle.id] for bundle in self.solver_run.bundles]) == 3
|
||||||
|
|
||||||
logging.info('Processed Attributes...')
|
problem += lpSum(
|
||||||
|
[
|
||||||
|
bundle.count * bundles[bundle.id]
|
||||||
|
for bundle in self.solver_run.bundles
|
||||||
|
] +
|
||||||
|
[1 * items[item.id] for item in self.solver_run.unbundled_items()]
|
||||||
|
) == self.solver_run.total_form_items, 'Total bundle form items for form'
|
||||||
|
|
||||||
return solver_run
|
problem.solve()
|
||||||
|
|
||||||
def generate_solution(self) -> Solution:
|
# for v in problem.variables():
|
||||||
logging.info('Generating Solution...')
|
# print(f'{v.name}: {v.varValue}')
|
||||||
|
|
||||||
# unsolved solution
|
# add return items and create as a form
|
||||||
solution = Solution(
|
form_items = service_helper.solution_items(problem.variables(),
|
||||||
response_id=random.randint(100, 5000),
|
self.solver_run)
|
||||||
forms=[]
|
|
||||||
)
|
|
||||||
|
|
||||||
# counter for number of forms
|
# add form to solution
|
||||||
f = 0
|
solution.forms.append(
|
||||||
|
Form.create(form_items, self.solver_run, LpStatus[problem.status]))
|
||||||
|
logging.info('Form generated and added to solution...')
|
||||||
|
|
||||||
# iterate for number of forms that require creation
|
return solution
|
||||||
# currently creates distinc forms with no item overlap
|
|
||||||
while f < self.solver_run.total_forms:
|
|
||||||
# setup vars
|
|
||||||
items = LpVariable.dicts(
|
|
||||||
"Item", [item.id for item in self.solver_run.items], lowBound=1, upBound=1, cat='Binary')
|
|
||||||
# bundles = LpVariable.dicts(
|
|
||||||
# "Bundle", [bundle.id for bundle in self.solver_run.bundles], lowBound=1, upBound=1, cat='Binary')
|
|
||||||
|
|
||||||
problem_objection_functions = []
|
def create_solver_run_from_attributes(self) -> SolverRun:
|
||||||
|
logging.info('Retrieving attributes from message...')
|
||||||
|
# get s3 object
|
||||||
|
self.key = aws_helper.get_key_from_message(self.source)
|
||||||
|
s3_object = aws_helper.get_object(
|
||||||
|
self.key, aws_helper.get_bucket_from_message(self.source))
|
||||||
|
|
||||||
# create problem
|
# convert to tar
|
||||||
problem = LpProblem("ata-form-generate", LpMinimize)
|
self.tar = tar_helper.raw_to_tar(s3_object)
|
||||||
|
|
||||||
# dummy objective function, because it just makes things easier™
|
# get attributes file and convert to dict
|
||||||
# problem += lpSum([items[item.id]
|
attributes = json.loads(
|
||||||
# for item in self.solver_run.items])
|
tar_helper.extract_file_from_tar(
|
||||||
|
self.tar, 'solver_run_attributes.json').read())
|
||||||
|
|
||||||
# constraints
|
# create solver run
|
||||||
problem += lpSum([items[item.id]
|
solver_run = SolverRun.parse_obj(attributes)
|
||||||
for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
|
|
||||||
|
|
||||||
# dynamic constraints
|
# get items file and convert to dict
|
||||||
problem = solver_helper.build_constraints(self.solver_run, problem, items)
|
items_csv = tar_helper.extract_file_from_tar(self.tar, 'items.csv')
|
||||||
|
items_csv_reader = csv_helper.file_stream_reader(items_csv)
|
||||||
|
|
||||||
# multi-objective constraints
|
# add items to solver run
|
||||||
logging.info('Creating TIF and TCC constraints')
|
for item in service_helper.items_csv_to_dict(items_csv_reader,
|
||||||
for target in self.solver_run.objective_function.tif_targets:
|
solver_run):
|
||||||
tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
solver_run.items.append(Item.parse_obj(item))
|
||||||
for item in self.solver_run.items])
|
|
||||||
problem_objection_functions.append(tif)
|
|
||||||
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
|
||||||
for item in self.solver_run.items]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}'
|
|
||||||
problem += lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
|
||||||
for item in self.solver_run.items]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}'
|
|
||||||
|
|
||||||
for target in self.solver_run.objective_function.tcc_targets:
|
logging.info('Processed Attributes...')
|
||||||
tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
|
||||||
for item in self.solver_run.items])
|
|
||||||
problem_objection_functions.append(tcc)
|
|
||||||
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
|
||||||
for item in self.solver_run.items]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}'
|
|
||||||
problem += lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
|
||||||
for item in self.solver_run.items]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}'
|
|
||||||
|
|
||||||
# solve problem
|
return solver_run
|
||||||
logging.info('Solving...')
|
|
||||||
# problem.solve()
|
|
||||||
problem.sequentialSolve(problem_objection_functions)
|
|
||||||
logging.info('Solved...generating form and adding to solution')
|
|
||||||
|
|
||||||
# add return items and create as a form
|
def generate_solution(self) -> Solution:
|
||||||
form_items = service_helper.solution_items(problem.variables(), self.solver_run)
|
logging.info('Generating Solution...')
|
||||||
|
|
||||||
# add form to solution
|
# unsolved solution
|
||||||
solution.forms.append(Form.create(form_items, self.solver_run, LpStatus[problem.status]))
|
solution = Solution(response_id=random.randint(100, 5000), forms=[])
|
||||||
logging.info('Form generated and added to solution...')
|
|
||||||
|
|
||||||
# successfull form, increment
|
# counter for number of forms
|
||||||
f += 1
|
f = 0
|
||||||
|
|
||||||
logging.info('Solution Generated.')
|
# iterate for number of forms that require creation
|
||||||
return solution
|
# currently creates distinc forms with no item overlap
|
||||||
|
while f < self.solver_run.total_forms:
|
||||||
|
# setup vars
|
||||||
|
items = LpVariable.dicts(
|
||||||
|
"Item", [item.id for item in self.solver_run.items],
|
||||||
|
lowBound=1,
|
||||||
|
upBound=1,
|
||||||
|
cat='Binary')
|
||||||
|
bundles = LpVariable.dicts(
|
||||||
|
"Bundle", [bundle.id for bundle in self.solver_run.bundles],
|
||||||
|
lowBound=1,
|
||||||
|
upBound=1,
|
||||||
|
cat='Binary')
|
||||||
|
|
||||||
def stream_to_s3_bucket(self, error = None):
|
# problem_objection_functions = []
|
||||||
self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv'
|
|
||||||
solution_file = None
|
|
||||||
# setup writer buffer and write processed forms to file
|
|
||||||
buffer = io.StringIO()
|
|
||||||
|
|
||||||
if error:
|
# create problem
|
||||||
logging.info('Streaming %s error response to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
problem = LpProblem("ata-form-generate", LpMinimize)
|
||||||
solution_file = service_helper.error_to_file(buffer, error)
|
|
||||||
else:
|
|
||||||
logging.info('Streaming %s to s3 bucket - %s', self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
|
||||||
solution_file = service_helper.solution_to_file(buffer, self.solver_run.total_form_items, self.solution.forms)
|
|
||||||
|
|
||||||
# upload generated file to s3 and return result
|
# dummy objective function, because it just makes things easier™
|
||||||
return aws_helper.file_stream_upload(solution_file, self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
problem += lpSum(
|
||||||
|
[items[item.id] for item in self.solver_run.items])
|
||||||
|
|
||||||
|
# constraints
|
||||||
|
# problem += lpSum([items[item.id]
|
||||||
|
# for item in self.solver_run.items]) == self.solver_run.total_form_items, 'Total form items'
|
||||||
|
problem += lpSum(
|
||||||
|
[
|
||||||
|
bundle.count * bundles[bundle.id]
|
||||||
|
for bundle in self.solver_run.bundles
|
||||||
|
] + [
|
||||||
|
1 * items[item.id]
|
||||||
|
for item in self.solver_run.unbundled_items()
|
||||||
|
]
|
||||||
|
) == self.solver_run.total_form_items, 'Total bundle form items for form'
|
||||||
|
|
||||||
|
# dynamic constraints
|
||||||
|
problem = solver_helper.build_constraints(self.solver_run, problem,
|
||||||
|
items, bundles)
|
||||||
|
|
||||||
|
# multi-objective constraints
|
||||||
|
logging.info('Creating TIF and TCC constraints')
|
||||||
|
for target in self.solver_run.objective_function.tif_targets:
|
||||||
|
|
||||||
|
# tif = lpSum([item.iif(self.solver_run, target.theta)*items[item.id]
|
||||||
|
# for item in self.solver_run.items])
|
||||||
|
# problem_objection_functions.append(tif)
|
||||||
|
problem += lpSum([
|
||||||
|
bundle.tif(self.solver_run.irt_model, target.theta) *
|
||||||
|
bundles[bundle.id] for bundle in self.solver_run.bundles
|
||||||
|
] + [
|
||||||
|
item.iif(self.solver_run, target.theta) * items[item.id]
|
||||||
|
for item in self.solver_run.items
|
||||||
|
]) >= target.value - 8, f'max tif theta ({target.theta}) target value {target.value}'
|
||||||
|
problem += lpSum([
|
||||||
|
bundle.tif(self.solver_run.irt_model, target.theta) *
|
||||||
|
bundles[bundle.id] for bundle in self.solver_run.bundles
|
||||||
|
] + [
|
||||||
|
item.iif(self.solver_run, target.theta) * items[item.id]
|
||||||
|
for item in self.solver_run.items
|
||||||
|
]) <= target.value + 8, f'min tif theta ({target.theta}) target value {target.value}'
|
||||||
|
|
||||||
|
for target in self.solver_run.objective_function.tcc_targets:
|
||||||
|
# tcc = lpSum([item.irf(self.solver_run, target.theta)*items[item.id]
|
||||||
|
# for item in self.solver_run.items])
|
||||||
|
# problem_objection_functions.append(tcc)
|
||||||
|
problem += lpSum([
|
||||||
|
bundle.trf(self.solver_run.irt_model, target.theta) *
|
||||||
|
bundles[bundle.id] for bundle in self.solver_run.bundles
|
||||||
|
] + [
|
||||||
|
item.irf(self.solver_run, target.theta) * items[item.id]
|
||||||
|
for item in self.solver_run.items
|
||||||
|
]) >= target.value - 20, f'max tcc theta ({target.theta}) target value {target.value}'
|
||||||
|
problem += lpSum([
|
||||||
|
bundle.trf(self.solver_run.irt_model, target.theta) *
|
||||||
|
bundles[bundle.id] for bundle in self.solver_run.bundles
|
||||||
|
] + [
|
||||||
|
item.irf(self.solver_run, target.theta) * items[item.id]
|
||||||
|
for item in self.solver_run.items
|
||||||
|
]) <= target.value + 20, f'min tcc theta ({target.theta}) target value {target.value}'
|
||||||
|
|
||||||
|
# solve problem
|
||||||
|
logging.info('Solving...')
|
||||||
|
problem.solve()
|
||||||
|
# problem.sequentialSolve(problem_objection_functions)
|
||||||
|
logging.info('Solved...generating form and adding to solution')
|
||||||
|
|
||||||
|
# add return items and create as a form
|
||||||
|
form_items = service_helper.solution_items(problem.variables(),
|
||||||
|
self.solver_run)
|
||||||
|
|
||||||
|
# add form to solution
|
||||||
|
solution.forms.append(
|
||||||
|
Form.create(form_items, self.solver_run,
|
||||||
|
LpStatus[problem.status]))
|
||||||
|
logging.info('Form generated and added to solution...')
|
||||||
|
|
||||||
|
# successfull form, increment
|
||||||
|
f += 1
|
||||||
|
|
||||||
|
logging.info('Solution Generated.')
|
||||||
|
return solution
|
||||||
|
|
||||||
|
def stream_to_s3_bucket(self, error=None):
|
||||||
|
self.file_name = f'{service_helper.key_to_uuid(self.key)}.csv'
|
||||||
|
solution_file = None
|
||||||
|
# setup writer buffer and write processed forms to file
|
||||||
|
buffer = io.StringIO()
|
||||||
|
|
||||||
|
if error:
|
||||||
|
logging.info('Streaming %s error response to s3 bucket - %s',
|
||||||
|
self.file_name, os.environ['S3_PROCESSED_BUCKET'])
|
||||||
|
solution_file = service_helper.error_to_file(buffer, error)
|
||||||
|
else:
|
||||||
|
logging.info('Streaming %s to s3 bucket - %s', self.file_name,
|
||||||
|
os.environ['S3_PROCESSED_BUCKET'])
|
||||||
|
solution_file = service_helper.solution_to_file(
|
||||||
|
buffer, self.solver_run.total_form_items, self.solution.forms)
|
||||||
|
|
||||||
|
# upload generated file to s3 and return result
|
||||||
|
return aws_helper.file_stream_upload(solution_file, self.file_name,
|
||||||
|
os.environ['S3_PROCESSED_BUCKET'])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user