the big format

This commit is contained in:
Joshua Burman
2022-02-10 20:29:50 -05:00
parent 19a37ab33a
commit deb6b9014e
25 changed files with 682 additions and 466 deletions

View File

@ -3,37 +3,34 @@ import os
import json
# Shared AWS session built from credentials in the environment. A missing
# variable raises KeyError as soon as this module is imported.
session = boto3.Session(
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'])

# Module-level S3 resource and SQS client reused by the helpers below.
s3 = session.resource('s3', region_name=os.environ['AWS_REGION'])
sqs = session.client('sqs', region_name=os.environ['AWS_REGION'])
def get_key_from_message(body):
    """Return the S3 object key from the first record of ``body``.

    ``body`` must be indexable as ``body['Records'][0]['s3']['object']['key']``;
    a missing level raises ``KeyError`` or ``IndexError``.
    """
    return body['Records'][0]['s3']['object']['key']
def get_bucket_from_message(body):
    """Return the S3 bucket name from the first record of ``body``.

    ``body`` must be indexable as ``body['Records'][0]['s3']['bucket']['name']``;
    a missing level raises ``KeyError`` or ``IndexError``.
    """
    return body['Records'][0]['s3']['bucket']['name']
def get_object(key, bucket):
    """Fetch object ``key`` from ``bucket`` and return the fully read body.

    Uses the module-level ``s3`` resource; the whole ``Body`` stream is read
    before returning.
    """
    return s3.Object(bucket_name=bucket, key=key).get()['Body'].read()
def file_stream_upload(buffer, name, bucket):
    """Upload the file-like ``buffer`` to ``bucket`` under the key ``name``.

    Delegates to ``upload_fileobj`` on the module-level ``s3`` resource and
    returns whatever that call returns.
    """
    return s3.Bucket(bucket).upload_fileobj(buffer, name)
def receive_message(queue, message_num=1, wait_time=1):
    """Receive messages from the SQS queue at URL ``queue``.

    ``message_num`` is passed as ``MaxNumberOfMessages`` and ``wait_time`` as
    ``WaitTimeSeconds``. Returns the raw response of ``sqs.receive_message``.
    """
    return sqs.receive_message(QueueUrl=queue,
                               MaxNumberOfMessages=message_num,
                               WaitTimeSeconds=wait_time)
def delete_message(queue, receipt):
    """Delete the message identified by ``receipt`` from the queue at ``queue``.

    ``receipt`` is passed as ``ReceiptHandle``. Returns the raw response of
    ``sqs.delete_message``.
    """
    return sqs.delete_message(QueueUrl=queue, ReceiptHandle=receipt)

View File

@ -1,5 +1,6 @@
import csv
import io
def file_stream_reader(f):
    """Return a ``csv.reader`` over the ASCII-decoded contents of ``f``.

    ``f`` is a binary file-like object; its whole content is read and decoded
    as ASCII, so non-ASCII bytes raise ``UnicodeDecodeError``.
    """
    return csv.reader(io.StringIO(f.read().decode('ascii')))

View File

@ -3,98 +3,120 @@ import io
import re
from tokenize import String
def items_csv_to_dict(items_csv_reader, solver_run):
    """Convert rows of an items CSV into a list of item dicts.

    The first row is taken as the header row. Each later row becomes a dict
    seeded with an empty ``'attributes'`` list, and every column is routed by
    its header name:

    * the header equal to ``solver_run.irt_model.formatted_b_param()`` is
      stored as a float under ``'b_param'``;
    * a header with a constraint whose reference attribute has type
      ``'bundle'`` is stored under that attribute's ``id`` (non-empty values
      only);
    * a header with any other constraint is appended to ``'attributes'`` as a
      dict with ``'id'``, ``'value'`` and ``'type'``;
    * any other header is copied as-is (non-empty values only).

    Returns the list of item dicts.
    """
    items = []
    headers = []

    for row_index, row in enumerate(items_csv_reader):
        if row_index == 0:
            headers = row
            continue

        # ensure that the b param is formatted correctly
        # NOTE(review): indentation was lost in the view this was rebuilt
        # from; rows failing this check are assumed to be skipped entirely
        # (item not appended) -- confirm against the repository.
        last_value = row[len(headers) - 1]
        if last_value == '' or not is_float(last_value):
            continue

        item = {'attributes': []}

        for col_index, col in enumerate(headers):
            value = row[col_index]

            if solver_run.irt_model.formatted_b_param() == col:
                item['b_param'] = float(value)
                continue

            # Look the constraint up once per column instead of per branch.
            constraint = solver_run.get_constraint(col)

            if constraint and constraint.reference_attribute.type == 'bundle':
                if value:
                    item[constraint.reference_attribute.id] = value
            elif constraint:
                item['attributes'].append({
                    'id': col,
                    'value': value,
                    'type': constraint.reference_attribute.type,
                })
            elif value:
                item[col] = value

        items.append(item)

    return items
def solution_to_file(buffer, total_form_items, forms):
    """Write solved forms as CSV into ``buffer`` and return them as bytes.

    The header row is ``status``, one ``tif @ <theta>`` column per TIF result
    and one ``tcc @ <theta>`` column per TCC result of the first form, then
    ``cut score`` and the numbers ``1..total_form_items``. Each form
    contributes one row: its status, a ``target``/``result`` cell per TIF and
    TCC result, its rounded cut score, and the ids of its items.

    ``buffer`` is a text buffer exposing ``getvalue()``. Returns a new
    ``io.BytesIO`` holding the encoded contents of ``buffer``.
    """
    wr = csv.writer(buffer, dialect='excel', delimiter=',')

    # write header row for first row utilizing the total items all forms will have
    # fill the rows with the targets and cut score then the items
    header = ['status']

    # The target columns are taken from the first form; with no forms there
    # are no target columns to emit (previously this raised IndexError).
    if forms:
        header += [
            f'tif @ {round(result.theta, 2)}'
            for result in forms[0].tif_results
        ]
        header += [
            f'tcc @ {round(result.theta, 2)}'
            for result in forms[0].tcc_results
        ]

    header += ['cut score'] + [x + 1 for x in range(total_form_items)]
    wr.writerow(header)

    # add each form as row to processed csv
    for form in forms:
        row = [form.status]

        for result in form.tif_results + form.tcc_results:
            row += [
                f'target - {result.value}\nresult - {round(result.result, 2)}'
            ]

        # provide generated items and cut score
        row += [round(form.cut_score, 2)] + [item.id for item in form.items]
        wr.writerow(row)

    return io.BytesIO(buffer.getvalue().encode())
def error_to_file(buffer, error):
    """Write a two-row CSV (``status`` header, then the error) to ``buffer``.

    The second row holds ``error.args[0]``; when the exception carries no
    arguments ``str(error)`` is written instead of raising ``IndexError``.

    ``buffer`` is a text buffer exposing ``getvalue()``. Returns a new
    ``io.BytesIO`` holding the encoded contents of ``buffer``.
    """
    wr = csv.writer(buffer, dialect='excel', delimiter=',')
    wr.writerow(['status'])
    wr.writerow([error.args[0] if error.args else str(error)])

    return io.BytesIO(buffer.getvalue().encode())
def key_to_uuid(key):
    """Return the part of ``key`` before its first underscore.

    A key without an underscore is returned unchanged.
    """
    # A plain string method is enough for a fixed one-character separator.
    return key.partition("_")[0]
def solution_items(variables, solver_run):
    """Collect the items selected by the solver variables.

    A variable counts as selected when its ``varValue`` is greater than zero.
    Variables whose name contains ``'Item_'`` are resolved through
    ``solver_run.get_item``; those containing ``'Bundle_'`` are resolved
    through ``solver_run.get_bundle`` and contribute every item of the
    bundle. Falsy lookups (and falsy bundle items) are skipped.

    Returns the list of collected items in variable order.
    """
    form_items = []

    for v in variables:
        # A variable without a value is treated as not selected instead of
        # raising TypeError on the comparison.
        if v.varValue is None or v.varValue <= 0:
            continue

        if 'Item_' in v.name:
            item = solver_run.get_item(v.name.replace('Item_', ''))
            if item:
                form_items.append(item)
        elif 'Bundle_' in v.name:
            bundle = solver_run.get_bundle(v.name.replace('Bundle_', ''))
            if bundle:
                form_items.extend(item for item in bundle.items if item)

    return form_items
# probably a better place for this...
def is_float(element: str) -> bool:
    """Return True when ``float(element)`` succeeds, False otherwise.

    Values ``float`` rejects by type (for example ``None``) also yield False
    instead of raising ``TypeError``.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        return False
    return True

View File

@ -9,75 +9,95 @@ from models.item import Item
from lib.errors.item_generation_error import ItemGenerationError
def build_constraints(solver_run: SolverRun, problem: LpProblem,
                      items: list[Item], bundles: list[Bundle]) -> LpProblem:
    """Add the solver run's constraints to ``problem`` and return it.

    For every constraint in ``solver_run.constraints``:

    * type ``'metadata'``: two inequalities bound the number of selected
      items for which ``item.attribute_exists(attribute)`` holds, between
      ``round(total_form_items * minimum / 100)`` and
      ``round(total_form_items * maximum / 100)``;
    * type ``'bundle'``: when ``solver_run.bundles`` is not None, one
      equality fixes the number of selected bundles to a random integer
      between the constraint's minimum and maximum.

    ``items`` and ``bundles`` are indexed by ``item.id`` / ``bundle.id``.
    NOTE(review): they are annotated as lists but used like id-keyed
    mappings of LP variables -- confirm against the caller.

    Raises:
        ItemGenerationError: if a ``ValueError`` is raised while building.
    """
    logging.info('Creating Constraints...')

    try:
        total_form_items = solver_run.total_form_items
        constraints = solver_run.constraints

        for constraint in constraints:
            attribute = constraint.reference_attribute
            # Local names avoid shadowing the ``min``/``max`` builtins.
            minimum = constraint.minimum
            maximum = constraint.maximum

            if attribute.type == 'metadata':
                logging.info('Metadata Constraint Generating...')
                con = {
                    item.id: item.attribute_exists(attribute)
                    for item in solver_run.items
                }
                problem += lpSum([
                    con[item.id] * items[item.id] for item in solver_run.items
                ]) >= round(
                    total_form_items *
                    (minimum / 100)), f'{attribute.id} - {attribute.value} - min'
                problem += lpSum([
                    con[item.id] * items[item.id] for item in solver_run.items
                ]) <= round(
                    total_form_items *
                    (maximum / 100)), f'{attribute.id} - {attribute.value} - max'
            elif attribute.type == 'bundle':
                logging.info('Bundles Constraint Generating...')
                # TODO: account for many different bundle types, since the id condition in L33 could yield duplicates
                if solver_run.bundles is not None:
                    # make sure the total bundles used in generated form is limited between min-max set
                    problem += lpSum([
                        bundles[bundle.id] for bundle in solver_run.bundles
                    ]) == randint(int(constraint.minimum),
                                  int(constraint.maximum))
                    # The earlier approach (sampling bundles with
                    # get_random_bundles, adding one equality per sampled
                    # bundle and constraining the remaining items to be
                    # bundle-free) is superseded by the constraint above.

        logging.info('Constraints Created...')
        return problem
    except ValueError as error:
        logging.error(error)
        raise ItemGenerationError(
            "Bundle min and/or max larger than bundle amount provided",
            error.args[0]) from error


def get_random_bundles(total_form_items: int,
                       bundles: list[Bundle],
                       min: int,
                       max: int,
                       found_bundles=False) -> list[Bundle]:
    """Randomly pick bundles whose combined item count fits the form.

    A bundle count is drawn with ``randint(min, max)``, then samples of that
    many bundles are drawn until one has a total ``count`` of at most
    ``total_form_items``.

    ``found_bundles`` is kept for backward compatibility: when a caller
    passes a truthy value no sampling happens and ``None`` is returned, as
    before.

    Raises:
        ValueError: from ``random.sample`` when more bundles are requested
            than exist, or when even the smallest bundles of the requested
            number exceed ``total_form_items`` (previously an endless loop).
    """
    total_bundles = randint(min, max)
    logging.info(f'Selecting Bundles (total of {total_bundles})...')

    if found_bundles:
        return None

    # If even the cheapest possible selection does not fit, no sample ever
    # will; fail fast instead of looping forever.
    if 0 <= total_bundles <= len(bundles):
        smallest_total = sum(
            sorted(bundle.count for bundle in bundles)[:total_bundles])
        if smallest_total > total_form_items:
            raise ValueError(
                f'No selection of {total_bundles} bundles fits in '
                f'{total_form_items} form items')

    # The former recursive fallback was unreachable (and passed its
    # arguments in the wrong order), so it is dropped.
    while True:
        selected_bundles = sample(bundles, total_bundles)
        total_bundle_items = sum(bundle.count for bundle in selected_bundles)

        if total_bundle_items <= total_form_items:
            return selected_bundles

View File

@ -1,9 +1,11 @@
import io
import tarfile
def raw_to_tar(raw_object):
    """Open the gzip-compressed tar archive held in the bytes ``raw_object``.

    Returns a ``tarfile.TarFile`` opened for reading over an in-memory
    buffer.
    """
    tarball = io.BytesIO(raw_object)
    return tarfile.open(fileobj=tarball, mode='r:gz')
def extract_file_from_tar(tar, file_name):
    """Return a readable file object for member ``file_name`` of ``tar``.

    ``tar.getmember`` raises ``KeyError`` when the archive has no such
    member.
    """
    return tar.extractfile(tar.getmember(file_name))