diff --git a/.docker-compose/Dockerfile b/.docker-compose/Dockerfile index 593525a..224b14f 100644 --- a/.docker-compose/Dockerfile +++ b/.docker-compose/Dockerfile @@ -10,6 +10,7 @@ RUN python -m pip install -U pytest RUN python -m pip install pytest-cov RUN python -m pip install pytest-stub RUN python -m pip install pytest-mock +RUN python -m pip install girth --upgrade RUN mkdir /app WORKDIR /app diff --git a/Dockerfile b/Dockerfile index 4c2bfb4..cb104f2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,7 @@ RUN python -m pip install -U pytest RUN python -m pip install pytest-cov RUN python -m pip install pytest-stub RUN python -m pip install pytest-mock +RUN python -m pip install girth --upgrade RUN mkdir /app WORKDIR /app diff --git a/app/helpers/common_helper.py b/app/helpers/common_helper.py index d74ec0e..6eea8f3 100644 --- a/app/helpers/common_helper.py +++ b/app/helpers/common_helper.py @@ -1,3 +1,4 @@ +import re def boolean_to_int(value: bool) -> int: if value: @@ -5,10 +6,12 @@ def boolean_to_int(value: bool) -> int: else: return 0 - def is_float(element: str) -> bool: try: float(element) return True except ValueError: return False + +def camel_to_snake(camel_string: str) -> str: + return re.sub(r'(? 
float: + return 0.0 diff --git a/app/lib/irt/models/rasch.py b/app/lib/irt/models/rasch.py new file mode 100644 index 0000000..46ed222 --- /dev/null +++ b/app/lib/irt/models/rasch.py @@ -0,0 +1,26 @@ +from girth import ability_mle + +from lib.irt.models.base import * + +class Rasch(Base): + + def result(self): + return 0.0 + + @classmethod + def ability_estimate(self, items) -> float: + # responses are mapped into a matrix, where each row is an item + # and each column is an exam form result + # we'll likely have to change this to something more robust + # when we get into more complex response types + responses = np.array([[int(item.response)] for item in items]) + # the difficulty (b param) for each item is in an ordered list + difficulty = np.array([item.b_param for item in items]) + # the package currently utilizes a fixed a param (discrimination) + discrimination = np.linspace(1, 1, len(difficulty)) + # there are many methodologies to calculate ability from a data set of responses + # this is what our client currently uses but we should expand this to allow for + # switching between methodologies when needed + # it also currently only does a single ability estimation + # at some point we can also accommodate batch ability estimates if need be + return ability_mle(responses, difficulty, discrimination).tolist()[0] diff --git a/app/lib/irt/models/three_parameter_logistic.py b/app/lib/irt/models/three_parameter_logistic.py index 5fd4956..f8226e7 100644 --- a/app/lib/irt/models/three_parameter_logistic.py +++ b/app/lib/irt/models/three_parameter_logistic.py @@ -1,11 +1,6 @@ -class ThreeParameterLogistic: +from lib.irt.models.base import * - def __init__(self, model_params, kwargs): - self.model_params = model_params - # check if exists, if not error out - self.b_param = kwargs['b_param'] - self.e = 2.71828 - self.theta = kwargs['theta'] +class ThreeParameterLogistic(Base): # contains the primary 3pl function, determining the probably of an inidividual # that an 
individual at a certain theta would get a particular question correct @@ -14,5 +9,5 @@ class ThreeParameterLogistic: def result(self): a = self.model_params.a_param c = self.model_params.c_param - return c + (1 - c) * (1 / (1 + self.e**(-a * + return c + (1 - c) * (1 / (1 + self.e(-a * (self.theta - self.b_param)))) diff --git a/app/main.py b/app/main.py index ff2cc6f..44b0da9 100644 --- a/app/main.py +++ b/app/main.py @@ -43,7 +43,7 @@ class ServiceListener(Consumer): logging.error(f'action of type {action} does not exist.') def main(): - logging.info('Starting IRT Service: That Was Rash (v1.4.0)...') + logging.info('Starting IRT Service: That Was Rasch (v1.5.0)...') # ToDo: Figure out a much better way of doing this. # LocalStack wants 'endpoint_url', while prod doesnt :( diff --git a/app/models/ability_estimation.py b/app/models/ability_estimation.py new file mode 100644 index 0000000..97244ab --- /dev/null +++ b/app/models/ability_estimation.py @@ -0,0 +1,27 @@ +from pydantic import BaseModel +from typing import ClassVar, List + +from models.item import Item +from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic +from lib.irt.models.rasch import Rasch + +class AbilityEstimation(BaseModel): + exam_id: int + items: List[Item] = [] + irt_model: str + min_theta: float = -3.0 + max_theta: float = 3.0 + + IRT_MODELS: ClassVar[dict] = { + 'rasch': Rasch, + # not supported + # '3pl': ThreeParameterLogistic + } + + def calculate(self) -> float: + if self.irt_model in self.IRT_MODELS: + model = self.IRT_MODELS[self.irt_model] + return model.ability_estimate(self.items) + else: + logging.error(f'model of type {self.irt_model} does not exist.') + return None diff --git a/app/models/item.py b/app/models/item.py index 96ac372..dba3b3e 100644 --- a/app/models/item.py +++ b/app/models/item.py @@ -11,8 +11,9 @@ class Item(BaseModel): position: Optional[int] = None passage_id: Optional[int] = None workflow_state: Optional[str] = None - attributes: 
List[Attribute] + attributes: List[Attribute] = None b_param: float = 0.00 + response: Optional[str] = None def iif(self, solver_run, theta): return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param, theta=theta) diff --git a/app/services/ability_estimation_service.py b/app/services/ability_estimation_service.py index 763c91c..2f6b5f7 100644 --- a/app/services/ability_estimation_service.py +++ b/app/services/ability_estimation_service.py @@ -1,9 +1,31 @@ -import logging +import json, io from services.base import Base +from helpers import aws_helper +from models.ability_estimation import AbilityEstimation +from lib.application_configs import ApplicationConfigs class AbilityEstimationService(Base): ACTION = 'abilityEstimation' def process(self): - logging.info('Ability Estimation Service to be implemented...') + attributes = self.service_attributes() + ability_estimation = AbilityEstimation.parse_obj(attributes) + result = ability_estimation.calculate() + + if result is not None: + response = json.dumps({ + 'status': 'success', + 'result': result + }) + + self.file_name = f'{ability_estimation.exam_id}_ability_estimation_result.json' + else: + response = json.dumps({ + 'status': 'error', + 'result': None + }) + + aws_helper.file_stream_upload( + io.BytesIO(bytes(response.encode('UTF-8'))), self.file_name, + ApplicationConfigs.s3_processed_bucket, self.ACTION) diff --git a/app/services/base.py b/app/services/base.py index eb7a248..21b4159 100644 --- a/app/services/base.py +++ b/app/services/base.py @@ -1,5 +1,26 @@ +import logging, json + +from helpers import aws_helper, tar_helper, common_helper + class Base: def __init__(self, source, ingest_type='message'): self.ingest_type = ingest_type self.source = source + + def service_attributes(self): + logging.info('Retrieving attributes from message...') + # get s3 object + self.key = aws_helper.get_key_from_message(self.source) + s3_object = aws_helper.get_object( + self.key, 
aws_helper.get_bucket_from_message(self.source)) + + # convert to tar + self.tar = tar_helper.raw_to_tar(s3_object) + + # get attributes file and convert to dict + attributes = json.loads( + tar_helper.extract_file_from_tar( + self.tar, f'{common_helper.camel_to_snake(self.ACTION)}_attributes.json').read()) + + return attributes