Merge pull request #36 from yardstick/feature/QUANT-2993

2023-09-25 15:39:45 -04:00
parent e218ce4053 375db5f1c2
commit 939fc9bae9
11 changed files with 124 additions and 13 deletions
--- a/.docker-compose/Dockerfile
+++ b/.docker-compose/Dockerfile
@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
 RUN python -m pip install pytest-cov
 RUN python -m pip install pytest-stub
 RUN python -m pip install pytest-mock
+RUN python -m pip install girth --upgrade

 RUN mkdir /app
 WORKDIR /app
--- a/1
+++ b/1
@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
 RUN python -m pip install pytest-cov
 RUN python -m pip install pytest-stub
 RUN python -m pip install pytest-mock
+RUN python -m pip install girth --upgrade

 RUN mkdir /app
 WORKDIR /app
--- a/app/helpers/common_helper.py
+++ b/app/helpers/common_helper.py
@ -1,3 +1,4 @@
+import re

 def boolean_to_int(value: bool) -> int:
    if value:
@ -5,10 +6,12 @@ def boolean_to_int(value: bool) -> int:
    else:
        return 0

-
 def is_float(element: str) -> bool:
    try:
        float(element)
        return True
    except ValueError:
        return False
+
+def camel_to_snake(camel_string: str) -> str:
+    return re.sub(r'(?<!^)(?=[A-Z])', '_', camel_string).lower()
--- a/app/lib/irt/models/base.py
+++ b/app/lib/irt/models/base.py
@ -0,0 +1,14 @@
+import numpy as np
+
+class Base:
+
+  def __init__(self, model_params, kwargs):
+    self.model_params = model_params
+    # check if exists, if not error out
+    self.b_param = kwargs['b_param']
+    self.e = np.exp
+    self.theta = kwargs['theta']
+
+  @classmethod
+  def ability_estimate(self) -> float:
+    return 0.0
--- a/app/lib/irt/models/rasch.py
+++ b/app/lib/irt/models/rasch.py
@ -0,0 +1,26 @@
+from girth import ability_mle
+
+from lib.irt.models.base import *
+
+class Rasch(Base):
+
+  def result(self):
+    return 0.0
+
+  @classmethod
+  def ability_estimate(self, items) -> float:
+    # responses are mapped into a matrix, where each row is an item
+    # and each column is an exam form result
+    # we'll likely have to change this to something more robust
+    # when we get into more complex response types
+    responses = np.array([[int(item.response)] for item in items])
+    # the difficulty (b param) for each item is in an ordered list
+    difficulty = np.array([item.b_param for item in items])
+    # the package currently utilizes a fixed a param (discrimination)
+    discrimination = np.linspace(1, 1, len(difficulty))
+    # there are many methodologies to calculate ability from a data set of responses
+    # this is what our client currently uses but we should expand this to allow for
+    # switching between methodologies when needed
+    # it also currently only does a single ability estimation
+    # at some point we can also accommodate batch ability estimates if need be
+    return ability_mle(responses, difficulty, discrimination).tolist()[0]
--- a/app/lib/irt/models/three_parameter_logistic.py
+++ b/app/lib/irt/models/three_parameter_logistic.py
@ -1,11 +1,6 @@
-class ThreeParameterLogistic:
+from lib.irt.models.base import *

-    def __init__(self, model_params, kwargs):
-        self.model_params = model_params
-        # check if exists, if not error out
-        self.b_param = kwargs['b_param']
-        self.e = 2.71828
-        self.theta = kwargs['theta']
+class ThreeParameterLogistic(Base):

    # contains the primary 3pl function, determining the probability
    # that an individual at a certain theta would get a particular question correct
@ -14,5 +9,5 @@ class ThreeParameterLogistic:
    def result(self):
        a = self.model_params.a_param
        c = self.model_params.c_param
-        return c + (1 - c) * (1 / (1 + self.e**(-a *
+        return c + (1 - c) * (1 / (1 + self.e(-a *
                                                (self.theta - self.b_param))))
--- a/app/main.py
+++ b/app/main.py
@ -43,7 +43,7 @@ class ServiceListener(Consumer):
            logging.error(f'action of type {action} does not exist.')

 def main():
-    logging.info('Starting IRT Service: That Was Rash (v1.4.0)...')
+    logging.info('Starting IRT Service: That Was Rasch (v1.5.0)...')

    # ToDo: Figure out a much better way of doing this.
    # LocalStack wants 'endpoint_url', while prod doesn't :(
--- a/app/models/ability_estimation.py
+++ b/app/models/ability_estimation.py
@ -0,0 +1,27 @@
+from pydantic import BaseModel
+from typing import ClassVar, List
+
+from models.item import Item
+from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
+from lib.irt.models.rasch import Rasch
+
+class AbilityEstimation(BaseModel):
+  exam_id: int
+  items: List[Item] = []
+  irt_model: str
+  min_theta: float = -3.0
+  max_theta: float = 3.0
+
+  IRT_MODELS: ClassVar[dict] = {
+    'rasch': Rasch,
+    # not supported
+    # '3pl': ThreeParameterLogistic
+  }
+
+  def calculate(self) -> float:
+    if self.irt_model in self.IRT_MODELS:
+      model = self.IRT_MODELS[self.irt_model]
+      return model.ability_estimate(self.items)
+    else:
+      logging.error(f'model of type {self.irt_model} does not exist.')
+      return None
--- a/app/models/item.py
+++ b/app/models/item.py
@ -11,8 +11,9 @@ class Item(BaseModel):
    position: Optional[int] = None
    passage_id: Optional[int] = None
    workflow_state: Optional[str] = None
-    attributes: List[Attribute]
+    attributes: List[Attribute] = None
    b_param: float = 0.00
+    response: Optional[str] = None

    def iif(self, solver_run, theta):
        return ItemInformationFunction(solver_run.irt_model).calculate(b_param=self.b_param, theta=theta)
--- a/app/services/ability_estimation_service.py
+++ b/app/services/ability_estimation_service.py
@ -1,9 +1,31 @@
-import logging
+import json, io

 from services.base import Base
+from helpers import aws_helper
+from models.ability_estimation import AbilityEstimation
+from lib.application_configs import ApplicationConfigs

 class AbilityEstimationService(Base):
  ACTION = 'abilityEstimation'

  def process(self):
-    logging.info('Ability Estimation Service to be implemented...')
+    attributes = self.service_attributes()
+    ability_estimation = AbilityEstimation.parse_obj(attributes)
+    result = ability_estimation.calculate()
+
+    if result is not None:
+      response = json.dumps({
+        'status': 'success',
+        'result': result
+      })
+
+      self.file_name = f'{ability_estimation.exam_id}_ability_estimation_result.json'
+    else:
+      response = json.dumps({
+        'status': 'error',
+        'result': None
+      })
+
+    aws_helper.file_stream_upload(
+        io.BytesIO(bytes(response.encode('UTF-8'))), self.file_name,
+        ApplicationConfigs.s3_processed_bucket, self.ACTION)
--- a/app/services/base.py
+++ b/app/services/base.py
@ -1,5 +1,26 @@
+import logging, json
+
+from helpers import aws_helper, tar_helper, common_helper
+
 class Base:

    def __init__(self, source, ingest_type='message'):
        self.ingest_type = ingest_type
        self.source = source
+
+    def service_attributes(self):
+        logging.info('Retrieving attributes from message...')
+        # get s3 object
+        self.key = aws_helper.get_key_from_message(self.source)
+        s3_object = aws_helper.get_object(
+            self.key, aws_helper.get_bucket_from_message(self.source))
+
+        # convert to tar
+        self.tar = tar_helper.raw_to_tar(s3_object)
+
+        # get attributes file and convert to dict
+        attributes = json.loads(
+            tar_helper.extract_file_from_tar(
+                self.tar, f'{common_helper.camel_to_snake(self.ACTION)}_attributes.json').read())
+
+        return attributes