Merge pull request #36 from yardstick/feature/QUANT-2993

2023-09-25 15:39:45 -04:00
parent e218ce4053 375db5f1c2
commit 939fc9bae9
11 changed files with 124 additions and 13 deletions
--- a/.docker-compose/Dockerfile
+++ b/.docker-compose/Dockerfile
@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
 RUN python -m pip install pytest-cov
 RUN python -m pip install pytest-stub
 RUN python -m pip install pytest-mock
 RUN python -m pip install girth --upgrade
 RUN mkdir /app
 WORKDIR /app
--- a/1
+++ b/1
@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
 RUN python -m pip install pytest-cov
 RUN python -m pip install pytest-stub
 RUN python -m pip install pytest-mock
 RUN python -m pip install girth --upgrade
 RUN mkdir /app
 WORKDIR /app
--- a/app/helpers/common_helper.py
+++ b/app/helpers/common_helper.py
@ -1,3 +1,4 @@
 import re
 def boolean_to_int(value: bool) -> int:
    if value:
@ -5,10 +6,12 @@ def boolean_to_int(value: bool) -> int:
    else:
        return 0
 def is_float(element: str) -> bool:
     """Report whether ``element`` can be converted with ``float()``.

     Args:
         element: Value to test, normally a string such as ``"1.5"``.

     Returns:
         True when ``float(element)`` succeeds, False otherwise.
     """
     try:
         float(element)
     except (TypeError, ValueError):
         # float() raises TypeError (not ValueError) for None and other
         # non-numeric objects; treat those as "not a float" as well.
         return False
     return True
 def camel_to_snake(camel_string: str) -> str:
     """Convert a camelCase / PascalCase string to snake_case.

     An underscore is inserted before every uppercase letter that is not
     at the very start of the string, then the result is lowercased.
     """
     uppercase_boundary = re.compile(r'(?<!^)(?=[A-Z])')
     with_underscores = uppercase_boundary.sub('_', camel_string)
     return with_underscores.lower()
--- a/app/lib/irt/models/base.py
+++ b/app/lib/irt/models/base.py
@ -0,0 +1,14 @@
 import numpy as np
 class Base:
   """Common state and default behaviour shared by the IRT models.

   Attributes set by the constructor:
     model_params: stored exactly as given.
     b_param: read from ``kwargs['b_param']``.
     theta: read from ``kwargs['theta']``.
     e: ``numpy.exp``, so ``self.e(x)`` evaluates e**x.
   """

   def __init__(self, model_params, kwargs):
     """Store the model parameters and the per-call values.

     Args:
       model_params: model parameter object, kept as-is.
       kwargs: a plain dict (passed positionally, not ``**kwargs``)
         that must contain the keys 'b_param' and 'theta'.

     Raises:
       KeyError: if 'b_param' or 'theta' is missing from ``kwargs``.
     """
     self.model_params = model_params
     # both keys are required; a missing one surfaces as a KeyError
     self.b_param = kwargs['b_param']
     self.e = np.exp
     self.theta = kwargs['theta']

   @classmethod
   def ability_estimate(cls, items=None) -> float:
     """Default ability estimate; subclasses override this.

     Args:
       items: accepted (and ignored here) so the signature matches
         subclasses that estimate ability from a list of items.

     Returns:
       Always 0.0.
     """
     return 0.0
--- a/app/lib/irt/models/rasch.py
+++ b/app/lib/irt/models/rasch.py
@ -0,0 +1,26 @@
 from girth import ability_mle
 from lib.irt.models.base import *
 class Rasch(Base):
   """Rasch model: ability estimation with a fixed discrimination of 1."""

   def result(self):
     """Placeholder result; always returns 0.0."""
     return 0.0

   @classmethod
   def ability_estimate(cls, items) -> float:
     """Estimate a single ability value from the given items.

     Each item must expose ``response`` (convertible with ``int()``) and
     ``b_param``. The estimate is computed with girth's ``ability_mle``.

     Args:
       items: iterable of item objects for one exam form result.

     Returns:
       The first (and only) ability estimate produced by ``ability_mle``.
     """
     # responses are mapped into a matrix, where each row is an item
     # and each column is an exam form result
     # we'll likely have to change this to something more robust
     # when we get into more complex response types
     responses = np.array([[int(item.response)] for item in items])

     # the difficulty (b param) for each item is in an ordered list
     difficulty = np.array([item.b_param for item in items])

     # the package currently utilizes a fixed a param (discrimination),
     # so every item gets a discrimination of 1
     discrimination = np.ones(len(difficulty))

     # there are many methodologies to calculate ability from a data set of responses
     # this is what our client currently uses but we should expand this to allow for
     # switching between methodologies when needed
     # it also currently only does a single ability estimation
     # at some point we can also accommodate batch ability estimates if need be
     return ability_mle(responses, difficulty, discrimination).tolist()[0]
--- a/app/lib/irt/models/three_parameter_logistic.py
+++ b/app/lib/irt/models/three_parameter_logistic.py
@ -1,11 +1,6 @@
-class ThreeParameterLogistic:
+from lib.irt.models.base import *
-    def __init__(self, model_params, kwargs):
+class ThreeParameterLogistic(Base):
        self.model_params = model_params
        # check if exists, if not error out
        self.b_param = kwargs['b_param']
        self.e = 2.71828
        self.theta = kwargs['theta']
    # contains the primary 3pl function, determining the probability
    # that an individual at a certain theta would get a particular question correct
@ -14,5 +9,5 @@ class ThreeParameterLogistic:
    def result(self):
        a = self.model_params.a_param
        c = self.model_params.c_param
-        return c + (1 - c) * (1 / (1 + self.e**(-a *
+        return c + (1 - c) * (1 / (1 + self.e(-a *
                                                (self.theta - self.b_param))))
--- a/app/main.py
+++ b/app/main.py
@ -43,7 +43,7 @@ class ServiceListener(Consumer):
            logging.error(f'action of type {action} does not exist.')
 def main():
-    logging.info('Starting IRT Service: That Was Rash (v1.4.0)...')
+    logging.info('Starting IRT Service: That Was Rasch (v1.5.0)...')
    # ToDo: Figure out a much better way of doing this.
    # LocalStack wants 'endpoint_url', while prod doesn't :(
--- a/app/models/ability_estimation.py
+++ b/app/models/ability_estimation.py
@ -0,0 +1,27 @@
 from pydantic import BaseModel
 from typing import ClassVar, List
 from models.item import Item
 from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
 from lib.irt.models.rasch import Rasch
 class AbilityEstimation(BaseModel):
   """Request model for an ability estimation run.

   ``irt_model`` selects the implementation from ``IRT_MODELS``; the
   selected class estimates ability from ``items``.
   """
   exam_id: int
   items: List[Item] = []
   irt_model: str
   # theta bounds; not read by calculate() in this class
   min_theta: float = -3.0
   max_theta: float = 3.0

   # maps the incoming irt_model name to the class implementing it
   IRT_MODELS: ClassVar[dict] = {
     'rasch': Rasch,
     # not supported
     # '3pl': ThreeParameterLogistic
   }

   def calculate(self) -> float:
     """Run the ability estimate for the configured IRT model.

     Returns:
       The value returned by the model's ``ability_estimate(items)``,
       or None (after logging an error) when ``irt_model`` is not a key
       of ``IRT_MODELS``.
     """
     # imported here because this module has no top-level logging import;
     # without it the unsupported-model path raised NameError
     import logging

     model = self.IRT_MODELS.get(self.irt_model)
     if model is None:
       logging.error(f'model of type {self.irt_model} does not exist.')
       return None
     return model.ability_estimate(self.items)
--- a/app/models/item.py
+++ b/app/models/item.py
@ -11,8 +11,9 @@ class Item(BaseModel):
    position: Optional[int] = None
    passage_id: Optional[int] = None
    workflow_state: Optional[str] = None
-    attributes: List[Attribute]
+    attributes: List[Attribute] = None
    b_param: float = 0.00
    response: Optional[str] = None
    def iif(self, solver_run, theta):
        """Evaluate the item information function for this item.

        Builds an ``ItemInformationFunction`` from ``solver_run.irt_model``
        and calculates it with this item's ``b_param`` at ``theta``.
        """
        information_function = ItemInformationFunction(solver_run.irt_model)
        return information_function.calculate(
            b_param=self.b_param, theta=theta)
--- a/app/services/ability_estimation_service.py
+++ b/app/services/ability_estimation_service.py
@ -1,9 +1,31 @@
-import logging
+import json, io
 from services.base import Base
 from helpers import aws_helper
 from models.ability_estimation import AbilityEstimation
 from lib.application_configs import ApplicationConfigs
 class AbilityEstimationService(Base):
   """Service that runs an ability estimation and uploads the result."""
   ACTION = 'abilityEstimation'

   def process(self):
     """Estimate ability from the message attributes and upload a JSON result.

     The uploaded JSON has 'status' ('success' or 'error') and 'result'
     (the estimate, or None when the estimation returned None).
     """
     attributes = self.service_attributes()
     ability_estimation = AbilityEstimation.parse_obj(attributes)

     # set the upload name before branching: previously it was only set
     # on success, so the error path failed with AttributeError
     self.file_name = f'{ability_estimation.exam_id}_ability_estimation_result.json'

     result = ability_estimation.calculate()

     if result is not None:
       response = json.dumps({
         'status': 'success',
         'result': result
       })
     else:
       response = json.dumps({
         'status': 'error',
         'result': None
       })

     aws_helper.file_stream_upload(
         io.BytesIO(response.encode('UTF-8')), self.file_name,
         ApplicationConfigs.s3_processed_bucket, self.ACTION)
--- a/app/services/base.py
+++ b/app/services/base.py
@ -1,5 +1,26 @@
 import logging, json
 from helpers import aws_helper, tar_helper, common_helper
 class Base:
     """Base class for services driven by an incoming message.

     Subclasses are expected to define ``ACTION``; it is used to derive
     the name of the attributes file read from the tar archive.
     """

     def __init__(self, source, ingest_type='message'):
         """Remember the message source and how it was ingested."""
         self.source = source
         self.ingest_type = ingest_type

     def service_attributes(self):
         """Fetch the tar referenced by the message and return its attributes.

         Stores the object key on ``self.key`` and the tar on ``self.tar``,
         then parses ``<snake_case ACTION>_attributes.json`` from the tar.

         Returns:
             The parsed JSON content of the attributes file.
         """
         logging.info('Retrieving attributes from message...')

         # locate and download the s3 object named in the message
         self.key = aws_helper.get_key_from_message(self.source)
         bucket = aws_helper.get_bucket_from_message(self.source)
         s3_object = aws_helper.get_object(self.key, bucket)

         # convert the raw object to a tar
         self.tar = tar_helper.raw_to_tar(s3_object)

         # read the attributes file out of the tar and parse it
         attributes_file_name = f'{common_helper.camel_to_snake(self.ACTION)}_attributes.json'
         attributes_file = tar_helper.extract_file_from_tar(
             self.tar, attributes_file_name)
         return json.loads(attributes_file.read())