Merge pull request #36 from yardstick/feature/QUANT-2993

This commit is contained in:
brmnjsh 2023-09-25 15:39:45 -04:00 committed by GitHub
commit 939fc9bae9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 124 additions and 13 deletions

View File

@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
RUN python -m pip install pytest-cov
RUN python -m pip install pytest-stub
RUN python -m pip install pytest-mock
RUN python -m pip install girth --upgrade
RUN mkdir /app
WORKDIR /app

View File

@ -10,6 +10,7 @@ RUN python -m pip install -U pytest
RUN python -m pip install pytest-cov
RUN python -m pip install pytest-stub
RUN python -m pip install pytest-mock
RUN python -m pip install girth --upgrade
RUN mkdir /app
WORKDIR /app

View File

@ -1,3 +1,4 @@
import re
def boolean_to_int(value: bool) -> int:
if value:
@ -5,10 +6,12 @@ def boolean_to_int(value: bool) -> int:
else:
return 0
def is_float(element: str) -> bool:
    """Report whether ``element`` can be converted by ``float()``.

    Returns True when ``float(element)`` succeeds and False otherwise.
    Values that ``float()`` rejects with a TypeError (for example ``None``)
    are reported as False instead of letting the exception escape.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: unsupported type such as None
        return False
    return True
def camel_to_snake(camel_string: str) -> str:
    """Convert a camelCase string to snake_case.

    An underscore is inserted in front of every uppercase letter that is
    not at the start of the string, then the result is lower-cased.
    """
    # zero-width match: any position other than the start that precedes A-Z
    boundary = re.compile(r'(?<!^)(?=[A-Z])')
    with_underscores = boundary.sub('_', camel_string)
    return with_underscores.lower()

View File

@ -0,0 +1,14 @@
import numpy as np
class Base:
    """Shared state for IRT model implementations.

    Keeps the model parameters together with the ``b_param`` and ``theta``
    values taken from ``kwargs``, and exposes ``np.exp`` as ``self.e``.
    """

    def __init__(self, model_params, kwargs):
        """Initialise the model.

        Args:
            model_params: parameter object, stored unchanged on the instance.
            kwargs: mapping that must contain ``'b_param'`` and ``'theta'``.

        Raises:
            KeyError: if ``'b_param'`` or ``'theta'`` is missing from kwargs.
        """
        self.model_params = model_params
        # both keys are required; a missing one raises KeyError right here
        self.b_param = kwargs['b_param']
        self.e = np.exp
        self.theta = kwargs['theta']

    @classmethod
    def ability_estimate(cls, items=None) -> float:
        """Return the default ability estimate of 0.0.

        Args:
            items: accepted and ignored, so subclasses that override this
                method with an ``items`` argument can be called the same way.
        """
        return 0.0

View File

@ -0,0 +1,26 @@
from girth import ability_mle
from lib.irt.models.base import *
class Rasch(Base):
    """Rasch model; ability estimation is delegated to girth's ``ability_mle``."""

    def result(self):
        """Placeholder result; always returns 0.0."""
        return 0.0

    @classmethod
    def ability_estimate(cls, items) -> float:
        """Estimate a single ability value from a list of answered items.

        Args:
            items: sequence of objects exposing ``response`` (must be
                convertible with ``int()``) and ``b_param``.

        Returns:
            The first element of the estimates returned by ``ability_mle``.
        """
        # responses are mapped into a matrix, where each row is an item
        # and each column is an exam form result
        # we'll likely have to change this to something more robust
        # when we get into more complex response types
        responses = np.array([[int(item.response)] for item in items])

        # the difficulty (b param) for each item is in an ordered list
        difficulty = np.array([item.b_param for item in items])

        # the package currently utilizes a fixed a param (discrimination)
        # of 1 for every item
        discrimination = np.ones(len(difficulty))

        # there are many methodologies to calculate ability from a data set of responses
        # this is what our client currently uses but we should expand this to allow for
        # switching between methodologies when needed
        # it also currently only does a single ability estimation
        # at some point we can also accommodate batch ability estimates if need be
        return ability_mle(responses, difficulty, discrimination).tolist()[0]

View File

@ -1,11 +1,6 @@
class ThreeParameterLogistic:
from lib.irt.models.base import *
def __init__(self, model_params, kwargs):
self.model_params = model_params
# check if exists, if not error out
self.b_param = kwargs['b_param']
self.e = 2.71828
self.theta = kwargs['theta']
class ThreeParameterLogistic(Base):
# contains the primary 3pl function, determining the probability
# that an individual at a certain theta would get a particular question correct
@ -14,5 +9,5 @@ class ThreeParameterLogistic:
def result(self):
a = self.model_params.a_param
c = self.model_params.c_param
return c + (1 - c) * (1 / (1 + self.e**(-a *
return c + (1 - c) * (1 / (1 + self.e(-a *
(self.theta - self.b_param))))

View File

@ -43,7 +43,7 @@ class ServiceListener(Consumer):
logging.error(f'action of type {action} does not exist.')
def main():
logging.info('Starting IRT Service: That Was Rash (v1.4.0)...')
logging.info('Starting IRT Service: That Was Rasch (v1.5.0)...')
# ToDo: Figure out a much better way of doing this.
# LocalStack wants 'endpoint_url', while prod doesn't :(

View File

@ -0,0 +1,27 @@
from pydantic import BaseModel
from typing import ClassVar, List
from models.item import Item
from lib.irt.models.three_parameter_logistic import ThreeParameterLogistic
from lib.irt.models.rasch import Rasch
class AbilityEstimation(BaseModel):
    """Request model for an ability estimation run."""
    exam_id: int
    items: List[Item] = []
    irt_model: str
    min_theta: float = -3.0
    max_theta: float = 3.0

    # maps the irt_model name to the class that implements it
    IRT_MODELS: ClassVar[dict] = {
        'rasch': Rasch,
        # not supported
        # '3pl': ThreeParameterLogistic
    }

    def calculate(self) -> float:
        """Run the ability estimate for the configured IRT model.

        Returns:
            The value returned by the model's ``ability_estimate`` for
            ``self.items``, or ``None`` (after logging an error) when
            ``irt_model`` is not a key of ``IRT_MODELS``.
        """
        # imported locally: this module has no file-level ``import logging``,
        # so the error path would otherwise fail with a NameError
        import logging

        model = self.IRT_MODELS.get(self.irt_model)
        if model is None:
            logging.error(f'model of type {self.irt_model} does not exist.')
            return None

        return model.ability_estimate(self.items)

View File

@ -11,8 +11,9 @@ class Item(BaseModel):
position: Optional[int] = None
passage_id: Optional[int] = None
workflow_state: Optional[str] = None
attributes: List[Attribute]
attributes: List[Attribute] = None
b_param: float = 0.00
response: Optional[str] = None
def iif(self, solver_run, theta):
    """Evaluate the item information function for this item.

    Builds an ``ItemInformationFunction`` from ``solver_run.irt_model`` and
    returns the result of its ``calculate`` call for this item's
    ``b_param`` and the given ``theta``.
    """
    information_function = ItemInformationFunction(solver_run.irt_model)
    return information_function.calculate(b_param=self.b_param, theta=theta)

View File

@ -1,9 +1,31 @@
import logging
import json, io
from services.base import Base
from helpers import aws_helper
from models.ability_estimation import AbilityEstimation
from lib.application_configs import ApplicationConfigs
class AbilityEstimationService(Base):
    """Runs an ability estimation and uploads the JSON outcome to the processed bucket."""
    ACTION = 'abilityEstimation'

    def process(self):
        """Estimate ability from the service attributes and upload the result.

        The attributes are parsed into an ``AbilityEstimation``. The uploaded
        JSON document has ``status`` ``'success'`` and the computed ``result``,
        or ``status`` ``'error'`` and a null ``result`` when ``calculate()``
        returns ``None``.
        """
        logging.info('Ability Estimation Service to be implemented...')
        attributes = self.service_attributes()
        ability_estimation = AbilityEstimation.parse_obj(attributes)

        result = ability_estimation.calculate()

        # the file name is needed for the upload on both paths; assigning it
        # only on success left the error path without ``self.file_name``
        self.file_name = f'{ability_estimation.exam_id}_ability_estimation_result.json'

        status = 'success' if result is not None else 'error'
        response = json.dumps({
            'status': status,
            'result': result
        })

        aws_helper.file_stream_upload(
            io.BytesIO(response.encode('UTF-8')), self.file_name,
            ApplicationConfigs.s3_processed_bucket, self.ACTION)

View File

@ -1,5 +1,26 @@
import logging, json
from helpers import aws_helper, tar_helper, common_helper
class Base:
    """Common base for services whose attributes arrive as a tar archive in S3."""

    def __init__(self, source, ingest_type='message'):
        """Remember the incoming ``source`` and how it was ingested."""
        self.source = source
        self.ingest_type = ingest_type

    def service_attributes(self):
        """Load the service's attributes file and return it as parsed JSON.

        Stores the object key on ``self.key`` and the archive on ``self.tar``.
        The attributes file name is derived from the ``ACTION`` attribute,
        which is not defined in this class.
        """
        logging.info('Retrieving attributes from message...')

        # get s3 object
        self.key = aws_helper.get_key_from_message(self.source)
        bucket = aws_helper.get_bucket_from_message(self.source)
        s3_object = aws_helper.get_object(self.key, bucket)

        # convert to tar
        self.tar = tar_helper.raw_to_tar(s3_object)

        # get attributes file and convert to dict
        attributes_name = f'{common_helper.camel_to_snake(self.ACTION)}_attributes.json'
        attributes_file = tar_helper.extract_file_from_tar(self.tar, attributes_name)
        return json.loads(attributes_file.read())