def test_registry():
    """Smoke test: a registered function is retrievable from the Registry."""
    def test_fn():
        pass

    r = Registry('test')
    r.register(test_fn)
    # Lookup key matches the function's __name__ -- presumably register()
    # keys on __name__ when no explicit name is given; confirm in Registry.
    assert r['test_fn'] is not None
def setUp(self):
    """Create the test DB and a fixture graph of related records.

    Generator-style async setUp (each `yield` waits on a Deferred-like
    save -- presumably Twisted trial + twistar; confirm the framework).
    Order matters: `self.user` must exist before the rows that use
    `user_id=self.user.id`.
    """
    yield initDB(self)
    # Parent record first -- its id is a foreign key below.
    self.user = yield User(first_name="First", last_name="Last", age=10).save()
    self.avatar = yield Avatar(name="an avatar name", user_id=self.user.id).save()
    self.picture = yield Picture(name="a pic", size=10, user_id=self.user.id).save()
    # Standalone fixtures, not linked to the user.
    self.favcolor = yield FavoriteColor(name="blue").save()
    self.boy = yield Boy(name="Robert").save()
    self.girl = yield Girl(name="Susan").save()
    self.config = Registry.getConfig()
def setUp(self):
    """Initialize the test database and cache the shared Registry config."""
    yield initDB(self)
    self.config = Registry.getConfig()
import pandas as pd
from assets.mapping import colmap, local_identifiers
from .standardize import standardize
from .view import View
from db import query_db, get_db
from db.common import get_table_columns
from utils import Registry
from config import IDVIEWTYPE
import logging

preplogger = logging.getLogger(__name__)

# Creating simple config registry for function assignment
registry_idview = {}
view_registry = Registry(name='views')


## Prepare identifying information for matching ##
def create_data_view(tablename: str) -> pd.DataFrame:
    """Load a whole DB table into a View and attach its standardized data.

    The partner tag is taken from the table-name prefix (text before the
    first underscore) and stored on the View's context.

    NOTE(review): `tablename` is interpolated directly into the SQL string;
    safe only if table names come from trusted config, not user input.
    NOTE(review): the annotation says `pd.DataFrame` but the function
    returns a `View` -- confirm whether View subclasses DataFrame.
    """
    raw_query = f"SELECT * FROM {tablename}"
    dview = View(pd.read_sql_query(raw_query, get_db()),
                 context={'partner': tablename.split('_')[0]})
    preplogger.info(f'Data View created with statement:\n{raw_query}')
    preplogger.info(f'Data View created with columns:\n{dview.columns}\nlength: {len(dview)}')
    # Standardized copy is derived from the View's subset and cached on it.
    dview.standardized = standardize(dview.subset).data
    return dview
################################################
class LogParser:
    """Parses an edX-style JSON event log into per-user/task/module/content
    aggregates (Users, Tasks, Modules, Content).

    Handler methods are registered on the class-level ``handler`` Registry
    via ``@handler.add(...)``; ``_parse`` feeds each decoded log entry to
    the registry, which presumably dispatches on the entry's event fields
    (``event_type`` / ``event_source``) -- confirm in Registry.
    """

    # Class-level dispatcher mapping event metadata to the methods below.
    handler = Registry()

    def _update_course(self, item):
        # Remember the most recent course id seen; the `or` keeps the old
        # value when this entry carries an empty 'context.course_id'.
        self.course_name = (get_item(item, 'context.course_id').split(
            ':', 1)[-1] or self.course_name)

    @handler.add(event_type=['load_video', 'edx.video.loaded'])
    def _load_video(self, item):
        """Record that a video exists and attach it to its page/module."""
        self._update_course(item)
        # 'event' is a JSON string here, not a dict -- decode before lookup.
        video_id = get_item(json.loads(get_item(item, 'event')), 'id')
        page = get_item(item, 'page')
        self.content.add_content('video', video_id)
        self.modules.add_content(page, video_id)

    @handler.add(event_type=['play_video', 'edx.video.played'])
    def _play_video(self, item):
        """Record that a user viewed a video."""
        self._update_course(item)
        user_id = get_item(item, 'context.user_id')
        video_id = get_item(json.loads(get_item(item, 'event')), 'id')
        self.users.view_content(user_id, video_id)

    @handler.add(event_type='problem_check', event_source='server')
    def _problem_check_server(self, item):
        """Record per-subtask scores for a server-side problem check."""
        self._update_course(item)
        (problem_id, user_id, time) = get_items(item,
                                                ['event.problem_id',
                                                 'context.user_id', 'time'])
        subtasks = get_item(item, 'event.submission', type_=dict)
        for (subtask_id, subtask) in subtasks.items():
            (question, task_type) = get_items(subtask,
                                              ['question', 'response_type'])
            correct = get_item(subtask, 'correct', type_=bool)
            self.tasks.add_task(problem_id, subtask_id, question, task_type)
            self.users.score_task(user_id, problem_id, subtask_id, correct,
                                  time)

    @handler.add(event_type='edx.grades.problem.submitted')
    def _problem_submitted(self, item):
        """Record a problem submission and link the problem to its page."""
        self._update_course(item)
        (user_id, problem_id, page, time) = get_items(
            item, ['context.user_id', 'event.problem_id', 'referer', 'time'])
        self.modules.add_task(page, problem_id)
        self.users.post_solution(user_id, problem_id, convert_datetime(time))

    @handler.add(event_type='openassessmentblock.create_submission')
    def _create_submission(self, item):
        """Record a new open-assessment submission."""
        self._update_course(item)
        (submission_id, task_id, user_id, name, page) = get_items(item, [
            'event.submission_uuid', 'context.module.usage_key',
            'context.user_id', 'context.module.display_name', 'referer'
        ])
        self.users.create_submission(submission_id, user_id, task_id)
        self.modules.add_task(page, task_id)
        self.tasks.add_assessment(task_id, name)

    @handler.add(event_type=[
        'openassessmentblock.self_assess', 'openassessmentblock.peer_assess',
        'openassessmentblock.staff_assess'
    ])
    def _assess_submission(self, item):
        """Record an assessment: total points earned vs. points possible."""
        self._update_course(item)
        (submission_id, user_id) = get_items(item, ['event.submission_uuid',
                                                    'context.user_id'])
        scores = get_item(item, 'event.parts', type_=list)
        points = sum(
            get_item(score, 'option.points', type_=int) for score in scores)
        max_points = sum(
            get_item(score, 'criterion.points_possible', type_=int)
            for score in scores)
        self.users.assess(submission_id, user_id, points, max_points)

    def __init__(self, log, course, answers, courses):
        """Parse *log* (iterable of lines) and build all aggregates.

        `courses` maps the parsed short course name to its long name;
        raises KeyError if the course was never seen / is unknown.
        """
        self.course_name = ''
        self.users = Users()
        self.tasks = Tasks()
        self.modules = Modules()
        self.content = Content()
        self._parse(log)
        for item in (self.users, self.tasks, self.modules, self.content):
            item.update_data(course, answers)
        self.course_long_name = courses[self.course_name]

    def _parse(self, log):
        """Decode each log line's JSON payload and dispatch it.

        Lines are '<prefix>:<json>'; malformed or unhandled entries are
        logged and skipped so one bad line doesn't abort the parse.
        """
        for (i, line) in enumerate(log):
            try:
                item = json.loads(line.split(':', maxsplit=1)[-1])
                LogParser.handler(self, item)
            except Exception as e:
                logging.warning('Error on process entry, line %d: %s', i, e)

    def get_course_info(self):
        """Return the parsed course's short and long names."""
        return {
            'short_name': self.course_name,
            'long_name': self.course_long_name
        }

    def get_student_solutions(self, user_id=None):
        """Yield (user_id, task_id, correct, time) tuples.

        NOTE: with user_id=None this yields one *generator per user*
        (nested, not flattened) -- callers must iterate each one.
        """
        if user_id is None:
            for userid in self.users.submits:
                yield self.get_student_solutions(userid)
        else:
            submits = self.users.submits[user_id]
            for (taskid, tries) in submits.items():
                for (time, correct) in tries:
                    yield (user_id, taskid, correct, time)

    def get_student_content(self, user_id=None):
        """Yield (user_id, content_id, viewed-flag) for module-bound content.

        NOTE: with user_id=None this yields one generator per user (nested).
        """
        if user_id is None:
            for userid in self.users.viewed_content:
                yield self.get_student_content(userid)
        else:
            viewed = self.users.viewed_content[user_id]
            for (_, content) in self.content.content.items():
                for content_id in content:
                    # Only report content that belongs to a known module.
                    if self.modules.get_content_module(content_id):
                        yield (user_id, content_id, int(content_id in viewed))

    def get_assessments(self):
        """Yield (user_id, problem_id, reviewer, score, max_score) tuples."""
        for submission_id in self.users.pr_submits:
            (user_id, problem_id) = self.users.pr_submits[submission_id]
            problem_id = get_id(problem_id)
            assessments = self.users.assessments[submission_id]
            for (reviewer, score, max_score) in assessments:
                yield (user_id, problem_id, reviewer, score, max_score)

    def get_tasks(self, task_id=None):
        """Yield (task_id, type, text, module) rows for known tasks.

        NOTE: with task_id=None this yields one generator per task (nested).
        Tasks with no known module are skipped.
        """
        if task_id is None:
            task_ids = set(self.tasks.tasks) | set(self.tasks.assessments)
            for taskid in task_ids:
                yield self.get_tasks(taskid)
        else:
            module = self.modules.get_task_module(task_id)
            if not module:
                return
            if task_id in self.tasks.tasks:
                for subtask in self.tasks.tasks[task_id]:
                    # 'NA' placeholder when the subtask text was never seen.
                    text = self.tasks.subtask_text.get(subtask) or 'NA'
                    yield (subtask, self.tasks.subtask_type[subtask], text,
                           module)
            if task_id in self.tasks.assessments:
                name = self.tasks.assessments[task_id] or 'NA'
                yield (get_id(task_id), 'openassessment', name, module)

    def get_content(self):
        """Yield (content_id, content_type, 'NA', module) for module-bound
        content items."""
        for (content_type, content) in self.content.content.items():
            for content_id in content:
                module = self.modules.get_content_module(content_id)
                if module:
                    yield (content_id, content_type, 'NA', module)
from utils import Registry

# Named registries for the data pipeline's pluggable components;
# presumably populated elsewhere via @register decorators -- confirm usage.
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
DATA_LOADER = Registry('data_loader')
from utils import Registry

# Fallback specs applied when a build config omits an explicit 'type'.
dataset_default_cfg = {'type': 'ModelNet'}
dataloader_default_cfg = {'type': 'DataListLoader'}

DATASETS = Registry(default_cfg=dataset_default_cfg)
DATALOADERS = Registry(default_cfg=dataloader_default_cfg)
from utils import Registry

# Named registries for dataset and data-loader classes;
# presumably populated elsewhere via register() -- confirm usage.
DATASETS = Registry('dataset')
DATA_LOADER = Registry('data_loader')
from utils import Registry

# Named registries for dataset and pipeline classes;
# presumably populated elsewhere via register() -- confirm usage.
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
from utils import Registry, OptimizerRegistry
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import StepLR, MultiStepLR, CosineAnnealingLR

# Defaults used when a config gives no explicit spec; the scheduler has no
# default (None), so a scheduler must be named explicitly to get one.
optimizer_default_cfg = {'type': 'Adam', 'lr': 0.001}
lr_scheduler_default_cfg = None

# NOTE(review): optimizers use the specialized OptimizerRegistry while
# schedulers use the plain Registry -- confirm that's intentional.
OPTIMIZERS = OptimizerRegistry(default_cfg=optimizer_default_cfg)
OPTIMIZERS.register(Adam)
OPTIMIZERS.register(SGD)

LR_SCHEDULER = Registry(default_cfg=lr_scheduler_default_cfg)
LR_SCHEDULER.register(StepLR)
LR_SCHEDULER.register(CosineAnnealingLR)
LR_SCHEDULER.register(MultiStepLR)
from utils import Registry

# Component registries for the model-building blocks; presumably populated
# elsewhere via register() decorators -- confirm usage. Unlike the named
# registries in sibling modules, these pass no name argument.
MODELS = Registry()
ENCODERS = Registry()
GENERATORS = Registry()
MAPPINGS = Registry()
DISCRIMINATORS = Registry()
def validate_registry(registry: Registry, mapping=colmap) -> bool:
    """Check that every value of *mapping* is a registered key of *registry*.

    Args:
        registry: the Registry whose keys must cover the mapping's values.
        mapping: column mapping to validate (defaults to the shared colmap).

    Returns:
        True when every mapped value is present.

    Raises:
        AssertionError: naming the first missing value and the registry.

    The original used bare ``assert`` statements, which are stripped under
    ``python -O`` (silently disabling validation) and never returned the
    annotated bool. The explicit raise keeps the same exception type for
    existing callers while surviving ``-O``.
    """
    for val in mapping.values():
        if val not in registry.keys():
            raise AssertionError(
                f'{val} missing from registry {registry.name}')
    return True
# pandas standards.py
from utils import Registry
from assets.mapping import colmap
from mpi.prepare.view import View
from recordlinkage.preprocessing import clean

pandas_standards_registry = Registry('pandas_standards')


# Standardization Functions
def ssn_pool(view: View, colname='ssn_pool'):
    """Validate the SSN column, mapping invalid entries to None.

    Each value is accepted only if it is nine digits with a valid area
    number (not 000, not 666, below 900) and nonzero group and serial
    numbers; valid values come back as ints, everything else as None.

    The original implementation used bare ``assert`` statements inside a
    bare ``except:``: the asserts are stripped under ``python -O`` (every
    malformed value would then pass), and the bare except also swallowed
    KeyboardInterrupt/SystemExit. Rewritten with explicit checks that
    accept/reject exactly the same values.
    """
    def _test_ssn(x):
        tx = str(x)
        # Must be exactly nine digits; non-digits previously failed via
        # the int() conversions raising ValueError.
        if len(tx) != 9 or not tx.isdigit():
            return None
        area, group, serial = tx[0:3], tx[3:5], tx[-4:]
        if area in ('000', '666') or int(area) >= 900:
            return None
        if int(group) == 0 or int(serial) == 0:
            return None
        return int(tx)

    return view[colname].apply(_test_ssn)


pandas_standards_registry.register(ssn_pool)
# CMD
import torch
from utils import Registry

OPTIMIZER_REGISTRY = Registry("optimizer")
LR_SCHEDULER_REGISTRY = Registry("lr_scheduler")


def build_optimizer(cfg, model) -> torch.optim.Optimizer:
    """Build the optimizer named by ``cfg.schedule.optimizer['name']``.

    The remaining keys of ``cfg.schedule.optimizer`` become the optimizer's
    keyword arguments. Note: the 'name' key is popped, i.e. the config
    object is mutated.
    """
    opt_cfg = cfg.schedule.optimizer
    opt_cls = OPTIMIZER_REGISTRY.get(opt_cfg.pop("name"))
    return opt_cls(model.parameters(), **opt_cfg)


def build_lr_scheduler(cfg, optimizer):
    """Build the LR scheduler named by ``cfg.schedule.lr_scheduler['name']``.

    The remaining keys become the scheduler's keyword arguments; 'name' is
    popped from the config (mutation), mirroring build_optimizer.
    """
    sched_cfg = cfg.schedule.lr_scheduler
    sched_cls = LR_SCHEDULER_REGISTRY.get(sched_cfg.pop("name"))
    return sched_cls(optimizer, **sched_cfg)
from utils import Registry

# Models default to PointNet2 when a build config omits an explicit 'type'.
model_default_cfg = {'type': 'PointNet2'}
MODELS = Registry(default_cfg=model_default_cfg)
# view_pandas.py from utils import Registry, get_column_intersect import pandas as pd from assets.mapping import colmap, local_identifiers df_registry = Registry('df_view') ## Access/View functions def dfcolumns(data: pd.DataFrame) -> list: return data.columns.tolist() df_registry.register(dfcolumns, name='columns') def dflen(data: pd.DataFrame) -> int: return len(data) df_registry.register(dflen, name='dlen') def getitem(data: pd.DataFrame, idx) -> pd.Series: return data[idx] df_registry.register(getitem) def head(data: pd.DataFrame, nrows:int) -> pd.DataFrame: return data.head(nrows) df_registry.register(head)
from utils import Registry

# Named registry for model classes; presumably populated elsewhere via
# register() decorators -- confirm usage.
MODELS = Registry('model')