Пример #1
0
def test_registry():
    """A function passed to ``register`` can be looked up under its own name."""
    registry = Registry('test')

    def test_fn():
        pass

    registry.register(test_fn)

    # Lookup uses the function's ``__name__`` string as the key.
    assert registry['test_fn'] is not None
Пример #2
0
 def setUp(self):
     """Initialise the database and store the fixture records on ``self``.

     Written as a generator: the result of ``initDB`` and of every ``save()``
     call is yielded, and the value sent back in is what gets stored
     (presumably driven by an ``inlineCallbacks``-style decorator that is
     outside this view -- confirm).
     """
     yield initDB(self)

     # The user is saved first because the avatar and picture reference its id.
     saved_user = yield User(first_name="First", last_name="Last", age=10).save()
     self.user = saved_user

     saved_avatar = yield Avatar(name="an avatar name", user_id=self.user.id).save()
     self.avatar = saved_avatar

     saved_picture = yield Picture(name="a pic", size=10, user_id=self.user.id).save()
     self.picture = saved_picture

     # Records without a user reference.
     saved_color = yield FavoriteColor(name="blue").save()
     self.favcolor = saved_color
     saved_boy = yield Boy(name="Robert").save()
     self.boy = saved_boy
     saved_girl = yield Girl(name="Susan").save()
     self.girl = saved_girl

     self.config = Registry.getConfig()
Пример #3
0
 def setUp(self):
     """Create the test database and the records the test methods rely on.

     Generator-style fixture: each ``yield`` hands a pending operation to the
     caller and receives its result back (presumably via an
     ``inlineCallbacks``-style decorator not visible here -- confirm).
     """
     yield initDB(self)

     # Build each record, then yield its save() so the stored value is the
     # result of the save rather than the unsaved object.
     new_user = User(first_name="First", last_name="Last", age=10)
     self.user = yield new_user.save()

     # Avatar and picture both point at the user saved above.
     new_avatar = Avatar(name="an avatar name", user_id=self.user.id)
     self.avatar = yield new_avatar.save()

     new_picture = Picture(name="a pic", size=10, user_id=self.user.id)
     self.picture = yield new_picture.save()

     new_color = FavoriteColor(name="blue")
     self.favcolor = yield new_color.save()

     new_boy = Boy(name="Robert")
     self.boy = yield new_boy.save()

     new_girl = Girl(name="Susan")
     self.girl = yield new_girl.save()

     self.config = Registry.getConfig()
Пример #4
0
 def setUp(self):
     """Initialise the database, then cache the registry config on ``self``.

     Generator-style fixture: the ``initDB`` result is yielded to the caller
     (presumably an ``inlineCallbacks``-style driver outside this view --
     confirm) before the config is read.
     """
     yield initDB(self)

     current_config = Registry.getConfig()
     self.config = current_config
Пример #5
0
import pandas as pd
from assets.mapping import colmap, local_identifiers
from .standardize import standardize
from .view import View
from db import query_db, get_db
from db.common import get_table_columns
from utils import Registry
from config import IDVIEWTYPE
import logging

# Module-level logger, named after this module.
preplogger = logging.getLogger(__name__)


# Simple config registries for function assignment: a plain dict and a
# Registry instance named 'views'. Nothing in the visible code populates
# either of them.
registry_idview = {}
view_registry = Registry(name='views')

## Prepare identifying information for matching ##
def create_data_view(tablename: str) -> pd.DataFrame:
    """Load every row of ``tablename`` into a ``View`` and standardize it.

    The table is read with ``SELECT *`` through ``get_db()``. The view's
    context records the text before the first underscore of ``tablename``
    under the ``'partner'`` key, and ``standardize(view.subset).data`` is
    stored on the view as ``standardized``.

    NOTE(review): the signature is annotated ``pd.DataFrame`` but the object
    returned is the ``View`` wrapper. ``tablename`` is interpolated straight
    into the SQL text, so it must come from a trusted source.

    Args:
        tablename: Name of the table to read.

    Returns:
        The ``View`` built from the query result.
    """
    raw_query = f"SELECT * FROM {tablename}"

    # Read first, then derive the partner prefix -- same evaluation order as
    # building both inline in the View(...) call.
    frame = pd.read_sql_query(raw_query, get_db())
    view_context = {'partner': tablename.split('_')[0]}
    dview = View(frame, context=view_context)

    preplogger.info(f'Data View created with statement:\n{raw_query}')
    preplogger.info(f'Data View created with columns:\n{dview.columns}\nlength: {len(dview)}')

    standardized_result = standardize(dview.subset)
    dview.standardized = standardized_result.data
    return dview



################################################
Пример #6
0
class LogParser:
    """Collect users, tasks, modules and content from a stream of log events.

    Handler methods are attached to the class-level ``handler`` registry with
    ``@handler.add(...)``, naming the event type(s) (and optionally the event
    source) they apply to. ``_parse`` decodes each log line as JSON and passes
    it to ``LogParser.handler``, which presumably dispatches to the matching
    handler(s) -- the ``Registry`` implementation is not visible here; confirm.

    The ``get_*`` methods are generators that yield tuples built from the
    collected state.
    """

    # Registry the @handler.add decorators below attach to; _parse calls it
    # as LogParser.handler(parser, item).
    handler = Registry()

    def _update_course(self, item):
        """Refresh ``self.course_name`` from the item's ``context.course_id``.

        The text after the first ``:`` is used (the whole id when it has no
        colon). An empty result leaves the current name unchanged.
        """
        self.course_name = (get_item(item, 'context.course_id').split(
            ':', 1)[-1] or self.course_name)

    @handler.add(event_type=['load_video', 'edx.video.loaded'])
    def _load_video(self, item):
        """Record a loaded video as content and attach it to its page."""
        self._update_course(item)
        # The 'event' field holds a JSON-encoded string, hence json.loads.
        video_id = get_item(json.loads(get_item(item, 'event')), 'id')
        page = get_item(item, 'page')
        self.content.add_content('video', video_id)
        self.modules.add_content(page, video_id)

    @handler.add(event_type=['play_video', 'edx.video.played'])
    def _play_video(self, item):
        """Mark the video as viewed by the user who played it."""
        self._update_course(item)
        user_id = get_item(item, 'context.user_id')
        # The 'event' field holds a JSON-encoded string, hence json.loads.
        video_id = get_item(json.loads(get_item(item, 'event')), 'id')
        self.users.view_content(user_id, video_id)

    @handler.add(event_type='problem_check', event_source='server')
    def _problem_check_server(self, item):
        """Register each subtask of a checked problem and score it for the user."""
        self._update_course(item)
        (problem_id, user_id,
         time) = get_items(item,
                           ['event.problem_id', 'context.user_id', 'time'])
        subtasks = get_item(item, 'event.submission', type_=dict)
        for (subtask_id, subtask) in subtasks.items():
            (question, task_type) = get_items(subtask,
                                              ['question', 'response_type'])
            correct = get_item(subtask, 'correct', type_=bool)
            self.tasks.add_task(problem_id, subtask_id, question, task_type)
            # NOTE(review): `time` is passed through as read from the item,
            # whereas _problem_submitted converts it with convert_datetime.
            self.users.score_task(user_id, problem_id, subtask_id, correct,
                                  time)

    @handler.add(event_type='edx.grades.problem.submitted')
    def _problem_submitted(self, item):
        """Attach the problem to its referer page and record the user's solution."""
        self._update_course(item)
        (user_id, problem_id, page, time) = get_items(
            item, ['context.user_id', 'event.problem_id', 'referer', 'time'])
        self.modules.add_task(page, problem_id)
        self.users.post_solution(user_id, problem_id, convert_datetime(time))

    @handler.add(event_type='openassessmentblock.create_submission')
    def _create_submission(self, item):
        """Record a new open-assessment submission and the task it belongs to."""
        self._update_course(item)
        (submission_id, task_id, user_id, name,
         page) = get_items(item, [
             'event.submission_uuid', 'context.module.usage_key',
             'context.user_id', 'context.module.display_name', 'referer'
         ])
        self.users.create_submission(submission_id, user_id, task_id)
        self.modules.add_task(page, task_id)
        self.tasks.add_assessment(task_id, name)

    @handler.add(event_type=[
        'openassessmentblock.self_assess', 'openassessmentblock.peer_assess',
        'openassessmentblock.staff_assess'
    ])
    def _assess_submission(self, item):
        """Record an assessment of a submission as (points, max_points).

        Both totals are summed over the entries of ``event.parts``.
        """
        self._update_course(item)
        (submission_id,
         user_id) = get_items(item,
                              ['event.submission_uuid', 'context.user_id'])
        scores = get_item(item, 'event.parts', type_=list)
        points = sum(
            get_item(score, 'option.points', type_=int) for score in scores)
        max_points = sum(
            get_item(score, 'criterion.points_possible', type_=int)
            for score in scores)
        self.users.assess(submission_id, user_id, points, max_points)

    def __init__(self, log, course, answers, courses):
        """Parse ``log`` and finalise the collected data.

        Args:
            log: Iterable of log lines (strings); see ``_parse``.
            course: Passed to ``update_data`` of each collection.
            answers: Passed to ``update_data`` of each collection.
            courses: Indexed by the parsed course name to obtain the long
                course name; a missing name is not handled here.
        """
        self.course_name = ''
        self.users = Users()
        self.tasks = Tasks()
        self.modules = Modules()
        self.content = Content()

        self._parse(log)

        for item in (self.users, self.tasks, self.modules, self.content):
            item.update_data(course, answers)

        self.course_long_name = courses[self.course_name]

    def _parse(self, log):
        """Feed every line of ``log`` through the handler registry.

        Everything up to and including the first ``:`` of a line is dropped
        before JSON decoding (presumably each line carries a ``prefix:`` in
        front of the JSON payload -- confirm). Any exception raised while
        decoding or handling a line is logged as a warning and the line is
        skipped.
        """
        for (i, line) in enumerate(log):
            try:
                item = json.loads(line.split(':', maxsplit=1)[-1])
                LogParser.handler(self, item)
            except Exception as e:
                logging.warning('Error on process entry, line %d: %s', i, e)

    def get_course_info(self):
        """Return the short and long course names as a dict."""
        return {
            'short_name': self.course_name,
            'long_name': self.course_long_name
        }

    def get_student_solutions(self, user_id=None):
        """Yield ``(user_id, taskid, correct, time)`` for each recorded try.

        With ``user_id=None`` this yields one nested generator per user in
        ``self.users.submits`` instead of flattening the tuples.
        NOTE(review): confirm callers expect the nested form.
        """
        if user_id is None:
            for userid in self.users.submits:
                yield self.get_student_solutions(userid)
        else:
            submits = self.users.submits[user_id]
            for (taskid, tries) in submits.items():
                for (time, correct) in tries:
                    yield (user_id, taskid, correct, time)

    def get_student_content(self, user_id=None):
        """Yield ``(user_id, content_id, viewed_flag)`` for known content.

        Only content for which ``get_content_module`` returns a truthy value
        is reported; ``viewed_flag`` is 1 if the user viewed it, else 0.
        With ``user_id=None`` this yields one nested generator per user in
        ``self.users.viewed_content`` instead of flattening the tuples.
        NOTE(review): confirm callers expect the nested form.
        """
        if user_id is None:
            for userid in self.users.viewed_content:
                yield self.get_student_content(userid)
        else:
            viewed = self.users.viewed_content[user_id]
            for (_, content) in self.content.content.items():
                for content_id in content:
                    if self.modules.get_content_module(content_id):
                        yield (user_id, content_id, int(content_id in viewed))

    def get_assessments(self):
        """Yield ``(user_id, problem_id, reviewer, score, max_score)`` tuples.

        One tuple is produced per assessment of each submission in
        ``self.users.pr_submits``; the problem id is passed through ``get_id``.
        """
        for submission_id in self.users.pr_submits:
            (user_id, problem_id) = self.users.pr_submits[submission_id]
            problem_id = get_id(problem_id)
            assessments = self.users.assessments[submission_id]
            for (reviewer, score, max_score) in assessments:
                yield (user_id, problem_id, reviewer, score, max_score)

    def get_tasks(self, task_id=None):
        """Yield ``(id, type, text, module)`` tuples describing tasks.

        For a given ``task_id`` nothing is yielded unless
        ``get_task_module`` returns a truthy module. Subtasks are reported
        with their recorded type and text (``'NA'`` when the text is missing
        or empty); an assessment is reported with type ``'openassessment'``.
        With ``task_id=None`` this yields one nested generator per known task
        id instead of flattening the tuples.
        NOTE(review): confirm callers expect the nested form.
        """
        if task_id is None:
            # Union of ids seen as regular tasks and as open assessments.
            task_ids = set(self.tasks.tasks) | set(self.tasks.assessments)
            for taskid in task_ids:
                yield self.get_tasks(taskid)
        else:
            module = self.modules.get_task_module(task_id)
            if not module:
                return
            if task_id in self.tasks.tasks:
                for subtask in self.tasks.tasks[task_id]:
                    text = self.tasks.subtask_text.get(subtask) or 'NA'
                    yield (subtask, self.tasks.subtask_type[subtask], text,
                           module)
            if task_id in self.tasks.assessments:
                name = self.tasks.assessments[task_id] or 'NA'
                yield (get_id(task_id), 'openassessment', name, module)

    def get_content(self):
        """Yield ``(content_id, content_type, 'NA', module)`` for content.

        Content for which ``get_content_module`` returns a falsy value is
        skipped.
        """
        for (content_type, content) in self.content.content.items():
            for content_id in content:
                module = self.modules.get_content_module(content_id)
                if module:
                    yield (content_id, content_type, 'NA', module)
Пример #7
0
from utils import Registry

# Module-level registries, one per component kind. The string argument is
# presumably the registry's name (Registry lives in utils -- confirm).
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
DATA_LOADER = Registry('data_loader')
Пример #8
0
from utils import Registry

# Default config handed to the dataset registry; 'type' presumably names the
# registered class to use when none is configured -- confirm against Registry.
dataset_default_cfg = {'type': 'ModelNet'}

# Default config handed to the data-loader registry (same convention).
dataloader_default_cfg = {'type': 'DataListLoader'}

# Registries constructed with their respective default configs.
DATASETS = Registry(default_cfg=dataset_default_cfg)
DATALOADERS = Registry(default_cfg=dataloader_default_cfg)
Пример #9
0
from utils import Registry

# Module-level registries for datasets and data loaders. The string argument
# is presumably the registry's name (Registry lives in utils -- confirm).
DATASETS = Registry('dataset')
DATA_LOADER = Registry('data_loader')
Пример #10
0
from utils import Registry

# Module-level registries for datasets and pipelines. The string argument is
# presumably the registry's name (Registry lives in utils -- confirm).
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
Пример #11
0
from utils import Registry, OptimizerRegistry
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import StepLR, MultiStepLR, CosineAnnealingLR

# Default optimizer config handed to the optimizer registry.
optimizer_default_cfg = {'type': 'Adam', 'lr': 0.001}

# No default learning-rate scheduler config is provided.
lr_scheduler_default_cfg = None

# Optimizer registry with the torch optimizers imported above registered.
OPTIMIZERS = OptimizerRegistry(default_cfg=optimizer_default_cfg)
OPTIMIZERS.register(Adam)
OPTIMIZERS.register(SGD)

# Scheduler registry with the torch LR schedulers imported above registered.
LR_SCHEDULER = Registry(default_cfg=lr_scheduler_default_cfg)
LR_SCHEDULER.register(StepLR)
LR_SCHEDULER.register(CosineAnnealingLR)
LR_SCHEDULER.register(MultiStepLR)
Пример #12
0
from utils import Registry

# One module-level registry per component kind, each created with the
# Registry defaults (no arguments).
MODELS = Registry()
ENCODERS = Registry()
GENERATORS = Registry()
MAPPINGS = Registry()
DISCRIMINATORS = Registry()
Пример #13
0
def validate_registry(registry: Registry, mapping=colmap) -> bool:
    """Check that every value of ``mapping`` is a key of ``registry``.

    Args:
        registry: Object exposing ``keys()`` and a ``name`` attribute.
        mapping: Dict-like whose values must all be registered; defaults to
            the module-level ``colmap``.

    Returns:
        ``True`` when no value is missing, matching the ``-> bool``
        annotation (the function used to fall through and return ``None``).

    Raises:
        AssertionError: For the first value of ``mapping`` that is not a key
            of ``registry``.
    """
    for val in mapping.values():
        # Raised explicitly rather than via ``assert`` so the check still
        # runs when Python is started with -O (which strips assert statements).
        if val not in registry.keys():
            raise AssertionError(
                f'{val} missing from registry {registry.name}')
    return True
Пример #14
0
# pandas standards.py

from utils import Registry
from assets.mapping import colmap
from mpi.prepare.view import View

from recordlinkage.preprocessing import clean

# Registry that the standardization functions below are registered with.
pandas_standards_registry = Registry('pandas_standards')

# Standardization Functions


def ssn_pool(view: View, colname='ssn_pool'):
    """Validate the SSN column of ``view`` and return it as integers.

    Each value is converted to a string and accepted only if it is exactly
    nine characters long, its first three characters are neither ``000`` nor
    ``666`` and parse to a number below 900, and both the next two characters
    and the last four characters parse to a number greater than zero.

    Args:
        view: Object indexable by column name whose columns expose ``apply``.
        colname: Name of the column holding the SSN values.

    Returns:
        The result of ``view[colname].apply(...)``: ``int(value)`` for values
        that pass every check and ``None`` for all others.
    """
    def _test_ssn(x):
        # Explicit checks instead of ``assert``: assertions are stripped under
        # ``python -O``, which would silently disable the validation.
        try:
            tx = str(x)
            if len(tx) != 9:
                return None
            area, group, serial = tx[0:3], tx[3:5], tx[-4:]
            if area in ('000', '666'):
                return None
            if int(area) >= 900 or int(group) <= 0 or int(serial) <= 0:
                return None
            return int(x)
        except (ValueError, TypeError):
            # Non-numeric content. Narrowed from a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return None

    return view[colname].apply(_test_ssn)


# Register ssn_pool with the registry (no explicit name is given).
pandas_standards_registry.register(ssn_pool)
Пример #15
0
# CMD

import torch

from utils import Registry


# Registries queried by name in build_optimizer / build_lr_scheduler below.
OPTIMIZER_REGISTRY = Registry("optimizer")
LR_SCHEDULER_REGISTRY = Registry("lr_scheduler")


def build_optimizer(cfg, model) -> torch.optim.Optimizer:
    """Instantiate the optimizer described by ``cfg.schedule.optimizer``.

    The ``"name"`` entry selects the factory from ``OPTIMIZER_REGISTRY``; all
    remaining entries are forwarded as keyword arguments after
    ``model.parameters()``.

    Args:
        cfg: Config object exposing a mapping at ``cfg.schedule.optimizer``.
        model: Object whose ``parameters()`` are handed to the optimizer.

    Returns:
        The object produced by the selected factory.

    Raises:
        KeyError: If the optimizer config has no ``"name"`` entry.
    """
    # Work on a shallow copy: popping "name" out of the caller's config made a
    # second build from the same cfg (or any later read of the name) fail.
    params = dict(cfg.schedule.optimizer)
    name = params.pop("name")
    optimizer = OPTIMIZER_REGISTRY.get(name)(model.parameters(), **params)
    return optimizer


def build_lr_scheduler(cfg, optimizer):
    """Instantiate the scheduler described by ``cfg.schedule.lr_scheduler``.

    The ``"name"`` entry selects the factory from ``LR_SCHEDULER_REGISTRY``;
    all remaining entries are forwarded as keyword arguments after
    ``optimizer``.

    Args:
        cfg: Config object exposing a mapping at ``cfg.schedule.lr_scheduler``.
        optimizer: Optimizer passed as the first argument to the factory.

    Returns:
        The object produced by the selected factory.

    Raises:
        KeyError: If the scheduler config has no ``"name"`` entry.
    """
    # Work on a shallow copy: popping "name" out of the caller's config made a
    # second build from the same cfg (or any later read of the name) fail.
    params = dict(cfg.schedule.lr_scheduler)
    name = params.pop("name")
    lr_scheduler = LR_SCHEDULER_REGISTRY.get(name)(optimizer, **params)
    return lr_scheduler
Пример #16
0
from utils import Registry

# Default config handed to the model registry; 'type' presumably names the
# registered model to use when none is configured -- confirm against Registry.
model_default_cfg = {'type': 'PointNet2'}

MODELS = Registry(default_cfg=model_default_cfg)
Пример #17
0
# view_pandas.py


from utils import Registry, get_column_intersect
import pandas as pd
from assets.mapping import colmap, local_identifiers

# Registry that the DataFrame access/view functions below are registered with.
df_registry = Registry('df_view')


## Access/View functions

def dfcolumns(data: pd.DataFrame) -> list:
    """Return the column labels of ``data`` as a plain list."""
    column_index = data.columns
    return column_index.tolist()
# Registered under the explicit name 'columns' rather than the function name.
df_registry.register(dfcolumns, name='columns')


def dflen(data: pd.DataFrame) -> int:
    """Return ``len(data)``, i.e. the number of rows for a DataFrame."""
    row_count = len(data)
    return row_count
# Registered under the explicit name 'dlen' rather than the function name.
df_registry.register(dflen, name='dlen')


def getitem(data: pd.DataFrame, idx) -> pd.Series:
    """Return ``data[idx]``, the selection made by indexing ``data`` with ``idx``."""
    selection = data[idx]
    return selection
# Registered without an explicit name.
df_registry.register(getitem)


def head(data: pd.DataFrame, nrows:int) -> pd.DataFrame:
    """Return the result of ``data.head(nrows)``."""
    leading_rows = data.head(nrows)
    return leading_rows
# Registered without an explicit name.
df_registry.register(head)
Пример #18
0
from utils import Registry

# Module-level model registry. The string argument is presumably the
# registry's name (Registry lives in utils -- confirm).
MODELS = Registry('model')