Example #1
def setup_training_run(model_name):
    args = parse_training_args()
    config = Settings(args.config)

    util.ensure_exists(awe_runs_dir)
    # count previous runs for this model to build a unique run name
    existing_runs = len([path for path in os.listdir(awe_runs_dir)
                         if os.path.isdir(os.path.join(awe_runs_dir, path)) and model_name in path])
    run_name = '{0}_{1}_{2}'.format(
        model_name,
        existing_runs,
        datetime.datetime.now().strftime('%d_%m_%Y')
    )
    log_dir = os.path.join(awe_runs_dir, run_name)
    checkpoint_dir = os.path.join(log_dir, 'checkpoints')
    util.ensure_exists(log_dir)
    util.ensure_exists(checkpoint_dir)

    copyfile(args.config, os.path.join(log_dir, 'conf.ini'))
    logger = create_logger(model_name, os.path.join(log_dir, 'log'))

    logger.info('Running with args:')
    for var in vars(args):
        logger.info('{0}: {1}'.format(var, getattr(args, var)))

    use_gru = getattr(config.general_training, 'use_gru', False)
    noise_mult = getattr(config.general_training, 'noise_multiplier', 0)
    noise_prob = getattr(config.general_training, 'noise_prob', 0)
    mean_sub = getattr(config.general_training, 'mean_subtraction', True)
    var_norm = getattr(config.general_training, 'variance_normalization', False)

    return args, config, logger, checkpoint_dir, log_dir, use_gru, noise_mult, noise_prob, mean_sub, var_norm
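
A hypothetical call sketch for the function above; the model name 'awe_rnn' and the entry-point wrapper are illustrative, assuming parse_training_args() reads the --config path from the command line:

if __name__ == '__main__':
    # 'awe_rnn' is a placeholder model name, not from the original code
    (args, config, logger, checkpoint_dir, log_dir,
     use_gru, noise_mult, noise_prob, mean_sub, var_norm) = setup_training_run('awe_rnn')
    logger.info('checkpoints will be written to {0}'.format(checkpoint_dir))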
Example #2
def upload_to_server(work_directory):
    book = load_workbook(os.path.join(work_directory, 'japan.xlsx'))
    log = create_logger('japan-pmda')
    log.critical(datetime.datetime.now())
    for sheet_name in ('device', 'drug'):
        sheet = book[sheet_name]  # get_sheet_by_name was removed in openpyxl 3.x
        for row in sheet.iter_rows(min_row=2):  # skip the header row
            cells = []
            for c in row:
                if isinstance(c.value, str):
                    cells.append(replace_carriage(c.value))
                elif c.value is None:
                    cells.append('')
                else:
                    cells.append(c.value)
            if sheet_name == 'device':
                p, a = parse_device(cells)
            else:
                p, a = parse_drug(cells)
            if len(p['name']) < 1 or len(a['name']) < 1:
                log.warning('invalid record for {}'.format(p['name']))
                continue
            response = add_record('entity', [p, a])
            if response['_status'] != 'OK':
                log.error('failed to create record for {}'.format(p['name']))
                log.error(response)
                continue
            applicant_product = create_relationship(
                response['_items'][1]['_id'], response['_items'][0]['_id'])
            applicant_product['type'] = 7
            applicant_product['name'] = 'Applicant'
            applicant_product['abs'] = 'Applicant'
            response = add_record('relationship', [applicant_product])
            if response['_status'] != 'OK':
                log.error('failed to create relationship for {}'.format(
                    p['name']))
                log.error(response)
            else:
                log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
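
A minimal sketch of the replace_carriage helper referenced above (an assumption: the project's real implementation may differ), normalizing the line endings that openpyxl hands back from Excel cells:

def replace_carriage(text):
    # hypothetical helper: collapse Windows/old-Mac line endings into plain newlines
    return text.replace('\r\n', '\n').replace('\r', '\n')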
Example #3
def get_data(name: str, list_url: str, page_url: str, total_page: int, with_proxy: bool = False):
    log = create_logger(name)
    # each proxy needs 5 seconds to start, and 16 proxies will be required
    time.sleep(5)
    work_directory = os.path.expanduser('~/Downloads/{}'.format(name))
    os.makedirs(work_directory, exist_ok=True)

    results = find_list(log, work_directory, list_url, total_page)
    slugs = []
    for r in results:
        if os.path.exists(os.path.join(work_directory, '{}.json'.format(
                r['opportunities'][0]['fileNumberSlug']))):
            log.debug('already processed {}'.format(r['opportunities'][0]['fileNumberSlug']))
            continue
        slugs.append(page_url.format(r['opportunities'][0]['fileNumberSlug']))
    if with_proxy:
        PROXY_THREAD.start()
    with Pool(16) as pool:
        pool.starmap(parse_page, [(None, work_directory, with_proxy, slug) for slug in slugs])
    if with_proxy:
        PROXY_THREAD.close()
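
A hypothetical invocation of get_data; both URLs are placeholders, assuming page_url carries a {} slot for the opportunity slug as used above:

# placeholder name and URLs, not real endpoints
get_data('sam', 'https://example.com/api/list?page={}',
         'https://example.com/api/opportunity/{}', total_page=10)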
Example #4
def __init__(self, data_path):
    self.logger = create_logger('patents_view.log')
    if not os.path.exists(data_path):
        self.logger.critical('{} does not exist'.format(data_path))
        return
    self.data_path = data_path
    self.cpc_group = {}
    self.init_cpc_group(os.path.join(data_path, 'cpc_group.tsv'))
    self.cpc_subgroup = {}
    self.init_cpc_subgroup(os.path.join(data_path, 'cpc_subgroup.tsv'))
    self.cpc_subsection = {}
    self.init_cpc_subsection(os.path.join(data_path, 'cpc_subsection.tsv'))
    self.nber_category = {}
    self.init_nber_category(os.path.join(data_path, 'nber_category.tsv'))
    self.nber_subcategory = {}
    self.init_nber_subcategory(os.path.join(data_path, 'nber_subcategory.tsv'))
    self.uspc_class = {}
    self.init_uspc_class(os.path.join(data_path, 'mainclass_current.tsv'))
    self.uspc_subclass = {}
    self.init_uspc_subclass(os.path.join(data_path, 'subclass_current.tsv'))
    self.wipo_field = {}
    self.init_wipo_field(os.path.join(data_path, 'wipo_field.tsv'))
    self.uspto_class = {}
    self.init_uspto_class(os.path.join(data_path, 'mainclass_current.tsv'))
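
None of the init_* loaders are shown; a minimal sketch of what one might look like (an assumption: the real init_cpc_group may read different columns), mapping a CPC group id to its title from the TSV:

def init_cpc_group(self, file_name):
    # hypothetical loader: the real column layout may differ
    with open(file_name, encoding='utf-8') as fp:
        next(fp)  # skip the header line
        for line in fp:
            fields = line.rstrip('\n').split('\t')
            if len(fields) >= 2:
                self.cpc_group[fields[0]] = fields[1]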
Example #5
def main():
    product_code = get_product_code()
    log = create_logger('510K')
    with open(os.path.expanduser('~/work/fda/device-classification-0001-of-0001.json'), 'r') as fp:
        result = json.load(fp)
    log.critical(datetime.datetime.now())
    for r in result['results']:
        p = create_product()
        p['name'] = r.get('device_name', r['openfda'].get('device_name', ''))
        p['ref'] = r.get('k_number', r['openfda'].get('k_number', ''))
        p['addr']['line1'] = r.get('address_1', r['openfda'].get('address_1', ''))
        p['addr']['line2'] = r.get('address_2', r['openfda'].get('address_2', ''))
        p['addr']['city'] = r.get('city', r['openfda'].get('city', ''))
        p['addr']['state'] = r.get('state', r['openfda'].get('state', ''))
        p['addr']['zip'] = r.get('zip_code', r['openfda'].get('zip_code', ''))
        p['addr']['country'] = r.get('country_code', r['openfda'].get('country_code', ''))
        p['intro'] = r.get('statement_or_summary', r['openfda'].get('statement_or_summary', ''))
        p['asset']['type'] = 0
        p['tag'] = [
            r.get('advisory_committee_description', r['openfda'].get('advisory_committee_description', '')),
            r.get('medical_specialty_description', r['openfda'].get('medical_specialty_description', '')),
            'FDA',
            'Medical Device',
            '510K']
        # p['tag'] holds tags readable by ordinary users; p['asset']['lic'] holds product-specific tags.
        p['asset']['lic'] = [
            'FDA',
            '510K',
            r.get('clearance_type', r['openfda'].get('clearance_type', '')),
            r.get('advisory_committee_description', r['openfda'].get('advisory_committee_description', '')),
            r['openfda'].get('medical_specialty_description', ''),
            r.get('product_code', r['openfda'].get('product_code', '')),
            r.get('regulation_number', r['openfda'].get('regulation_number', '')),
            r.get('decision_description', r['openfda'].get('decision_description', '')), ]
        p['asset']['lic'].extend(third_party(r.get('third_party_flag', r['openfda'].get('third_party_flag', ''))))
        if len(r.get('expedited_review_flag', r['openfda'].get('expedited_review_flag', ''))) > 0:
            p['asset']['lic'].append('Expedited Review')
        sub_type_id = r.get('submission_type_id', r['openfda'].get('submission_type_id', ''))
        if sub_type_id not in {'1', '2'}:
            sub_type = submission_type(sub_type_id)
            if sub_type is not None:
                p['asset']['lic'].append(sub_type)
                p['tag'].append(sub_type)
        code = product_code.get(r.get('product_code', r['openfda'].get('product_code', '')), None)
        if code is not None:
            p['abs'] = code['device_name']
            p['asset']['lic'].extend([
                'Class ' + code['device_class'],
                'GMP Exempt' if code['gmp_exempt_flag'] == 'N' else 'GMP Required',
            ])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            p['abs'] = p['name']
        p['asset']['stat'] = map_status(r.get('decision_code', r['openfda'].get('decision_code', '')))
        try:
            p['created'] = parser.parse(r.get('date_received', r['openfda'].get('date_received', None))).strftime(
                "%a, %d %b %Y %H:%M:%S GMT")
        except (TypeError, ValueError):
            # the date is missing or malformed; leave p['created'] unset
            pass
        try:
            p['updated'] = parser.parse(r.get('decision_date', r['openfda'].get('decision_date', None))).strftime(
                "%a, %d %b %Y %H:%M:%S GMT")
        except (TypeError, ValueError):
            # the date is missing or malformed; leave p['updated'] unset
            pass
        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])
        a = create_company()
        a['name'] = r.get('applicant', r['openfda'].get('applicant', ''))
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of the contact

        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('failed to create record for {}'.format(p['name']))
            continue
        applicant_product = create_relationship(response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('failed to create relationship for {}'.format(p['name']))
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
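
Every field above is fetched with the same two-level fallback, r.get(key, r['openfda'].get(key, '')); a small helper could fold that into one call. A hypothetical sketch (get_field is illustrative, not part of the original code):

def get_field(record, key, default=''):
    # hypothetical helper: prefer the top-level value, then the openfda sub-document
    return record.get(key, record['openfda'].get(key, default))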
Example #6
def __init__(self, mongo_uri: str):
    self.client = MongoClient(mongo_uri)
    self.authors = UnionFind()
    self.logger = create_logger('pubmed.log')
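
UnionFind is referenced but not defined here; a minimal sketch of the standard disjoint-set structure (an assumption: the project's version may expose a different interface):

class UnionFind:
    # hypothetical sketch of a disjoint-set forest with path halving
    def __init__(self):
        self.parent = {}

    def find(self, x):
        self.parent.setdefault(x, x)
        while self.parent[x] != x:
            self.parent[x] = self.parent[self.parent[x]]  # path halving
            x = self.parent[x]
        return x

    def union(self, a, b):
        self.parent[self.find(a)] = self.find(b)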
Example #7
def parse_mesh(data_file):
    # top-level MeSH tree categories, keyed by their single-letter prefix
    result = {
        'A': {'name': 'Anatomy'},
        'B': {'name': 'Organisms'},
        'C': {'name': 'Diseases'},
        'D': {'name': 'Chemicals and Drugs'},
        'E': {'name': 'Analytical, Diagnostic and Therapeutic Techniques, and Equipment'},
        'F': {'name': 'Psychiatry and Psychology'},
        'G': {'name': 'Phenomena and Processes'},
        'H': {'name': 'Disciplines and Occupations'},
        'I': {'name': 'Anthropology, Education, Sociology, and Social Phenomena'},
        'J': {'name': 'Technology, Industry, and Agriculture'},
        'K': {'name': 'Humanities'},
        'L': {'name': 'Information Science'},
        'M': {'name': 'Named Groups'},
        'N': {'name': 'Health Care'},
        'V': {'name': 'Publication Characteristics'},
        'Z': {'name': 'Geographicals'},
    }
    logger = create_logger('pubmed.log')

    def process_record(_, record):
        name = record['DescriptorName']['String']
        logger.debug(name)
        if 'TreeNumberList' not in record or 'TreeNumber' not in record['TreeNumberList']:
            return True
        tree = record['TreeNumberList']['TreeNumber']
        if not isinstance(tree, list):
            tree = [tree]
        for t in tree:
            # the leading letter is the top-level category; the rest of the
            # tree number is dot-separated, so split it and prepend the letter
            path = [t[0]] + t[1:].split('.')
            node = result
            for p in path:
                if p not in node:
                    node[p] = {}
                node = node[p]
            node['name'] = name
        return True

    logger.info('processing {}'.format(data_file))
    try:
        with open(data_file, 'rb') as fp:
            xmltodict.parse(fp, item_depth=2, item_callback=process_record)
    except xmltodict.ParsingInterrupted:
        # raised if the callback returns a falsy value; parsing just stops early
        pass
    return dict(result)
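
A hypothetical invocation; the file name is a placeholder for an NLM MeSH descriptor XML dump:

tree = parse_mesh('desc2020.xml')  # placeholder file name
print(tree['C']['name'])  # -> 'Diseases'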