def setup_training_run(model_name):
    """Create a timestamped run directory, snapshot the config, and read training flags."""
    args = parse_training_args()
    config = Settings(args.config)
    util.ensure_exists(awe_runs_dir)
    # Run name: <model>_<count of existing runs for this model>_<date>.
    run_name = '{0}_{1}_{2}'.format(
        model_name,
        len([path for path in os.listdir(awe_runs_dir)
             if os.path.isdir(os.path.join(awe_runs_dir, path)) and model_name in path]),
        datetime.datetime.now().strftime('%d_%m_%Y'))
    log_dir = os.path.join(awe_runs_dir, run_name)
    checkpoint_dir = os.path.join(log_dir, 'checkpoints')
    util.ensure_exists(log_dir)
    util.ensure_exists(checkpoint_dir)
    # Keep a copy of the config next to the logs so the run is reproducible.
    copyfile(args.config, os.path.join(log_dir, 'conf.ini'))
    logger = create_logger(model_name, os.path.join(log_dir, 'log'))
    logger.info('Running with args:')
    for var in vars(args):
        logger.info('{0}: {1}'.format(var, getattr(args, var)))
    # The general_training section may be absent entirely; getattr on a
    # possibly-None section keeps every flag optional without raising
    # AttributeError.
    general = getattr(config, 'general_training', None)
    use_gru = getattr(general, 'use_gru', False)
    noise_mult = getattr(general, 'noise_multiplier', 0)
    noise_prob = getattr(general, 'noise_prob', 0)
    mean_sub = getattr(general, 'mean_subtraction', True)
    var_norm = getattr(general, 'variance_normalization', False)
    return (args, config, logger, checkpoint_dir, log_dir,
            use_gru, noise_mult, noise_prob, mean_sub, var_norm)
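
# A minimal usage sketch, assuming parse_training_args() reads a --config flag
# from the command line; 'awe' is a hypothetical model name for illustration.
if __name__ == '__main__':
    (args, config, logger, checkpoint_dir, log_dir,
     use_gru, noise_mult, noise_prob, mean_sub, var_norm) = setup_training_run('awe')
    logger.info('checkpoints go to {0}, logs to {1}'.format(checkpoint_dir, log_dir))
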
def upload_to_server(work_directory):
    book = load_workbook(os.path.join(work_directory, 'japan.xlsx'))
    log = create_logger('japan-pmda')
    log.critical(datetime.datetime.now())
    for sheet_name in ('device', 'drug'):
        # Index access replaces openpyxl's deprecated get_sheet_by_name().
        sheet = book[sheet_name]
        # Skip the header row and normalize each cell to a clean value.
        for row in sheet.iter_rows(min_row=2):
            cells = []
            for c in row:
                if isinstance(c.value, str):
                    cells.append(replace_carriage(c.value))
                elif c.value is None:
                    cells.append('')
                else:
                    cells.append(c.value)
            if sheet_name == 'device':
                p, a = parse_device(cells)
            else:
                p, a = parse_drug(cells)
            if len(p['name']) < 1 or len(a['name']) < 1:
                log.warning('invalid record for {}'.format(p['name']))
                continue
            response = add_record('entity', [p, a])
            if response['_status'] != 'OK':
                log.error('failed to create record for {}'.format(p['name']))
                log.error(response)
                continue
            # Link the applicant (second item) to the product (first item).
            applicant_product = create_relationship(
                response['_items'][1]['_id'], response['_items'][0]['_id'])
            applicant_product['type'] = 7
            applicant_product['name'] = 'Applicant'
            applicant_product['abs'] = 'Applicant'
            response = add_record('relationship', [applicant_product])
            if response['_status'] != 'OK':
                log.error('failed to create relationship for {}'.format(p['name']))
                log.error(response)
            else:
                log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
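
# Hedged sketch of the replace_carriage helper used above, assuming it only
# normalizes carriage returns inside cell text (the real helper may do more):
def replace_carriage(text: str) -> str:
    # Collapse Windows-style '\r\n' first, then any bare '\r'.
    return text.replace('\r\n', '\n').replace('\r', '\n')
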
def get_data(name: str, list_url: str, page_url: str, total_page: int, with_proxy: bool = False):
    log = create_logger(name)
    # Each proxy needs about 5 seconds to start; the pool below uses 16 workers,
    # so 16 proxies are required when with_proxy is set.
    time.sleep(5)
    work_directory = os.path.expanduser('~/Downloads/{}'.format(name))
    if not os.path.exists(work_directory):
        os.mkdir(work_directory)
    results = find_list(log, work_directory, list_url, total_page)
    slugs = []
    for r in results:
        slug = r['opportunities'][0]['fileNumberSlug']
        # Skip records that already have a JSON file on disk.
        if os.path.exists(os.path.join(work_directory, '{}.json'.format(slug))):
            log.debug('already processed {}'.format(slug))
            continue
        slugs.append(page_url.format(slug))
    if with_proxy:
        PROXY_THREAD.start()
    with Pool(16) as pool:
        pool.starmap(parse_page, [(None, work_directory, with_proxy, slug) for slug in slugs])
    if with_proxy:
        PROXY_THREAD.close()
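
# Usage sketch with hypothetical URLs; the real caller supplies site-specific
# templates, where '{}' is presumably filled with a page number in list_url and
# with an opportunity's fileNumberSlug in page_url.
if __name__ == '__main__':
    get_data(
        name='example-agency',
        list_url='https://example.com/api/opportunities?page={}',
        page_url='https://example.com/api/opportunities/{}',
        total_page=10)
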
def __init__(self, data_path):
    self.logger = create_logger('patents_view.log')
    if not os.path.exists(data_path):
        self.logger.critical('{} does not exist'.format(data_path))
        return
    self.data_path = data_path
    # Load each PatentsView classification table into an in-memory lookup.
    self.cpc_group = {}
    self.init_cpc_group(os.path.join(data_path, 'cpc_group.tsv'))
    self.cpc_subgroup = {}
    self.init_cpc_subgroup(os.path.join(data_path, 'cpc_subgroup.tsv'))
    self.cpc_subsection = {}
    self.init_cpc_subsection(os.path.join(data_path, 'cpc_subsection.tsv'))
    self.nber_category = {}
    self.init_nber_category(os.path.join(data_path, 'nber_category.tsv'))
    self.nber_subcategory = {}
    self.init_nber_subcategory(os.path.join(data_path, 'nber_subcategory.tsv'))
    self.uspc_class = {}
    self.init_uspc_class(os.path.join(data_path, 'mainclass_current.tsv'))
    self.uspc_subclass = {}
    self.init_uspc_subclass(os.path.join(data_path, 'subclass_current.tsv'))
    self.wipo_field = {}
    self.init_wipo_field(os.path.join(data_path, 'wipo_field.tsv'))
    self.uspto_class = {}
    # Note: USPTO classes are read from the same mainclass_current.tsv file.
    self.init_uspto_class(os.path.join(data_path, 'mainclass_current.tsv'))
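
import csv

# Hedged sketch of one of the loaders, assuming each TSV starts with a header
# row and has an id column followed by a title column (the real PatentsView
# files may carry extra columns; only the first two are used here):
def init_cpc_group(self, file_name):
    with open(file_name, newline='', encoding='utf-8') as f:
        reader = csv.reader(f, delimiter='\t')
        next(reader, None)  # skip the header row
        for row in reader:
            if len(row) >= 2:
                self.cpc_group[row[0]] = row[1]
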
def main():
    product_code = get_product_code()
    log = create_logger('510K')
    with open(os.path.expanduser('~/work/fda/device-classification-0001-of-0001.json'), 'r') as f:
        result = json.load(f)
    log.critical(datetime.datetime.now())

    def field(r, key, default=''):
        # Prefer the top-level value and fall back to the nested 'openfda' block.
        return r.get(key, r['openfda'].get(key, default))

    for r in result['results']:
        p = create_product()
        p['name'] = field(r, 'device_name')
        p['ref'] = field(r, 'k_number')
        p['addr']['line1'] = field(r, 'address_1')
        p['addr']['line2'] = field(r, 'address_2')
        p['addr']['city'] = field(r, 'city')
        p['addr']['state'] = field(r, 'state')
        p['addr']['zip'] = field(r, 'zip_code')
        p['addr']['country'] = field(r, 'country_code')
        p['intro'] = field(r, 'statement_or_summary')
        p['asset']['type'] = 0
        # p['tag'] holds tags readable by ordinary users; p['asset']['lic'] holds
        # tags specific to the product.
        p['tag'] = [
            field(r, 'advisory_committee_description'),
            field(r, 'medical_specialty_description'),
            'FDA', 'Medical Device', '510K']
        p['asset']['lic'] = [
            'FDA', '510K',
            field(r, 'clearance_type'),
            field(r, 'advisory_committee_description'),
            r['openfda'].get('medical_specialty_description', ''),
            field(r, 'product_code'),
            field(r, 'regulation_number'),
            field(r, 'decision_description'),
        ]
        p['asset']['lic'].extend(third_party(field(r, 'third_party_flag')))
        if len(field(r, 'expedited_review_flag')) > 0:
            p['asset']['lic'].append('Expedited Review')
        submission = submission_type(field(r, 'submission_type_id'))
        if field(r, 'submission_type_id') not in {'1', '2'} and submission is not None:
            p['asset']['lic'].append(submission)
            p['tag'].append(submission)
        code = product_code.get(field(r, 'product_code'), None)
        if code is not None:
            p['abs'] = code['device_name']
            p['asset']['lic'].extend([
                'Class ' + code['device_class'],
                'GMP Exempt' if code['gmp_exempt_flag'] == 'N' else 'GMP Required',
            ])
            p['tag'].append('Class ' + code['device_class'])
            if code['implant_flag'] != 'N':
                p['asset']['lic'].append('Implant')
                p['tag'].append('Implant')
            if code['life_sustain_support_flag'] != 'N':
                p['asset']['lic'].append('Life Sustain Support')
                p['tag'].append('Life Sustain Support')
        else:
            p['abs'] = p['name']
        p['asset']['stat'] = map_status(field(r, 'decision_code'))
        # Dates may be missing or malformed; leave the fields unset in that case.
        try:
            p['created'] = parser.parse(field(r, 'date_received', None)).strftime(
                '%a, %d %b %Y %H:%M:%S GMT')
        except (TypeError, ValueError):
            pass
        try:
            p['updated'] = parser.parse(field(r, 'decision_date', None)).strftime(
                '%a, %d %b %Y %H:%M:%S GMT')
        except (TypeError, ValueError):
            pass
        p['asset']['lic'] = remove_empty_string_from_array(p['asset']['lic'])
        p['tag'] = remove_empty_string_from_array(p['tag'])
        a = create_company()
        a['name'] = field(r, 'applicant')
        a['abs'] = 'A Medical Device Company'
        a['addr'] = p['addr']
        a['tag'] = p['tag']
        a['group']['parentId'] = '000000000000000000000000'
        # contact is just the name of the contact
        response = add_record('entity', [p, a])
        if response['_status'] != 'OK':
            log.error('failed to create record for {}'.format(p['name']))
            continue
        # Link the applicant (second item) to the product (first item).
        applicant_product = create_relationship(
            response['_items'][1]['_id'], response['_items'][0]['_id'])
        applicant_product['type'] = 7
        applicant_product['name'] = 'Applicant'
        applicant_product['abs'] = 'Applicant'
        response = add_record('relationship', [applicant_product])
        if response['_status'] != 'OK':
            log.error('failed to create relationship for {}'.format(p['name']))
        else:
            log.debug('added {} to the system'.format(p['name']))
    log.critical(datetime.datetime.now())
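
# Hedged sketch of the list-cleaning helper used above, assuming it only drops
# empty strings while preserving order (the real helper may also drop None):
def remove_empty_string_from_array(items):
    return [item for item in items if item != '']
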
def __init__(self, mongo_uri: str):
    self.client = MongoClient(mongo_uri)
    # Disjoint-set structure keyed by author identifiers.
    self.authors = UnionFind()
    self.logger = create_logger('pubmed.log')
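
# Hedged sketch of the UnionFind structure referenced above, assuming a
# standard disjoint-set with path compression over hashable author ids
# (the project's real implementation may differ):
class UnionFind:
    def __init__(self):
        self.parent = {}

    def find(self, x):
        # Create a singleton set the first time x is seen.
        self.parent.setdefault(x, x)
        while self.parent[x] != x:
            # Path halving: point x at its grandparent as we walk up.
            self.parent[x] = self.parent[self.parent[x]]
            x = self.parent[x]
        return x

    def union(self, a, b):
        self.parent[self.find(a)] = self.find(b)
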
def parse_mesh(data_file):
    """Parse a MeSH descriptor XML file into a nested dict keyed by tree numbers."""
    # Top-level MeSH categories, keyed by the first letter of each tree number.
    result = {
        'A': {'name': 'Anatomy'},
        'B': {'name': 'Organisms'},
        'C': {'name': 'Diseases'},
        'D': {'name': 'Chemicals and Drugs'},
        'E': {'name': 'Analytical, Diagnostic and Therapeutic Techniques, and Equipment'},
        'F': {'name': 'Psychiatry and Psychology'},
        'G': {'name': 'Phenomena and Processes'},
        'H': {'name': 'Disciplines and Occupations'},
        'I': {'name': 'Anthropology, Education, Sociology, and Social Phenomena'},
        'J': {'name': 'Technology, Industry, and Agriculture'},
        'K': {'name': 'Humanities'},
        'L': {'name': 'Information Science'},
        'M': {'name': 'Named Groups'},
        'N': {'name': 'Health Care'},
        'V': {'name': 'Publication Characteristics'},
        'Z': {'name': 'Geographicals'},
    }
    logger = create_logger('pubmed.log')

    def process_record(_, record):
        name = record['DescriptorName']['String']
        logger.debug(name)
        if 'TreeNumberList' not in record or 'TreeNumber' not in record['TreeNumberList']:
            return True
        tree = record['TreeNumberList']['TreeNumber']
        if not isinstance(tree, list):
            tree = [tree]
        for t in tree:
            # The first character is the category letter; the remainder is a
            # dot-separated path, e.g. 'A01.047' -> ['A', '01', '047'].
            path = [t[0]]
            path.extend(t[1:].split('.'))
            node = result
            for p in path:
                if p not in node:
                    node[p] = {}
                node = node[p]
            node['name'] = name
        return True

    logger.info('process {}'.format(data_file))
    try:
        with open(data_file, 'rb') as f:
            # Stream the XML record by record instead of loading it all at once.
            xmltodict.parse(f, item_depth=2, item_callback=process_record)
    except xmltodict.ParsingInterrupted:
        # Raised if the callback ever returns False to stop parsing early.
        pass
    return dict(result)
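
# Usage sketch with a hypothetical descriptor file name (NLM publishes them as
# descYYYY.xml). A descriptor at tree number A01 lands under result['A']['01'],
# following the path-splitting rule in process_record above.
if __name__ == '__main__':
    mesh = parse_mesh('desc2023.xml')
    print(mesh['A']['name'])        # Anatomy
    print(mesh['A']['01']['name'])  # the descriptor stored at tree number A01
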