def update_abstract(nex_session, fw, pmid, reference_id, record, abstract_db, source_id):
    """Insert or refresh the stored abstract of one reference.

    The abstract text is read from ``record['AB']``.  Nothing happens when
    the record carries no abstract or when it equals ``abstract_db``.  When
    ``abstract_db`` is None a new ``Referencedocument`` row with
    ``document_type='Abstract'`` is added; otherwise the existing abstract
    row of ``reference_id`` is updated.  Every change is committed
    immediately and reported both to ``fw`` and to stdout.

    Args:
        nex_session: database session used for add/query/commit.
        fw: writable log handle (only ``write`` is called on it).
        pmid: PubMed id; used only in the log messages.
        reference_id: id of the reference that owns the abstract.
        record: mapping holding the abstract under the ``'AB'`` key.
        abstract_db: abstract text currently stored, or None if none exists.
        source_id: source id stored on a newly created row.

    Returns:
        None.
    """
    abstract = record.get('AB', '')
    # Covers a missing/empty 'AB' field as well as an explicit None value,
    # which would otherwise be inserted as a row without text.
    if not abstract:
        return
    if abstract == abstract_db:
        return

    html = link_gene_names(abstract, {}, nex_session)

    if abstract_db is None:
        x = Referencedocument(document_type='Abstract',
                              source_id=source_id,
                              reference_id=reference_id,
                              text=abstract,
                              html=html,
                              created_by=CREATED_BY)
        nex_session.add(x)
        nex_session.commit()
        fw.write("PMID:" + str(pmid) + ": new abstract added.\nNew abstract: " + abstract + "\n")
        print("PMID:", pmid, ": new abstract added.")
        print("New abstract:", abstract)
    else:
        # Restrict the update to the abstract row: filtering on reference_id
        # alone would also overwrite any other document type stored for the
        # same reference.
        nex_session.query(Referencedocument).filter_by(
            reference_id=reference_id,
            document_type='Abstract').update({
                'text': abstract,
                'html': html
            })
        nex_session.commit()
        fw.write("PMID=" + str(pmid) + ": the abstract is updated.\nNew abstract: " + abstract + "\nOld abstract: " + abstract_db + "\n\n")
        print("PMID=", pmid, ": the abstract is updated.")
        print("New abstract:", abstract)
        print("Old abstract:", abstract_db)
def read_summary_file(nex_session, fw, summary_type, summary_file, log_file):
    """Parse a tab-delimited summary file into a list of summary dicts.

    Supported ``summary_type`` values and the columns read (0-based):

    * ``'Phenotype'``  -- column 0 systematic name, column 1 summary text.
    * ``'Regulation'`` -- column 0 systematic name, column 2 summary text,
      column 3 (optional) ``|``-separated PMIDs.
    * ``'Function'``   -- column 8 ``SGD:<sgdid>``, column 9 ``|``-separated
      attributes, of which the one starting with ``go_annotation_summary``
      supplies the text.

    Lines whose gene/SGDID is not found in the database are skipped.

    Args:
        nex_session: database session used to load loci and references.
        fw: writable log handle; receives the unknown-type message.
        summary_type: one of the three values listed above.
        summary_file: path of the file to read.
        log_file: unused; kept for interface compatibility.

    Returns:
        A list of dicts with keys ``locus_id``, ``text``, ``html``,
        ``summary_type``, ``summary_order`` and, for Regulation only,
        ``references`` (dicts with ``reference_id`` / ``reference_order``).

    Raises:
        SystemExit: if ``summary_type`` is not supported.
    """
    # Fail fast, before loading whole tables from the database.
    if summary_type not in ('Phenotype', 'Regulation', 'Function'):
        fw.write("Unknown summary_type: " + summary_type + "\n")
        # Same exception the interactive-only exit() helper raises.
        raise SystemExit

    from util import link_gene_names
    from models import Locusdbentity, Referencedbentity

    # Query the loci once and derive both lookup tables from the same rows.
    loci = nex_session.query(Locusdbentity).all()
    name_to_dbentity = {locus.systematic_name: locus for locus in loci}
    sgdid_to_dbentity_id = {locus.sgdid: locus.dbentity_id for locus in loci}
    pmid_to_reference_id = {
        ref.pmid: ref.dbentity_id
        for ref in nex_session.query(Referencedbentity).all()
    }

    def linked_html(dbentity, text):
        # The gene's own names (and their 'P' variants) are passed along
        # as a set to link_gene_names.
        own_names = {dbentity.display_name,
                     dbentity.format_name,
                     dbentity.display_name + 'P',
                     dbentity.format_name + 'P'}
        return link_gene_names(text, own_names, nex_session)

    data = []

    # 'with' guarantees the handle is closed; text mode already applies
    # universal-newline translation, so the removed 'U' flag is not needed.
    with open(summary_file) as f:
        if summary_type == 'Phenotype':
            for line in f:
                pieces = line.strip().split('\t')
                dbentity = name_to_dbentity.get(pieces[0])
                if dbentity is None:
                    continue
                data.append({'locus_id': dbentity.dbentity_id,
                             'text': pieces[1],
                             'html': linked_html(dbentity, pieces[1]),
                             'summary_type': summary_type,
                             'summary_order': 1})

        elif summary_type == 'Regulation':
            for line in f:
                pieces = line.strip().split('\t')
                dbentity = name_to_dbentity.get(pieces[0])
                if dbentity is None:
                    continue

                references = []
                if len(pieces) > 3:
                    order = 0
                    for pmid in pieces[3].replace(' ', '').split('|'):
                        # An empty column or a stray '|' yields '' tokens.
                        if not pmid:
                            continue
                        reference_id = pmid_to_reference_id.get(int(pmid))
                        if reference_id is None:
                            print("PMID=", pmid, " is not in the database")
                            continue
                        order = order + 1
                        references.append({'reference_id': reference_id,
                                           'reference_order': order})

                data.append({'locus_id': dbentity.dbentity_id,
                             'text': pieces[2],
                             'html': linked_html(dbentity, pieces[2]),
                             'summary_type': summary_type,
                             'summary_order': 1,
                             'references': references})

        else:  # 'Function'
            for line in f:
                pieces = line.split('\t')
                # Columns 8 and 9 are both read, so ten columns are needed.
                if len(pieces) < 10:
                    continue
                sgdid = pieces[8]
                if not sgdid.startswith('SGD:'):
                    continue
                dbentity_id = sgdid_to_dbentity_id.get(sgdid[4:])
                if dbentity_id is None:
                    continue
                # x[22:] drops the 21-character key plus one separator char.
                functionSummary = [x[22:].strip()
                                   for x in pieces[9].split('|')
                                   if x.startswith('go_annotation_summary')]
                if len(functionSummary) == 1:
                    data.append({'locus_id': dbentity_id,
                                 'text': functionSummary[0],
                                 'html': functionSummary[0],
                                 'summary_type': summary_type,
                                 'summary_order': 1})

    return data
def read_summary_file(nex_session, fw, summary_type, summary_file_reader, log_file, data_for_json):
    """Turn already-split summary rows into a list of summary dicts.

    ``summary_file_reader`` is iterated and every item is indexed like a
    list of column strings (presumably a ``csv.reader`` -- confirm against
    the caller).

    Supported ``summary_type`` values and the columns read (0-based):

    * ``'Phenotype_Regulation'`` -- column 0 systematic name, column 1 the
      per-row summary type, column 2 summary text, column 3 (optional)
      ``|``-separated PMIDs.  Rows naming an unknown gene are reported by
      appending an entry to ``data_for_json`` and are otherwise skipped.
    * ``'Function'`` -- column 8 ``SGD:<sgdid>``, column 9 ``|``-separated
      attributes, of which the one starting with ``go_annotation_summary``
      supplies the text.

    Args:
        nex_session: database session used to load loci and references.
        fw: writable log handle; receives the unknown-type message.
        summary_type: ``'Phenotype_Regulation'`` or ``'Function'``.
        summary_file_reader: iterable of rows (sequences of strings).
        log_file: unused; kept for interface compatibility.
        data_for_json: list that collects problem reports (mutated in place).

    Returns:
        A list of dicts with keys ``locus_id``, ``text``, ``html``,
        ``summary_type``, ``summary_order``; Phenotype/Regulation entries
        additionally carry ``references`` and ``dbentity``.

    Raises:
        SystemExit: if ``summary_type`` is not supported.
    """
    # Fail fast, before loading whole tables from the database.
    if summary_type not in ('Phenotype_Regulation', 'Function'):
        fw.write("Unknown summary_type: " + summary_type + "\n")
        # Same exception the interactive-only exit() helper raises.
        raise SystemExit

    from util import link_gene_names
    from models import Locusdbentity, Referencedbentity

    # Query the loci once and derive both lookup tables from the same rows.
    loci = nex_session.query(Locusdbentity).all()
    name_to_dbentity = {locus.systematic_name: locus for locus in loci}
    sgdid_to_dbentity_id = {locus.sgdid: locus.dbentity_id for locus in loci}
    pmid_to_reference_id = {
        ref.pmid: ref.dbentity_id
        for ref in nex_session.query(Referencedbentity).all()
    }

    # Accepted spellings of the per-row type and their canonical form;
    # anything else is passed through unchanged.
    canonical_type = {
        'Phenotype': 'Phenotype',
        'phenotype': 'Phenotype',
        'PHENOTYPE': 'Phenotype',
        'Regulation': 'Regulation',
        'regulation': 'Regulation',
        'REGULATION': 'Regulation',
    }

    data = []

    if summary_type == 'Phenotype_Regulation':
        for pieces in summary_file_reader:
            # Blank input lines can surface as empty rows; nothing to load.
            if not pieces:
                continue

            # Kept in its own variable so the function argument is not
            # overwritten while looping.
            row_type = canonical_type.get(pieces[1], pieces[1])

            dbentity = name_to_dbentity.get(pieces[0])
            if dbentity is None:
                # Report the row using its own type and text.
                data_for_json.append({
                    'category': 'locus',
                    'name': pieces[0],
                    'type': row_type + ' summary',
                    'value': pieces[2],
                    'tag': "unknown gene name"
                })
                continue

            references = []
            if len(pieces) > 3:
                order = 0
                for pmid in pieces[3].replace(' ', '').split('|'):
                    # An empty column or a stray '|' yields '' tokens.
                    if not pmid:
                        continue
                    reference_id = pmid_to_reference_id.get(int(pmid))
                    if reference_id is None:
                        print("PMID=", pmid, " is not in the database")
                        continue
                    order = order + 1
                    references.append({
                        'reference_id': reference_id,
                        'reference_order': order
                    })

            data.append({
                'locus_id': dbentity.dbentity_id,
                'text': pieces[2],
                'html': link_gene_names(
                    pieces[2], {
                        dbentity.display_name,
                        dbentity.format_name,
                        dbentity.display_name + 'P',
                        dbentity.format_name + 'P'
                    }, nex_session),
                'summary_type': row_type,
                'summary_order': 1,
                'references': references,
                'dbentity': dbentity
            })

    else:  # 'Function'
        for pieces in summary_file_reader:
            # Columns 8 and 9 are both read, so ten columns are needed.
            if len(pieces) < 10:
                continue
            sgdid = pieces[8]
            if not sgdid.startswith('SGD:'):
                continue
            dbentity_id = sgdid_to_dbentity_id.get(sgdid[4:])
            if dbentity_id is None:
                continue
            # x[22:] drops the 21-character key plus one separator char.
            functionSummary = [
                x[22:].strip() for x in pieces[9].split('|')
                if x.startswith('go_annotation_summary')
            ]
            if len(functionSummary) == 1:
                data.append({
                    'locus_id': dbentity_id,
                    'text': functionSummary[0],
                    'html': functionSummary[0],
                    'summary_type': summary_type,
                    'summary_order': 1
                })

    return data