def task_search_for_orphan_enzymes():
    """Create a Sequence entry for every Activity enzyme name that lacks one.

    Iterates over the distinct ``enzyme_name`` values found on Activity
    documents. For each name with no matching Sequence document, a new
    Sequence is saved using that name and the ``enzyme_type`` of the first
    Activity carrying the name.
    """
    candidate_names = set(Activity.objects().distinct('enzyme_name'))

    for candidate in candidate_names:
        # A name that already has a Sequence document is not an orphan.
        if len(Sequence.objects(enzyme_name=candidate)) != 0:
            continue

        first_activity = Activity.objects(enzyme_name=candidate)[0]
        orphan_type = first_activity.enzyme_type

        placeholder = Sequence(enzyme_name=candidate, enzyme_type=orphan_type)
        placeholder.save()

        message = f"found orphan enzyme, added sequence entry for {candidate} - {orphan_type}"
        print(message)
def add_new_enzymes():
    """Link an existing enzyme to a paper, or create a new enzyme for it.

    Reads ``enzyme_type``, ``existing_name``, ``new_name`` and ``paper_id``
    from the submitted form. Exactly one of ``existing_name`` and
    ``new_name`` must be non-empty:

    * ``existing_name`` - the paper is added to that Sequence's ``papers``
      list (only if it is not already there).
    * ``new_name`` - a new Sequence is created with the current user as
      ``added_by`` and ``owner``, the user is recorded in
      ``paper.edits_by``, and the paper is passed to
      ``papers_functions.tag_paper_with_enzyme_types``.

    Returns:
        A JSON response ``{"result": {"status": ..., "msg": ..., "issues": [...]}}``
        where ``status`` is ``'success'`` or ``'danger'``.
    """

    def _result(status, msg, issues=None):
        # Single place that defines the payload shape the client receives.
        return {'status': status, 'msg': msg, 'issues': issues or []}

    enzyme_type = request.form['enzyme_type']
    existing_name = request.form['existing_name']
    new_name = request.form['new_name']
    user = user_datastore.get_user(current_user.id)
    # .first() yields None for an unknown id instead of raising IndexError.
    paper = Paper.objects(id=request.form['paper_id']).first()

    if paper is None:
        result = _result('danger', 'Paper not found')

    elif not enzyme_type:
        result = _result('danger', 'Must select an enzyme type')

    elif not existing_name and not new_name:
        result = _result('danger', 'Must select an enzyme or enter a new name')

    elif existing_name and new_name:
        result = _result('danger',
                         'Choose either an existing enzyme or enter a new name',
                         ["(One must be blank)"])

    elif existing_name:
        seq = Sequence.objects(enzyme_name=existing_name).first()
        if seq is None:
            result = _result('danger', 'Selected enzyme does not exist')
        else:
            # Avoid storing the same paper reference twice on resubmission.
            if paper not in seq.papers:
                seq.papers.append(paper)
                seq.save()
            result = _result('success', 'Sequence added to paper')

    else:
        # Only remaining case: new_name is set and existing_name is blank.
        if Sequence.objects(enzyme_name=new_name).first() is not None:
            # Sequences are looked up by enzyme_name elsewhere in this file,
            # so a second document with the same name would be ambiguous.
            result = _result('danger', 'An enzyme with this name already exists')
        else:
            seq = Sequence(enzyme_name=new_name,
                           enzyme_type=enzyme_type,
                           added_by=user,
                           owner=user,
                           papers=[paper])
            seq.save()

            if user not in paper.edits_by:
                paper.edits_by.append(user)

            # NOTE(review): paper is not saved explicitly here; presumably
            # tag_paper_with_enzyme_types persists it - confirm.
            papers_functions.tag_paper_with_enzyme_types(paper)

            result = _result('success', 'Sequence added to paper')

    return jsonify(result=result)
def _strip_doi_url_prefixes(html_doi):
    """Return *html_doi* with every known DOI resolver URL prefix removed."""
    doi = html_doi
    for prefix in ('https://doi.org/', 'http://doi.org/', 'http://dx.doi.org/'):
        doi = doi.replace(prefix, '')
    return doi


def df_to_db(spec_df):
    """Save every row of *spec_df* to the database as an Activity.

    For each row this also makes sure the related documents exist:

    * a Paper keyed by the row's DOI (``html_doi`` with the resolver URL
      prefix removed) - created if missing;
    * an EnzymeType for ``enzyme_type`` - created if missing, when the value
      is a non-empty string;
    * a Sequence for ``enzyme_name`` - created if missing, otherwise the
      paper is appended to its ``papers``, when the value is a non-empty
      string.

    Args:
        spec_df: DataFrame-like object exposing ``iterrows()``; each row must
            provide the columns read below (``html_doi``, ``added_by``,
            ``short_citation``, ``enzyme_type``, ``enzyme_name``, ...).
    """
    print('Saving biocatdb_2 excel to mongodb..')

    for _, row in spec_df.iterrows():
        html_doi = str(row['html_doi'])
        doi = _strip_doi_url_prefixes(html_doi)
        added_by_string = str(row['added_by'])

        # One query instead of a len() check followed by an index lookup.
        paper = Paper.objects(doi=doi).first()
        if paper is None:
            paper = Paper(short_citation=str(row['short_citation']),
                          html=html_doi,
                          doi=doi)
            paper = paper.save()
            print(f"{row['short_citation']} added")

        enzyme_type = row['enzyme_type']
        # isinstance also filters out None and NaN (float) cells.
        if isinstance(enzyme_type, str) and enzyme_type != '':
            if EnzymeType.objects(enzyme_type=enzyme_type).first() is None:
                enz_type = EnzymeType(enzyme_type=enzyme_type, description='')
                enz_type.save()

        enzyme_name = row['enzyme_name']
        if isinstance(enzyme_name, str) and enzyme_name != '':
            seq = Sequence.objects(enzyme_name=enzyme_name).first()
            if seq is None:
                seq = Sequence(enzyme_name=check_is_nan(enzyme_name),
                               enzyme_type=check_is_nan(enzyme_type),
                               papers=[paper])
                seq.save()
            elif paper not in seq.papers:
                seq.papers.append(paper)
                seq.save()

        # bool() so a plain Python bool is stored rather than a numpy scalar.
        binary = bool(row['binary'] == 1)
        auto_gen = bool(row['auto_generated'] == 1)

        activity = Activity(
            enzyme_type=check_is_nan(row['enzyme_type']),
            enzyme_name=check_is_nan(row['enzyme_name']),
            reaction=check_is_nan(row['reaction']),
            short_citation=check_is_nan(row['short_citation']),
            html_doi=check_is_nan(row['html_doi']),
            added_by_string=added_by_string,
            paper=paper,
            cascade_num=check_is_nan(row['cascade_num']),
            substrate_1_smiles=get_smile(row['substrate_1_smiles']),
            substrate_2_smiles=get_smile(row['substrate_2_smiles']),
            product_1_smiles=get_smile(row['product_1_smiles']),
            temperature=check_is_nan(row['temperature']),
            ph=check_is_nan(row['ph']),
            solvent=check_is_nan(row['solvent']),
            other_conditions=check_is_nan(row['other_conditions']),
            notes=check_is_nan(row['notes']),
            reaction_vol=check_is_nan(row['reaction_vol']),
            formulation=check_is_nan(row['formulation']),
            biocat_conc=check_is_nan(row['biocat_conc']),
            kcat=check_is_float(row['kcat']),
            km=check_is_float(row['km']),
            mw=check_is_float(row['mw']),
            substrate_1_conc=check_is_nan(row['substrate_1_conc']),
            substrate_2_conc=check_is_nan(row['substrate_2_conc']),
            specific_activity=check_is_float(row['specific_activity']),
            conversion=check_is_float(row['conversion']),
            conversion_time=check_is_float(row['conversion_time']),
            categorical=check_is_nan(row['categorical']),
            binary=binary,
            selectivity=check_is_nan(row['selectivity']),
            auto_generated=auto_gen,
        )
        activity.save()

    print('..done')
def _split_name_list(raw):
    """Split a ', '-separated string into a list, dropping blank entries."""
    return [part.strip() for part in raw.split(', ') if part.strip() != '']


def save_or_add_seqs(data_list, paper):
    """Create or update Sequence documents from uploaded rows for *paper*.

    Used by the excel upload. Each dict in *data_list* is normalised in
    place (blank ``sequence_unavailable`` / ``structure`` become ``'False'``;
    newlines and spaces are removed from ``sequence``) and then:

    * if no Sequence with that ``enzyme_name`` exists, a new one is created
      and owned by the current user, provided its ``enzyme_type`` is a known
      EnzymeType and the sequence check does not reject it;
    * otherwise *paper* is added to the existing Sequence's ``papers``, and
      blank fields are filled in only when the Sequence is unowned or owned
      by the current user.

    Args:
        data_list: list of dicts, one per uploaded sequence row.
        paper: the Paper document the sequences belong to.

    Returns:
        A list of human-readable issue strings; empty when every row was
        processed without problems.
    """
    user = user_datastore.get_user(current_user.id)
    issues = []
    enzyme_types = EnzymeType.objects().distinct('enzyme_type')

    for seq_dict in data_list:
        # Normalise blank cells so strtobool() below receives a valid value.
        if seq_dict.get('sequence_unavailable') == '':
            seq_dict['sequence_unavailable'] = 'False'
        if seq_dict.get('structure') == '':
            seq_dict['structure'] = 'False'
        if 'sequence' in seq_dict:
            seq_dict['sequence'] = seq_dict['sequence'].replace('\n', '').replace(' ', '')

        enzyme_name = seq_dict.get('enzyme_name', '')
        if enzyme_name == '':
            issues.append("Sequence must have a name")
            continue

        seq = Sequence.objects(enzyme_name=enzyme_name).first()

        if seq is None:
            enzyme_type = seq_dict.get('enzyme_type', '')
            if enzyme_type not in enzyme_types:
                msg = f"Enzyme type {enzyme_type} does not exist"
                print(msg)
                issues.append(msg)
                continue

            # Explicit comparison kept on purpose: sequence_check is defined
            # elsewhere and its return type is not visible here, so plain
            # truthiness might not be equivalent.
            if sequence_check(seq_dict.get('sequence', '')) == False:  # noqa: E712
                msg = f"Amino acid sequence for {enzyme_name} uses incorrect amino acid characters"
                print(msg)
                issues.append(msg)
                continue

            print('Creating new sequence..')
            seq = Sequence(
                enzyme_name=enzyme_name,
                enzyme_type=seq_dict['enzyme_type'],
                other_names=_split_name_list(seq_dict.get('other_names', '')),
                sequence=seq_dict.get('sequence', ''),
                n_tag=seq_dict.get('n_tag', ''),
                c_tag=seq_dict.get('c_tag', ''),
                sequence_unavailable=strtobool(
                    seq_dict.get('sequence_unavailable', 'False')),
                accession=seq_dict.get('accession', ''),
                # Previously read from 'other_names' (copy/paste slip).
                other_identifiers=_split_name_list(
                    seq_dict.get('other_identifiers', '')),
                pdb=seq_dict.get('pdb', ''),
                mutant_of=seq_dict.get('mutant_of', ''),
                notes=seq_dict.get('notes', ''),
                papers=[paper],
                owner=user,
            )
            seq.save()
            continue

        # A Sequence with this name already exists: always link the paper.
        if paper not in seq.papers:
            seq.papers.append(paper)

        if seq.owner == user or seq.owner is None:
            seq.owner = user

            # Add only names not already recorded, so repeated uploads do
            # not accumulate duplicates or blank entries.
            for name in _split_name_list(seq_dict.get('other_names', '')):
                if name not in seq.other_names:
                    seq.other_names.append(name)

            # Existing values win; uploaded data only fills blank fields.
            if seq.sequence is None or seq.sequence == '':
                seq.sequence = seq_dict.get('sequence', '')
            if strtobool(seq_dict.get('sequence_unavailable', 'False')) == True:  # noqa: E712
                seq.sequence_unavailable = True
            if seq.accession is None or seq.accession == '':
                seq.accession = seq_dict.get('accession', '')
            # pdb is the exception: a non-blank uploaded value overwrites.
            if seq_dict.get('pdb', '') != '':
                seq.pdb = seq_dict.get('pdb', '')
            if seq.mutant_of is None or seq.mutant_of == '':
                seq.mutant_of = seq_dict.get('mutant_of', '')
            if seq.notes is None or seq.notes == '':
                seq.notes = seq_dict.get('notes', '')
        else:
            msg = 'Sequence already exists but owned by another user - added to paper, but no data updated'
            print(msg)
            issues.append(msg)

        seq.save()

    return issues