def check_blast_status(enzyme_type):
    """Update an enzyme type's bioinformatics status from its sequences' BLAST state.

    Only reviewed sequences of ``enzyme_type`` that are not flagged
    ``bioinformatics_ignore`` are considered.

    * If any of them has ``blast`` set to ``None`` the enzyme type is marked
      ``'Queued for update'``.
    * Otherwise the enzyme type is marked ``'Complete'`` (if it was not
      already), and when exactly one ``SSN_record`` exists for it, that record
      is marked ``'Queued for update'``.

    Args:
        enzyme_type: Enzyme type abbreviation used to look up ``EnzymeType``
            and ``Sequence`` documents.

    Raises:
        IndexError: If no ``EnzymeType`` matches ``enzyme_type``.
    """
    seqs = Sequence.objects(
        db.Q(enzyme_type=enzyme_type)
        & db.Q(bioinformatics_ignore__ne=True)
        & db.Q(reviewed=True))
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    if any(seq.blast is None for seq in seqs):
        # A single write is enough no matter how many sequences are waiting.
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()
        return

    if enz_type_obj.bioinformatics_status == 'Complete':
        # Nothing changed since the last check.
        return

    enz_type_obj.bioinformatics_status = 'Complete'
    enz_type_obj.save()

    # Fetch the SSN records once and reuse the result.
    ssn_records = list(SSN_record.objects(enzyme_type=enz_type_obj))
    if len(ssn_records) == 1:
        ssn_record = ssn_records[0]
        ssn_record.status = 'Queued for update'
        ssn_record.save()
def remove_sequence():
    """Detach a sequence from a paper, using ids posted in ``request.form``.

    Reads ``paper_id`` and ``enzyme_name`` from the form. The removal is
    refused while any ``Activity`` for that enzyme name is attached to the
    paper. Otherwise the paper is removed from the sequence's paper list, and
    the sequence itself is deleted when it has no papers left and no protein
    sequence. The paper's enzyme-type tags are then refreshed.

    Returns:
        A ``jsonify`` response with ``status``, ``msg`` and ``issues``.
    """
    user = user_datastore.get_user(current_user.id)
    paper = Paper.objects(id=request.form['paper_id'])[0]
    enzyme_name = request.form['enzyme_name']
    seq = Sequence.objects(enzyme_name=enzyme_name)[0]

    attached_activity = Activity.objects(
        db.Q(enzyme_name=seq.enzyme_name) & db.Q(paper=paper))
    if len(attached_activity) != 0:
        return jsonify(result={
            'status': 'danger',
            'msg': 'Can not remove sequence - activity data still attached',
            'issues': [
                'Please remove references to this sequence in the activity section before removing'
            ],
        })

    if paper in seq.papers:
        seq.papers.remove(paper)
        seq.save()

    has_no_protein_sequence = seq.sequence == '' or seq.sequence is None
    if len(seq.papers) == 0 and has_no_protein_sequence:
        # Orphaned placeholder: no papers and no sequence data.
        seq.delete()

    papers_functions.tag_paper_with_enzyme_types(paper)

    flash("Sequence removed from paper", 'success')
    return jsonify(result={
        'status': 'success',
        'msg': 'Sequence removed from paper',
        'issues': [],
    })
def ssn_object():
    """Return SSN summary data for the enzyme type posted in ``request.form``.

    The response carries the SSN record's status, the number of ``Sequence``
    and ``UniRef50`` documents for the enzyme type, and one human-readable
    label per precalculated alignment score.

    Raises:
        IndexError: If the enzyme type or its ``SSN_record`` does not exist.
    """
    enzyme_type = request.form['enzyme_type']
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    ssn_obj = SSN_record.objects(enzyme_type=enzyme_type_obj)[0]

    num_biocatdb = Sequence.objects(enzyme_type=enzyme_type).count()
    num_uniref = UniRef50.objects(enzyme_type=enzyme_type_obj).count()

    cluster_counts = ssn_obj.num_at_alignment_score
    identities = ssn_obj.identity_at_alignment_score
    precalc_choices = {
        score: (f"{score}, {cluster_counts[score]} clusters, "
                f"avg identity {identities[score][0]} ± {identities[score][1]}")
        for score in cluster_counts
    }

    return jsonify(result={
        'status': ssn_obj.status,
        'num_biocatdb': num_biocatdb,
        'num_uniref': num_uniref,
        'precalculated': precalc_choices,
    })
def leaderboard():
    """Render the contributor leaderboard.

    For every user holding the ``contributor`` role, counts the papers and
    sequences they own. Users are keyed by
    ``"<first> <last>, <affiliation>"``; zero counts are dropped and the
    remaining entries are ordered from highest to lowest count.
    """
    def ranked_non_zero(counts):
        # Highest count first; entries with a zero count are dropped.
        ordered = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        return {label: total for label, total in ordered if total != 0}

    contributor_role = Role.objects(name='contributor')[0]
    contributors = User.objects(roles=contributor_role)

    paper_counts = {}
    sequence_counts = {}
    for user in contributors:
        username = f"{user.first_name} {user.last_name}, {user.affiliation}"
        paper_counts[username] = len(Paper.objects(owner=user))
        sequence_counts[username] = len(Sequence.objects(owner=user))

    return render_template('leaderboard.html',
                           top_papers=ranked_non_zero(paper_counts),
                           top_sequences=ranked_non_zero(sequence_counts))
def delete_paper():
    """Delete the paper identified by ``paper_id`` in ``request.form``.

    Deletion is refused, in this order, when the current user fails the paper
    permission check, when any sequence still references the paper, or when
    any activity still references it.

    Returns:
        A ``jsonify`` response with ``status``, ``msg`` and ``issues``.
    """
    def refuse(msg, issue):
        return jsonify(result={'status': 'danger', 'msg': msg, 'issues': [issue]})

    user = user_datastore.get_user(current_user.id)
    paper = Paper.objects(id=request.form['paper_id'])[0]

    if not check_permission.check_paper_permission(current_user.id, paper):
        return refuse('You are not the owner of this paper',
                      'Assign this paper to yourself in order to delete it')

    if len(Sequence.objects(papers=paper)) != 0:
        return refuse('Paper still contains sequences',
                      'Please remove any sequences from paper before deleting')

    if len(Activity.objects(paper=paper)) != 0:
        return refuse('Paper still contains activity data',
                      'Please remove any activity data from paper before deleting')

    paper.delete()
    return jsonify(result={'status': 'success', 'msg': 'Paper deleted', 'issues': []})
def add_new_enzymes():
    """Attach an existing enzyme to a paper, or create a new one for it.

    Reads ``enzyme_type``, ``existing_name``, ``new_name`` and ``paper_id``
    from ``request.form``. Exactly one of ``existing_name`` / ``new_name``
    must be non-empty.

    * ``existing_name``: the paper is added to that sequence's paper list,
      unless it is already there.
    * ``new_name``: a new ``Sequence`` owned by the current user is created
      for the paper, the user is recorded in ``paper.edits_by`` and the
      paper's enzyme-type tags are refreshed.

    Returns:
        A ``jsonify`` response with ``status``, ``msg`` and ``issues``.
    """
    enzyme_type = request.form['enzyme_type']
    existing_name = request.form['existing_name']
    new_name = request.form['new_name']
    user = user_datastore.get_user(current_user.id)
    paper = Paper.objects(id=request.form['paper_id'])[0]

    if enzyme_type == '' or enzyme_type is None:
        result = {
            'status': 'danger',
            'msg': 'Must select an enzyme type',
            'issues': []
        }
    elif existing_name == new_name and new_name == "":
        result = {
            'status': 'danger',
            'msg': 'Must select an enzyme or enter a new name',
            'issues': []
        }
    elif existing_name != "" and new_name != "":
        result = {
            'status': 'danger',
            'msg': 'Choose either an existing enzyme or enter a new name',
            'issues': ["(One must be blank)"]
        }
    elif existing_name != "":
        seq = Sequence.objects(enzyme_name=existing_name)[0]
        # Guard against listing the same paper twice when the request is
        # repeated for an enzyme that is already attached.
        if paper not in seq.papers:
            seq.papers.append(paper)
            seq.save()
        result = {
            'status': 'success',
            'msg': 'Sequence added to paper',
            'issues': []
        }
    elif new_name != "":
        seq = Sequence(enzyme_name=new_name,
                       enzyme_type=enzyme_type,
                       added_by=user,
                       owner=user,
                       papers=[paper])
        seq.save()

        if user not in paper.edits_by:
            paper.edits_by.append(user)

        papers_functions.tag_paper_with_enzyme_types(paper)
        result = {
            'status': 'success',
            'msg': 'Sequence added to paper',
            'issues': []
        }
    else:
        result = {
            'status': 'danger',
            'msg': 'Error creating new enzyme',
            'issues': []
        }

    return jsonify(result=result)
def get_sequences_of_same_type():
    """Return ``seqs_of_type`` for the enzyme type of the posted enzyme name.

    The enzyme name is read from ``request.form['enzyme_name']``.

    Raises:
        IndexError: If no ``Sequence`` has that enzyme name.
    """
    selected = Sequence.objects(enzyme_name=request.form['enzyme_name'])[0]
    return jsonify(result=seqs_of_type(selected.enzyme_type))
'w') as file: for seq in list(seqs) + list(bioinf_seqs): name = seq.enzyme_name seq = seq.sequence.replace('\n', '') file.write(f'>{name}\n') file.write(f"{seq}\n") def log(self, msg, level=1): if self.log_level >= level: print(f"ABA_Blaster ({self.enzyme_type}): {msg}") @staticmethod def _calc_alignment_score(bitscore, query_length, subject_length): two = Decimal(2) bitscore = Decimal(bitscore) x = np.power(two, -bitscore) * query_length * subject_length alignment_score = int(-np.log10(x)) return alignment_score if __name__ == '__main__': from retrobiocat_web.mongo.default_connection import make_default_connection make_default_connection() seq_obj = Sequence.objects(enzyme_type='AAD')[0] etb = AllByAllBlaster('AAD', log_level=1) etb.make_blast_db() alignment_names, alignment_scores, identities, coverages = etb.get_alignments( seq_obj)
def _get_sequence_object(enzyme_name):
    """Fetch the document named ``enzyme_name`` from the matching collection.

    Names containing ``'UniRef50'`` are looked up in ``UniRef50``; every other
    name is looked up in ``Sequence``.

    Raises:
        IndexError: If no document has that name.
    """
    collection = UniRef50 if 'UniRef50' in enzyme_name else Sequence
    return collection.objects(enzyme_name=enzyme_name)[0]
'Running Blasts') seqs = Sequence.objects( db.Q(enzyme_type=enzyme_type) & db.Q(bioinformatics_ignore__ne=True) & db.Q(reviewed=True)) for seq in seqs: if seq.sequence != '' and seq.sequence is not None and seq.blast is None: if len(seq.sequence) > 50: name = str(seq.enzyme_name) current_app.blast_queue.enqueue(set_up_blast_job, name) print(f'Queued blast for {seq.enzyme_name}') else: print(f'Not blasting {seq.enzyme_name}') seq.blast = datetime.datetime.now() else: seq.blast = datetime.datetime.now() seq.save() current_app.blast_queue.enqueue(check_blast_status, enzyme_type) if __name__ == '__main__': from retrobiocat_web.mongo.default_connection import make_default_connection make_default_connection() seq = Sequence.objects(enzyme_type='AAD')[1] protein_seq = seq.sequence xml = BlastRunner().run(protein_seq) BlastParser().parse(xml, seq)
def download_sequences():
    """Return every ``Sequence`` document as a CSV attachment.

    The documents are read as raw pymongo dicts, loaded into a DataFrame and
    serialised with ``DataFrame.to_csv``. The response is served as
    ``sequences.csv``.
    """
    records = list(Sequence.objects().as_pymongo())
    csv_text = pd.DataFrame(records).to_csv()

    resp = make_response(csv_text)
    resp.headers["Content-Disposition"] = "attachment; filename=sequences.csv"
    return resp
def load_sequence_data():
    """Return the editable data of the sequence named in ``request.form['name']``.

    An empty name yields an empty result. Otherwise the response holds the
    sequence's fields, the sorted enzyme names sharing its enzyme type, and
    three flags describing the current user's relation to the sequence
    (``can_edit``, ``self_assigned``, ``owner_is_another_user``).
    """
    name = request.form['name']
    if name == '':
        return jsonify(result={})

    seq = Sequence.objects(enzyme_name=name).exclude('papers')[0].select_related()

    # Maps each enzyme name of the same type onto itself.
    same_type_names = Sequence.objects(
        enzyme_type=seq.enzyme_type).distinct('enzyme_name')
    seq_array = {enzyme_name: enzyme_name for enzyme_name in sorted(same_type_names)}

    can_edit = False
    self_assigned = False
    other_user = False
    if current_user.is_authenticated:
        user = user_datastore.get_user(current_user.id)
        can_edit = bool(check_permission.check_seq_permissions(current_user.id, seq))
        if seq.owner == user:
            self_assigned = True
        elif seq.owner != '' and seq.owner is not None:
            other_user = True

    owner = ('' if seq.owner is None else
             f"{seq.owner.first_name} {seq.owner.last_name}, {seq.owner.affiliation}")

    other_names = ', '.join(seq.other_names)
    other_identifiers = ', '.join(seq.other_identifiers)

    enzyme_type_full = EnzymeType.objects(enzyme_type=seq.enzyme_type)[0].full_name

    # Missing tag / PDB values are sent as empty strings.
    n_tag = '' if seq.n_tag is None else seq.n_tag
    c_tag = '' if seq.c_tag is None else seq.c_tag
    pdb = '' if seq.pdb is None else seq.pdb

    result = {
        'enzyme_type': seq.enzyme_type,
        'enzyme_name': seq.enzyme_name,
        'sequence': seq.sequence,
        'sequence_unavailable': seq.sequence_unavailable,
        'n_tag': n_tag,
        'c_tag': c_tag,
        'accession': seq.accession,
        'other_identifiers': other_identifiers,
        'pdb': pdb,
        'mutant_of': seq.mutant_of,
        'sequences': seq_array,
        'notes': seq.notes,
        'bioinformatics_ignore': seq.bioinformatics_ignore,
        'can_edit': can_edit,
        'self_assigned': self_assigned,
        'owner_is_another_user': other_user,
        'other_names': other_names,
        'owner': owner,
        'enzyme_type_full': enzyme_type_full,
    }
    return jsonify(result=result)
def save_or_add_seqs(data_list, paper):
    """Create or update sequences from uploaded rows (used by the excel upload).

    Each row in ``data_list`` is a dict. Blank ``sequence_unavailable`` /
    ``structure`` values are normalised to ``'False'`` and whitespace is
    stripped from ``sequence`` (the dicts are modified in place).

    * A row whose ``enzyme_name`` is unknown creates a new ``Sequence`` owned
      by the current user, provided its enzyme type exists and its sequence
      passes ``sequence_check``.
    * A row whose ``enzyme_name`` already exists is attached to ``paper``.
      Its empty fields are filled in only when the sequence is unowned or
      owned by the current user.

    Args:
        data_list: Iterable of row dicts keyed by column name.
        paper: The ``Paper`` document the sequences belong to.

    Returns:
        list[str]: Human-readable problems found; empty when all rows saved
        cleanly.
    """
    user = user_datastore.get_user(current_user.id)
    issues = []
    enzyme_types = EnzymeType.objects().distinct('enzyme_type')

    for seq_dict in data_list:
        if seq_dict.get('sequence_unavailable') == '':
            seq_dict['sequence_unavailable'] = 'False'
        if seq_dict.get('structure') == '':
            seq_dict['structure'] = 'False'
        if 'sequence' in seq_dict:
            seq_dict['sequence'] = seq_dict['sequence'].replace('\n', '').replace(' ', '')

        enzyme_name = seq_dict.get('enzyme_name', '')
        if enzyme_name == '':
            issues.append("Sequence must have a name")
            continue

        seq = Sequence.objects(enzyme_name=enzyme_name).first()

        if seq is None:
            if seq_dict.get('enzyme_type', '') not in enzyme_types:
                msg = f"Enzyme type {seq_dict.get('enzyme_type', '')} does not exist"
                print(msg)
                issues.append(msg)
            elif sequence_check(seq_dict.get('sequence', '')) == False:
                msg = (f"Amino acid sequence for {enzyme_name} "
                       f"uses incorrect amino acid characters")
                print(msg)
                issues.append(msg)
            else:
                print('Creating new sequence..')
                seq = Sequence(
                    enzyme_name=enzyme_name,
                    enzyme_type=seq_dict['enzyme_type'],
                    other_names=_split_comma_list(seq_dict.get('other_names', '')),
                    sequence=seq_dict.get('sequence', ''),
                    n_tag=seq_dict.get('n_tag', ''),
                    c_tag=seq_dict.get('c_tag', ''),
                    sequence_unavailable=strtobool(
                        seq_dict.get('sequence_unavailable', 'False')),
                    accession=seq_dict.get('accession', ''),
                    # Previously read from 'other_names' (copy-paste slip).
                    other_identifiers=_split_comma_list(
                        seq_dict.get('other_identifiers', '')),
                    pdb=seq_dict.get('pdb', ''),
                    mutant_of=seq_dict.get('mutant_of', ''),
                    notes=seq_dict.get('notes', ''),
                    papers=[paper],
                    owner=user)
                seq.save()
            continue

        if paper not in seq.papers:
            seq.papers.append(paper)

        if seq.owner == user or seq.owner is None:
            seq.owner = user

            # Only add names not already recorded, so re-uploading the same
            # sheet does not pile up duplicates.
            for other_name in _split_comma_list(seq_dict.get('other_names', '')):
                if other_name not in seq.other_names:
                    seq.other_names.append(other_name)

            if seq.sequence is None or seq.sequence == '':
                seq.sequence = seq_dict.get('sequence', '')
            if strtobool(seq_dict.get('sequence_unavailable', 'False')):
                seq.sequence_unavailable = True
            if seq.accession is None or seq.accession == '':
                seq.accession = seq_dict.get('accession', '')
            if seq_dict.get('pdb', '') != '':
                seq.pdb = seq_dict.get('pdb', '')
            if seq.mutant_of is None or seq.mutant_of == '':
                seq.mutant_of = seq_dict.get('mutant_of', '')
            if seq.notes is None or seq.notes == '':
                seq.notes = seq_dict.get('notes', '')
        else:
            msg = ('Sequence already exists but owned by another user - '
                   'added to paper, but no data updated')
            print(msg)
            issues.append(msg)

        seq.save()

    return issues


def _split_comma_list(text):
    """Split a ``', '``-separated cell into its non-empty entries."""
    # ''.split(', ') yields [''], which would store a blank entry.
    return [entry for entry in text.split(', ') if entry != '']
def df_to_db(spec_df):
    """Load every row of ``spec_df`` into MongoDB.

    For each row this finds or creates the ``Paper`` (keyed by DOI with the
    resolver prefix removed), creates the ``EnzymeType`` and ``Sequence`` if
    they are missing, attaches the paper to an existing sequence, and finally
    saves one ``Activity`` document built from the row.

    Args:
        spec_df: DataFrame whose columns match the row keys read below.
    """
    doi_prefixes = ('https://doi.org/', 'http://doi.org/', 'http://dx.doi.org/')

    print('Saving biocatdb_2 excel to mongodb..')
    for _, row in spec_df.iterrows():
        html_doi = str(row['html_doi'])
        doi = str(row['html_doi'])
        added_by_string = str(row['added_by'])

        for prefix in doi_prefixes:
            if prefix in doi:
                doi = html_doi.replace(prefix, '')

        if len(Paper.objects(doi=doi)) == 0:
            paper = Paper(short_citation=str(row['short_citation']),
                          html=html_doi,
                          doi=doi).save()
            print(f"{row['short_citation']} added")
        else:
            paper = Paper.objects(doi=doi)[0]

        enzyme_type = row['enzyme_type']
        if enzyme_type is not None and enzyme_type != '' and type(enzyme_type) == str:
            if len(EnzymeType.objects(enzyme_type=enzyme_type)) == 0:
                EnzymeType(enzyme_type=enzyme_type, description='').save()

        enzyme_name = row['enzyme_name']
        if enzyme_name is not None and enzyme_name != '' and type(enzyme_name) == str:
            if len(Sequence.objects(enzyme_name=enzyme_name)) == 0:
                Sequence(enzyme_name=check_is_nan(row['enzyme_name']),
                         enzyme_type=check_is_nan(row['enzyme_type']),
                         papers=[paper]).save()
            else:
                seq = Sequence.objects(enzyme_name=enzyme_name)[0]
                # NOTE(review): the save is assumed to belong to this
                # branch; confirm against the original indentation.
                if paper not in seq.papers:
                    seq.papers.append(paper)
                    seq.save()

        # Plain Python bools; the row cells may be numpy scalars.
        binary = True if row['binary'] == 1 else False
        auto_gen = True if row['auto_generated'] == 1 else False

        Activity(
            enzyme_type=check_is_nan(row['enzyme_type']),
            enzyme_name=check_is_nan(row['enzyme_name']),
            reaction=check_is_nan(row['reaction']),
            short_citation=check_is_nan(row['short_citation']),
            html_doi=check_is_nan(row['html_doi']),
            added_by_string=added_by_string,
            paper=paper,
            cascade_num=check_is_nan(row['cascade_num']),
            substrate_1_smiles=get_smile(row['substrate_1_smiles']),
            substrate_2_smiles=get_smile(row['substrate_2_smiles']),
            product_1_smiles=get_smile(row['product_1_smiles']),
            temperature=check_is_nan(row['temperature']),
            ph=check_is_nan(row['ph']),
            solvent=check_is_nan(row['solvent']),
            other_conditions=check_is_nan(row['other_conditions']),
            notes=check_is_nan(row['notes']),
            reaction_vol=check_is_nan(row['reaction_vol']),
            formulation=check_is_nan(row['formulation']),
            biocat_conc=check_is_nan(row['biocat_conc']),
            kcat=check_is_float(row['kcat']),
            km=check_is_float(row['km']),
            mw=check_is_float(row['mw']),
            substrate_1_conc=check_is_nan(row['substrate_1_conc']),
            substrate_2_conc=check_is_nan(row['substrate_2_conc']),
            specific_activity=check_is_float(row['specific_activity']),
            conversion=check_is_float(row['conversion']),
            conversion_time=check_is_float(row['conversion_time']),
            categorical=check_is_nan(row['categorical']),
            binary=binary,
            selectivity=check_is_nan(row['selectivity']),
            auto_generated=auto_gen,
        ).save()

    print('..done')
def task_ensure_correct_sequence_naming():
    """Call ``update_name`` on every sequence, passing its current name."""
    for sequence in Sequence.objects():
        sequence.update_name(sequence.enzyme_name)
def load_single_activity_data():
    """Return display-ready data for the activity posted in ``request.form``.

    Looks up the ``Activity`` by ``activity_id``, renders the substrate and
    product SMILES as image URLs, appends units to purely numeric condition
    values, and formats the activity measurements as text. Any remaining
    ``None`` value in the result is replaced by an empty string.

    Returns:
        A ``jsonify`` response whose ``result`` maps field names to strings.

    Raises:
        IndexError: If no activity has the posted id.
    """
    activity_id = request.form['activity_id']
    activity = Activity.objects(id=activity_id)[0].select_related()

    sub1 = images.smitosvg_url(activity.substrate_1_smiles)
    prod = _optional_smiles_image(activity.product_1_smiles)
    sub2 = _optional_smiles_image(activity.substrate_2_smiles)

    if activity.added_by is not None:
        added_by = (f"{activity.added_by.first_name} "
                    f"{activity.added_by.last_name}, "
                    f"{activity.added_by.affiliation}")
    else:
        added_by = 'Unattributed'

    sub1_conc = _with_unit(activity.substrate_1_conc, suffix=' mM')
    sub2_conc = _with_unit(activity.substrate_2_conc, suffix=' mM')
    biocat_conc = _with_unit(activity.biocat_conc, suffix=' mg/ml')
    temperature = _with_unit(activity.temperature, suffix=' <sup>o</sup>C')
    ph = _with_unit(activity.ph, prefix='pH ')
    volume = _with_unit(activity.reaction_vol, suffix=' mL scale')

    active = 'Active' if activity.binary else 'Not active'

    if activity.specific_activity is None:
        sa = ''
    else:
        sa = str(round(activity.specific_activity, 2)) + ' μmol / min / mg'

    if activity.conversion is None:
        conv = ''
    else:
        conv = str(int(activity.conversion)) + " % conversion"
        if activity.conversion_time is not None:
            conv += f" in {str(int(activity.conversion_time))} hours"

    if activity.kcat is None:
        kinetics = ''
    else:
        kinetics = f"kcat: {str(round(activity.kcat, 2))} min<sup>-1</sup>"
        # km may be missing even when kcat was recorded; rounding None
        # would raise TypeError.
        if activity.km is not None:
            kinetics += f" km: {str(round(activity.km, 2))} mM"

    result = {
        'short_cit': activity.paper.short_citation,
        'enzyme_name': activity.enzyme_name,
        'enzyme_type': activity.enzyme_type,
        'reaction': activity.reaction,
        'sub1_img': sub1,
        'sub2_img': sub2,
        'prod_img': prod,
        'added_by': added_by,
        'sub1_conc': sub1_conc,
        'sub2_conc': sub2_conc,
        'biocat_conc': biocat_conc,
        'formulation': activity.formulation,
        'selectivity': activity.selectivity,
        'temperature': temperature,
        'ph': ph,
        'solvent': activity.solvent,
        'volume': volume,
        'other_conditions': activity.other_conditions,
        'notes': activity.notes,
        'active': active,
        'category': activity.categorical,
        'sa': sa,
        'conv': conv,
        'kinetics': kinetics,
    }
    result = {key: ('' if value is None else value)
              for key, value in result.items()}

    return jsonify(result=result)


def _optional_smiles_image(smiles):
    """Return the image URL for ``smiles``, or ``''`` when it is empty/None."""
    if smiles == '' or smiles is None:
        return ''
    return images.smitosvg_url(smiles)


def _with_unit(value, prefix='', suffix=''):
    """Wrap ``value`` in ``prefix``/``suffix`` when it is purely numeric text.

    ``None`` becomes ``''``; non-numeric text is returned unchanged.
    """
    if value is None:
        return ''
    if value.replace('.', '').isnumeric():
        return f"{prefix}{value}{suffix}"
    return value
def save_edited_sequence():
    """Apply the edits posted in ``request.form`` to an existing sequence.

    The sequence is looked up by ``original_name``. If the current user fails
    the sequence permission check, nothing is modified and a ``danger``
    result is returned. Otherwise name, type and sequence changes go through
    the sequence's ``update_*`` methods (their failure messages are collected
    as issues), the remaining fields and the owner are assigned, the sequence
    is saved, and the status of its papers is refreshed.

    Returns:
        A ``jsonify`` response with ``status``, ``msg`` and ``issues``.

    Raises:
        IndexError: If no sequence is named ``original_name``.
    """
    form = request.form
    original_name = form['original_name']
    enzyme_name = form['enzyme_name']
    enzyme_type = form['enzyme_type']
    sequence = form['sequence']
    sequence_unavailable = bool(strtobool(form['sequence_unavailable']))
    n_tag = form['n_tag']
    c_tag = form['c_tag']
    accession = form['accession']
    other_identifiers = form['other_identifiers']
    pdb = form['pdb']
    mutant_of = form['mutant_of']
    notes = form['notes']
    other_names = form['other_names']
    bioinformatics_ignore = bool(strtobool(form['bioinformatics_ignore']))
    self_assigned = bool(strtobool(form['self_assigned']))

    status = 'success'
    msg = 'Sequence edited'
    issues = []

    seq = Sequence.objects(enzyme_name=original_name)[0]
    user = user_datastore.get_user(current_user.id)

    if not check_permission.check_seq_permissions(current_user.id, seq):
        # Stop here: previously the issue was recorded but the edits were
        # still applied and saved.
        result = {
            'status': 'danger',
            'msg': 'Issues updating sequence',
            'issues': ['User does not have access to edit this sequence'],
        }
        return jsonify(result=result)

    if original_name != enzyme_name:
        success, msg = seq.update_name(enzyme_name)
        if success is False:
            issues.append(msg)

    if seq.enzyme_type != enzyme_type:
        success, msg = seq.update_type(enzyme_type)
        if success is False:
            issues.append(msg)

    if seq.sequence != sequence:
        success, msg = seq.update_sequence(sequence)
        if success is False:
            issues.append(msg)

    seq.sequence_unavailable = sequence_unavailable
    seq.n_tag = n_tag
    seq.c_tag = c_tag
    seq.accession = accession
    seq.other_identifiers = other_identifiers.split(', ')
    seq.pdb = pdb
    seq.notes = notes
    seq.mutant_of = mutant_of
    seq.other_names = other_names.split(', ')
    seq.bioinformatics_ignore = bioinformatics_ignore

    # Assign the owner before saving so the change is persisted.
    if self_assigned:
        seq.owner = user
    elif seq.owner == user:
        seq.owner = None

    seq.save()

    update_seq_papers_status(seq.enzyme_name)

    if len(issues) != 0:
        status = 'danger'
        msg = 'Issues updating sequence'

    result = {'status': status, 'msg': msg, 'issues': issues}
    return jsonify(result=result)
def inject_login_mode():
    """Build the dict of template variables describing the current user.

    ``login_mode`` is always set from ``app.config``. For an authenticated
    user the dict also carries their enzyme teams / championed enzyme types
    (when they hold the matching role), counts of their own papers and
    sequences still needing data (contributors only), and per-enzyme-type
    notification counts with a running total.
    """
    inject_dict = {'login_mode': app.config['USE_EMAIL_CONFIRMATION']}

    if not current_user.is_authenticated:
        return inject_dict

    user = User.objects(id=current_user.id).select_related()[0]

    if user.has_role('enzyme_teams') and user.enzyme_teams is not None:
        inject_dict['enzyme_teams'] = [team.enzyme_type for team in user.enzyme_teams]

    if user.has_role('enzyme_champion') and user.enzyme_champion is not None:
        inject_dict['enzyme_champion'] = [
            championed.enzyme_type for championed in user.enzyme_champion
        ]

    if user.has_role('contributor'):
        papers_needing_data = Paper.objects(
            Q(owner=user)
            & (Q(status='Data required')
               | Q(status='Enzymes need protein sequences')
               | Q(status='Issues need to be resolved')))
        seqs_needing_data = Sequence.objects(
            Q(owner=user)
            & ((Q(sequence=None) | Q(sequence=''))
               & (Q(sequence_unavailable__ne=True))))
        inject_dict['user_papers_need_data'] = len(papers_needing_data)
        inject_dict['user_seqs_need_data'] = len(seqs_needing_data)

    # NOTE(review): these defaults are assumed to be set for every
    # authenticated user (not only contributors) - confirm against the
    # original indentation.
    inject_dict['total_team_notifications'] = 0
    inject_dict['team_notifications'] = {}
    inject_dict['champ_seq_notifications'] = {}
    inject_dict['champ_notifications'] = {}

    for enz_type in inject_dict.get('enzyme_teams', []):
        unowned_papers = Paper.objects(
            Q(tags=enz_type)
            & Q(owner=None)
            & (Q(status='Data required')
               | Q(status='Enzymes need protein sequences')))
        num_papers = len(unowned_papers)
        inject_dict['team_notifications'][enz_type] = num_papers
        inject_dict['total_team_notifications'] += num_papers

    for enz_type in inject_dict.get('enzyme_champion', []):
        num_papers = len(Paper.objects(
            Q(tags=enz_type) & Q(status='Complete - Awaiting review')))
        num_seqs = len(Sequence.objects(
            Q(enzyme_type=enz_type)
            & ((Q(sequence=None) | Q(sequence=''))
               & (Q(sequence_unavailable__ne=True)))))
        inject_dict['champ_notifications'][enz_type] = num_papers
        inject_dict['champ_seq_notifications'][enz_type] = num_seqs
        inject_dict['total_team_notifications'] += num_papers + num_seqs

    return inject_dict
def set_choices(self):
    """Fill the enzyme type and enzyme name choice lists.

    Each list starts with ``'All'``, followed by the distinct values of the
    matching ``Sequence`` field; every choice is a ``(value, value)`` pair.
    """
    def as_choices(field_name):
        distinct_values = list(Sequence.objects().distinct(field_name))
        return [(option, option) for option in ['All'] + distinct_values]

    self.enzyme_type.choices = as_choices('enzyme_type')
    self.enzyme_name.choices = as_choices('enzyme_name')
sequences = Sequence.objects( db.Q(sequence__ne=None) & db.Q(sequence__ne='') & db.Q(enzyme_type=enzyme_type)) unirefs = UniRef90.objects(enzyme_type=enz_type_obj) nodes = [] for seq_obj in list(unirefs) + list(sequences): nodes.append(new_node(seq_obj)) edges = [] for ali_obj in alignments: if ali_obj.identity >= identity and ali_obj.proteins[ 0].enzyme_name != ali_obj.proteins[1].enzyme_name: edges.append(new_edge(ali_obj)) return nodes, edges if __name__ == '__main__': from retrobiocat_web.mongo.default_connection import make_default_connection make_default_connection() enzyme_type = 'AAD' sequences = Sequence.objects( db.Q(sequence__ne=None) & db.Q(sequence__ne='') & db.Q(enzyme_type=enzyme_type)) for seq in sequences: print(seq.enzyme_name) print(f"Sequence = '{seq.sequence}'")
def filter_papers_by_enzyme_name(papers, enzyme_name):
    """Keep only the papers attached to a sequence named ``enzyme_name``.

    Args:
        papers: Iterable of paper documents to filter.
        enzyme_name: Enzyme name used to look up ``Sequence`` documents.

    Returns:
        list: The entries of ``papers``, in their original order, that appear
        in the paper list of any matching sequence. Empty when no sequence
        has that name (previously this raised ``IndexError``).
    """
    seqs = Sequence.objects(enzyme_name=enzyme_name).select_related()
    # Flatten the per-sequence paper lists; the earlier code only ever
    # looked at the first matching sequence.
    seq_papers = [paper for seq in seqs for paper in seq.papers]
    return [paper for paper in papers if paper in seq_papers]