Пример #1
0
def task_search_for_orphan_enzymes():
    """Create a Sequence entry for every enzyme name that only activities know.

    Walks the distinct ``enzyme_name`` values found on Activity documents.
    For each name with no matching Sequence document, a new Sequence is saved
    carrying that name and the ``enzyme_type`` of the first matching Activity.
    """
    names_on_activities = set(Activity.objects().distinct('enzyme_name'))

    for name in names_on_activities:
        # A name that already has at least one Sequence is not an orphan.
        if len(Sequence.objects(enzyme_name=name)) != 0:
            continue

        first_activity = Activity.objects(enzyme_name=name)[0]
        enzyme_type = first_activity.enzyme_type
        Sequence(enzyme_name=name, enzyme_type=enzyme_type).save()
        print(
            f"found orphan enzyme, added sequence entry for {name} - {enzyme_type}"
        )
Пример #2
0
def add_new_enzymes():
    """Link an existing enzyme sequence to a paper, or create a new one for it.

    Reads ``enzyme_type``, ``existing_name``, ``new_name`` and ``paper_id``
    from the submitted form. An enzyme type is required, and exactly one of
    ``existing_name`` / ``new_name`` must be non-empty:

    * ``existing_name`` - the paper is added to that sequence's ``papers``
      list (only if it is not already there, so repeated submissions do not
      store duplicate references).
    * ``new_name`` - a new Sequence owned by the current user is created for
      the paper, the user is recorded in ``paper.edits_by`` and the paper is
      re-tagged with its enzyme types.

    The paper and (for ``existing_name``) the sequence are fetched with
    ``[0]``, so a missing document is not handled here.

    Returns:
        ``jsonify(result=...)`` where ``result`` is a dict with the keys
        ``status`` (``'success'`` or ``'danger'``), ``msg`` and ``issues``.
    """
    enzyme_type = request.form['enzyme_type']
    existing_name = request.form['existing_name']
    new_name = request.form['new_name']
    user = user_datastore.get_user(current_user.id)
    paper = Paper.objects(id=request.form['paper_id'])[0]

    def make_result(status, msg, issues=()):
        # Every branch answers with the same three-key payload.
        return {'status': status, 'msg': msg, 'issues': list(issues)}

    if not enzyme_type:
        result = make_result('danger', 'Must select an enzyme type')

    elif existing_name == "" and new_name == "":
        result = make_result('danger',
                             'Must select an enzyme or enter a new name')

    elif existing_name != "" and new_name != "":
        result = make_result(
            'danger',
            'Choose either an existing enzyme or enter a new name',
            ["(One must be blank)"])

    elif existing_name != "":
        seq = Sequence.objects(enzyme_name=existing_name)[0]
        # Same guard the other loaders in this file use: never store the
        # same paper reference twice on one sequence.
        if paper not in seq.papers:
            seq.papers.append(paper)
            seq.save()
        result = make_result('success', 'Sequence added to paper')

    elif new_name != "":
        seq = Sequence(enzyme_name=new_name,
                       enzyme_type=enzyme_type,
                       added_by=user,
                       owner=user,
                       papers=[paper])
        seq.save()

        if user not in paper.edits_by:
            paper.edits_by.append(user)

        # NOTE(review): the paper is not saved in this function; presumably
        # tag_paper_with_enzyme_types persists it (including the edits_by
        # change above) - confirm.
        papers_functions.tag_paper_with_enzyme_types(paper)

        result = make_result('success', 'Sequence added to paper')

    else:
        # Not reachable with plain string form values; kept as a defensive
        # fallback so the view always returns a payload.
        result = make_result('danger', 'Error creating new enzyme')

    return jsonify(result=result)
Пример #3
0
def df_to_db(spec_df):
    """Save every row of ``spec_df`` to the database as an Activity.

    For each row, the Paper (looked up by DOI), the EnzymeType and the
    Sequence are created if they do not exist yet, the paper is linked to the
    sequence, and an Activity built from the row's columns is saved.

    Args:
        spec_df: object whose ``iterrows()`` yields ``(index, row)`` pairs;
            each row is indexed by the column names read below.
    """
    doi_url_prefixes = ('https://doi.org/', 'http://doi.org/',
                        'http://dx.doi.org/')

    print('Saving biocatdb_2 excel to mongodb..')
    for _, row in spec_df.iterrows():
        html_doi = str(row['html_doi'])
        added_by_string = str(row['added_by'])

        # The bare DOI is the link with a known resolver prefix removed.
        doi = html_doi
        for prefix in doi_url_prefixes:
            if prefix in doi:
                doi = html_doi.replace(prefix, '')

        # Reuse the paper stored under this DOI, or create it.
        if len(Paper.objects(doi=doi)) != 0:
            paper = Paper.objects(doi=doi)[0]
        else:
            paper = Paper(short_citation=str(row['short_citation']),
                          html=html_doi,
                          doi=doi).save()
            print(f"{row['short_citation']} added")

        # Only non-empty plain strings count as a usable enzyme type.
        enzyme_type = row['enzyme_type']
        if type(enzyme_type) is str and enzyme_type != '':
            if len(EnzymeType.objects(enzyme_type=enzyme_type)) == 0:
                EnzymeType(enzyme_type=enzyme_type, description='').save()

        # Same rule for the enzyme name: create the sequence or link the paper.
        enzyme_name = row['enzyme_name']
        if type(enzyme_name) is str and enzyme_name != '':
            if len(Sequence.objects(enzyme_name=enzyme_name)) == 0:
                Sequence(enzyme_name=check_is_nan(enzyme_name),
                         enzyme_type=check_is_nan(enzyme_type),
                         papers=[paper]).save()
            else:
                known_seq = Sequence.objects(enzyme_name=enzyme_name)[0]
                if paper not in known_seq.papers:
                    known_seq.papers.append(paper)
                    known_seq.save()

        # The flag columns are true only when the cell equals 1.
        binary = True if row['binary'] == 1 else False
        auto_gen = True if row['auto_generated'] == 1 else False

        activity_fields = {
            'enzyme_type': check_is_nan(row['enzyme_type']),
            'enzyme_name': check_is_nan(row['enzyme_name']),
            'reaction': check_is_nan(row['reaction']),
            'short_citation': check_is_nan(row['short_citation']),
            'html_doi': check_is_nan(row['html_doi']),
            'added_by_string': added_by_string,
            'paper': paper,
            'cascade_num': check_is_nan(row['cascade_num']),
            'substrate_1_smiles': get_smile(row['substrate_1_smiles']),
            'substrate_2_smiles': get_smile(row['substrate_2_smiles']),
            'product_1_smiles': get_smile(row['product_1_smiles']),
            'temperature': check_is_nan(row['temperature']),
            'ph': check_is_nan(row['ph']),
            'solvent': check_is_nan(row['solvent']),
            'other_conditions': check_is_nan(row['other_conditions']),
            'notes': check_is_nan(row['notes']),
            'reaction_vol': check_is_nan(row['reaction_vol']),
            'formulation': check_is_nan(row['formulation']),
            'biocat_conc': check_is_nan(row['biocat_conc']),
            'kcat': check_is_float(row['kcat']),
            'km': check_is_float(row['km']),
            'mw': check_is_float(row['mw']),
            'substrate_1_conc': check_is_nan(row['substrate_1_conc']),
            'substrate_2_conc': check_is_nan(row['substrate_2_conc']),
            'specific_activity': check_is_float(row['specific_activity']),
            'conversion': check_is_float(row['conversion']),
            'conversion_time': check_is_float(row['conversion_time']),
            'categorical': check_is_nan(row['categorical']),
            'binary': binary,
            'selectivity': check_is_nan(row['selectivity']),
            'auto_generated': auto_gen,
        }
        Activity(**activity_fields).save()

    print('..done')
Пример #4
0
def _split_comma_list(raw):
    """Split a ', '-separated string into its non-empty entries."""
    # ''.split(', ') yields [''], which would otherwise be stored as a name.
    return [part for part in (raw or '').split(', ') if part.strip() != '']


def save_or_add_seqs(data_list, paper):
    """Create or update a Sequence for each uploaded row and link it to ``paper``.

    Used by the excel upload. For every dict in ``data_list``:

    * rows without an ``enzyme_name`` are reported and skipped;
    * if no Sequence with that name exists, one is created, provided the
      enzyme type is known and the amino acid sequence passes
      ``sequence_check``;
    * otherwise the paper is added to the existing Sequence, and - only when
      the current user owns it or it has no owner - new alternative names are
      added and empty fields are filled in from the row.

    Args:
        data_list: list of dicts keyed by Sequence field name. The dicts are
            normalised in place (blank ``sequence_unavailable`` /
            ``structure`` become ``'False'``; newlines and spaces are removed
            from ``sequence``).
        paper: the Paper document the sequences belong to.

    Returns:
        A list of human-readable issue strings (empty when every row was
        processed without problems).
    """
    user = user_datastore.get_user(current_user.id)
    issues = []
    enzyme_types = EnzymeType.objects().distinct('enzyme_type')

    for seq_dict in data_list:
        # Blank boolean cells are treated as False.
        if seq_dict.get('sequence_unavailable') == '':
            seq_dict['sequence_unavailable'] = 'False'

        if seq_dict.get('structure') == '':
            seq_dict['structure'] = 'False'

        if 'sequence' in seq_dict:
            seq_dict['sequence'] = seq_dict['sequence'].replace('\n', '')
            seq_dict['sequence'] = seq_dict['sequence'].replace(' ', '')

        enzyme_name = seq_dict.get('enzyme_name', '')
        if enzyme_name == '':
            issues.append("Sequence must have a name")
            continue

        # One lookup: None means the sequence does not exist yet.
        seq = Sequence.objects(enzyme_name=enzyme_name).first()

        if seq is None:
            enzyme_type = seq_dict.get('enzyme_type', '')

            if enzyme_type not in enzyme_types:
                msg = f"Enzyme type {enzyme_type} does not exist"
                print(msg)
                issues.append(msg)

            # Compared explicitly against False, as the original did; the
            # return contract of sequence_check is not visible here.
            elif sequence_check(seq_dict.get('sequence', '')) == False:
                msg = (f"Amino acid sequence for {enzyme_name} "
                       f"uses incorrect amino acid characters")
                print(msg)
                issues.append(msg)

            else:
                print('Creating new sequence..')
                seq = Sequence(
                    enzyme_name=enzyme_name,
                    enzyme_type=enzyme_type,
                    other_names=_split_comma_list(
                        seq_dict.get('other_names', '')),
                    sequence=seq_dict.get('sequence', ''),
                    n_tag=seq_dict.get('n_tag', ''),
                    c_tag=seq_dict.get('c_tag', ''),
                    sequence_unavailable=strtobool(
                        seq_dict.get('sequence_unavailable', 'False')),
                    accession=seq_dict.get('accession', ''),
                    # Previously read from 'other_names' (copy-paste slip).
                    other_identifiers=_split_comma_list(
                        seq_dict.get('other_identifiers', '')),
                    pdb=seq_dict.get('pdb', ''),
                    mutant_of=seq_dict.get('mutant_of', ''),
                    notes=seq_dict.get('notes', ''),
                    papers=[paper],
                    owner=user)
                seq.save()

            continue

        # Existing sequence: always link the paper, even if owned by someone
        # else.
        if paper not in seq.papers:
            seq.papers.append(paper)

        if seq.owner == user or seq.owner is None:
            seq.owner = user

            # Add only names not already stored, so re-uploading the same
            # sheet does not pile up duplicates.
            for name in _split_comma_list(seq_dict.get('other_names', '')):
                if name not in seq.other_names:
                    seq.other_names.append(name)

            # Existing values win; the upload only fills in blanks.
            if seq.sequence is None or seq.sequence == '':
                seq.sequence = seq_dict.get('sequence', '')

            if strtobool(seq_dict.get('sequence_unavailable', 'False')):
                seq.sequence_unavailable = True

            if seq.accession is None or seq.accession == '':
                seq.accession = seq_dict.get('accession', '')

            # Unlike the other fields, a supplied pdb value overwrites.
            if seq_dict.get('pdb', '') != '':
                seq.pdb = seq_dict.get('pdb', '')

            if seq.mutant_of is None or seq.mutant_of == '':
                seq.mutant_of = seq_dict.get('mutant_of', '')

            if seq.notes is None or seq.notes == '':
                seq.notes = seq_dict.get('notes', '')

        else:
            msg = ('Sequence already exists but owned by another user - '
                   'added to paper, but no data updated')
            print(msg)
            issues.append(msg)

        seq.save()

    return issues