示例#1
0
def get_pubchem_sqlite_local(pubchem_id):
    """Look up a PubChem compound by CID in the local sqlite mirror.

    Returns the object built by ``create_compound_from_pubchem_local`` for
    the first matching row, or ``''`` when the sqlite path is unconfigured,
    ``pubchem_id`` is empty, the query fails, or no row matches.
    """
    # Bug fix: the original used
    #   `not hasattr(settings, X) and not settings.X`
    # which raises AttributeError when the setting is missing (the second
    # operand is evaluated exactly when the attribute does NOT exist).
    # getattr with a default covers both "missing" and "falsy".
    if not getattr(settings, 'METAB_PUBCHEM_SQLITE_PTH', None):
        return ''

    if not pubchem_id:
        return ''

    conn = sqlite3.connect(settings.METAB_PUBCHEM_SQLITE_PTH)
    try:
        cursor = conn.cursor()
        # Parameterized query: the original interpolated pubchem_id with
        # str.format, which is SQL-injection-prone and breaks on odd input.
        cursor.execute(
            'SELECT * FROM pubchem_compounds WHERE cid=?', (pubchem_id,))
        names = sql_column_names(cursor)
        rows = cursor.fetchall()
    except OperationalError as e:
        print(e)
        return ''
    finally:
        # the original leaked the connection on every call
        conn.close()

    if rows:
        # should only be 1 entry per cid so take first row
        return create_compound_from_pubchem_local(rows[0], names)
    return ''
示例#2
0
    def save_xcms_individual_peaks(self, xfid):
        """Copy every row of the sqlite ``c_peaks`` table into CPeak rows.

        :param xfid: dict mapping sqlite ``fileid`` -> XCMSFileInfo instance
                     (as returned by ``save_xcms_file_info``).
        """
        cursor = self.cursor
        cursor.execute('SELECT * FROM  c_peaks')
        names = sql_column_names(cursor)

        cpeaks = []

        for row in cursor:

            # Flush in batches of 500 to bound memory use. The added
            # truthiness guard avoids the original's no-op bulk_create([])
            # on the very first iteration (len == 0 satisfies % 500 == 0).
            if cpeaks and len(cpeaks) % 500 == 0:
                CPeak.objects.bulk_create(cpeaks)
                cpeaks = []

            # rtminraw/rtmaxraw/intb/sn only exist in newer result schemas,
            # so fall back to None when the column is absent
            cpeak = CPeak(idi=row[names['cid']],
                          mz=row[names['mz']],
                          mzmin=row[names['mzmin']],
                          mzmax=row[names['mzmax']],
                          rt=row[names['rt']],
                          rtmin=row[names['rtmin']],
                          rtmax=row[names['rtmax']],
                          rtminraw=row[names['rtminraw']]
                          if 'rtminraw' in names else None,
                          rtmaxraw=row[names['rtmaxraw']]
                          if 'rtmaxraw' in names else None,
                          intb=row[names['intb']] if 'intb' in names else None,
                          _into=row[names['_into']],
                          maxo=row[names['maxo']],
                          sn=row[names['sn']] if 'sn' in names else None,
                          xcmsfileinfo=xfid[row[names['fileid']]])
            cpeaks.append(cpeak)

        # final partial batch
        CPeak.objects.bulk_create(cpeaks)
示例#3
0
    def save_xcms_grouped_peaks(self):
        """Copy every row of the sqlite ``c_peak_groups`` table into
        CPeakGroup rows attached to ``self.cpgm``.
        """
        # Removed unused locals from the original: `md = self.md` was never
        # read, and `cpeakgroup_d` was built per-row but never returned or
        # used.
        cursor = self.cursor

        cursor.execute('SELECT * FROM  c_peak_groups')
        names = sql_column_names(cursor)

        cpeakgroups = []

        for row in cursor:

            # flush in batches of 500 to bound memory use; guard avoids a
            # no-op bulk_create([]) on the first iteration
            if cpeakgroups and len(cpeakgroups) % 500 == 0:
                CPeakGroup.objects.bulk_create(cpeakgroups)
                cpeakgroups = []

            # isotopes/adduct/pcgroup columns (CAMERA output) only exist in
            # newer result schemas
            cpeakgroup = CPeakGroup(
                idi=row[names['grpid']],
                mzmed=row[names['mz']],
                mzmin=row[names['mzmin']],
                mzmax=row[names['mzmax']],
                rtmed=row[names['rt']],
                rtmin=row[names['rtmin']],
                rtmax=row[names['rtmax']],
                npeaks=row[names['npeaks']],
                cpeakgroupmeta=self.cpgm,
                isotopes=row[names['isotopes']]
                if 'isotopes' in names else None,
                adducts=row[names['adduct']] if 'adduct' in names else None,
                pcgroup=row[names['pcgroup']] if 'pcgroup' in names else None,
            )
            cpeakgroups.append(cpeakgroup)

        # final partial batch
        CPeakGroup.objects.bulk_create(cpeakgroups)
示例#4
0
    def save_adduct_annotations(self, ruleset_d):
        """Store CAMERA adduct annotations, linking neutral masses, peak
        groups and adduct rules by their Django primary keys.

        :param ruleset_d: dict mapping sqlite ``rule_id`` -> AdductRule pk
                          (as returned by ``save_adduct_rules``).
        :return: 0 when the ``adduct_annotations`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'adduct_annotations'):
            return 0

        # sqlite idi -> Django pk lookup tables
        nm_d = {
            n.idi: n.id
            for n in NeutralMass.objects.filter(metabinputdata=md)
        }
        cpeakgroups_d = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }
        cursor.execute('SELECT * FROM adduct_annotations')
        names = sql_column_names(cursor)
        ads = []
        for row in cursor:
            # Bug fix: the original tested `len(row) % 500 == 0` — row
            # length is the (constant) column count, so the batch was
            # never flushed and every Adduct accumulated in memory.
            if ads and len(ads) % 500 == 0:
                Adduct.objects.bulk_create(ads)
                ads = []

            ad = Adduct(idi=row[names['add_id']],
                        adductrule_id=ruleset_d[row[names['rule_id']]],
                        cpeakgroup_id=cpeakgroups_d[row[names['grpid']]],
                        neutralmass_id=nm_d[row[names['nm_id']]])
            ads.append(ad)

        # final partial batch
        Adduct.objects.bulk_create(ads)
示例#5
0
    def save_isotope_annotations(self):
        """Store CAMERA isotope annotations linking pairs of peak groups.

        :return: 0 when the ``isotope_annotations`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'isotope_annotations'):
            return 0

        # sqlite idi -> Django pk lookup for peak groups
        cpeakgroups_d = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }
        cursor.execute('SELECT * FROM isotope_annotations')
        names = sql_column_names(cursor)
        isos = []
        for row in cursor:
            # Bug fix: the original tested `len(row) % 500 == 0` — row
            # length is the (constant) column count, so batches were never
            # flushed; test the accumulator instead.
            if isos and len(isos) % 500 == 0:
                Isotope.objects.bulk_create(isos)
                isos = []

            iso = Isotope(
                idi=row[names['iso_id']],
                iso=row[names['iso']],
                charge=row[names['charge']],
                cpeakgroup1_id=cpeakgroups_d[row[names['c_peak_group1_id']]],
                cpeakgroup2_id=cpeakgroups_d[row[names['c_peak_group2_id']]],
                metabinputdata=md)
            isos.append(iso)

        # final partial batch
        Isotope.objects.bulk_create(isos)
示例#6
0
    def save_xcms_group_peak_link(self):
        """Link individual chromatographic peaks to their grouped features.

        Reads the sqlite ``c_peak_X_c_peak_group`` mapping table and creates
        CPeakGroupLink rows, translating sqlite ids to Django pks.
        """
        md = self.md
        cursor = self.cursor

        cursor.execute('SELECT * FROM  c_peak_X_c_peak_group')
        names = sql_column_names(cursor)

        cpeakgrouplink = []

        # sqlite idi -> Django pk lookup tables for both ends of the link
        cpeakgroups_d = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }
        cpeaks_d = {
            c.idi: c.pk
            for c in CPeak.objects.filter(xcmsfileinfo__metabinputdata=md)
        }

        for row in cursor:

            # flush every 500 links to bound memory use
            if len(cpeakgrouplink) % 500 == 0:
                CPeakGroupLink.objects.bulk_create(cpeakgrouplink)
                cpeakgrouplink = []

            cpeakgrouplink.append(
                CPeakGroupLink(
                    cpeak_id=cpeaks_d[row[names['cid']]],
                    cpeakgroup_id=cpeakgroups_d[row[names['grpid']]],
                    # best_feature column only exists in newer schemas
                    best_feature=row[names['best_feature']]
                    if 'best_feature' in names else None,
                ))

        CPeakGroupLink.objects.bulk_create(cpeakgrouplink)

        # NOTE(review): because of the batch flushing above this returns
        # only the final (<500-item) batch, not every link created —
        # confirm callers don't rely on it being the complete set
        return cpeakgrouplink
示例#7
0
    def save_adduct_rules(self):
        """Ensure an AdductRule row exists for every rule in the sqlite
        ``adduct_rules`` table.

        Returns a dict mapping sqlite ``rule_id`` -> AdductRule pk, or 0
        when the table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'adduct_rules'):
            return 0

        # update adduct rules
        cursor.execute('SELECT * FROM adduct_rules')
        names = sql_column_names(cursor)

        # rules already in the database, keyed by adduct_type
        # (comprehension yields {} when none exist)
        addrd = {
            a['adduct_type']: a['id']
            for a in AdductRule.objects.filter().values('adduct_type', 'id')
        }

        ruleset_d = {}

        for row in cursor:
            rule_name = row[names['name']]
            sqlite_rule_id = row[names['rule_id']]

            if rule_name in addrd:
                # rule already known — just record the pk mapping
                ruleset_d[sqlite_rule_id] = addrd[rule_name]
            else:
                new_rule = AdductRule(
                    adduct_type=rule_name,
                    nmol=row[names['nmol']],
                    charge=row[names['charge']],
                    massdiff=row[names['massdiff']],
                    oidscore=row[names['oidscore']],
                    quasi=row[names['quasi']],
                    ips=row[names['ips']],
                    # frag_score column only exists in newer schemas
                    frag_score=row[names['frag_score']]
                    if 'frag_score' in names else None)
                new_rule.save()
                ruleset_d[sqlite_rule_id] = new_rule.id

        return ruleset_d
示例#8
0
    def save_spectral_matching_annotations(self):
        """Store spectral-matching results joined with their library metadata.

        Reads the sqlite ``matches`` table (left-joined to ``library_meta``)
        and creates SpectralMatching rows. Returns 0 when the ``matches``
        table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'matches'):
            return 0

        cursor.execute(
            'SELECT * FROM  matches LEFT JOIN library_meta ON matches.lid=library_meta.lid'
        )
        names = sql_column_names(cursor)

        # sqlite idi -> Django pk lookups
        speakmeta_d = {
            c.idi: c.pk
            for c in SPeakMeta.objects.filter(metabinputdata=md)
        }

        # library spectra keyed by accession string
        library_d = {
            c.accession: c.pk
            for c in LibrarySpectraMeta.objects.all()
        }
        # NOTE(review): cpeakgroups_d is built but not used below — confirm
        # whether it was meant to feed into the SpectralMatching rows
        cpeakgroups_d = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }

        matches = []
        for row in cursor:

            # flush every 500 matches to bound memory use
            if len(matches) % 500 == 0:
                SpectralMatching.objects.bulk_create(matches)
                matches = []

            if row[names['source_name']] in [
                    'massbank', 'mona-experimental', 'lipidblast'
            ]:
                # Currently only works for mass bank (or anything from the experimental MONA library)

                # accession may be missing from our library table; store
                # the match anyway with no library link
                try:
                    lsm_id = library_d[row[names['accession']]]
                except KeyError as e:
                    print(e)
                    lsm_id = None

                match = SpectralMatching(
                    idi=row[names['mid']],
                    s_peak_meta_id=speakmeta_d[row[names['pid']]],
                    score=row[names['score']],
                    percentage_match=row[names['perc_mtch']],
                    match_num=row[names['match']],
                    accession=row[names['accession']],
                    name=row[names['name']],
                    library_spectra_meta_id=lsm_id)

                matches.append(match)

        # final partial batch
        SpectralMatching.objects.bulk_create(matches)
def upload_metplus(db_pth):
    """Bulk-load compounds from a local MetPlus sqlite database.

    This is a quick way to get a lot of important compounds. The sqlite
    database is from https://github.com/ICBI/MetPlus-DB. It is 5 years old
    at the time of writing so it is potentially missing compounds and can't
    be completely relied on. It also does not contain all of PubChem, so we
    still have to add many compounds when uploading annotations that used
    PubChem as the database.
    """

    def _text(value):
        # rows come back as bytes (text_factory = bytes below); decode
        # defensively to str, dropping undecodable bytes. The original
        # re-encoded back to bytes (a Python 2 leftover); Django text
        # fields expect str.
        return value.decode('utf-8', 'ignore')

    conn = sqlite3.connect(db_pth)
    conn.text_factory = bytes
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM  MetPlus')

    names = sql_column_names(cursor)
    comps = []
    # first row is header (strange for an sqlite database!)
    # Bug fix: cursor.next() is Python 2 only; use the next() builtin.
    next(cursor)
    c = 0
    for i, row in enumerate(cursor):

        # skip compounds we already have
        if Compound.objects.filter(inchikey_id=row[names['INCHIKEY']]):
            continue

        # flush in batches of ~1000 to bound memory use
        if c > 1000:
            Compound.objects.bulk_create(comps)
            print(i)
            comps = []
            c = 0
        comp = Compound(
            inchikey_id=row[names['INCHIKEY']],
            # Bug fix: the FORMULA / MONOISOTOPIC_WEIGHTS columns were
            # swapped in the original mapping (its own comments noted that
            # "formula" looked like a molecular weight).
            exact_mass=row[names['MONOISOTOPIC_WEIGHTS']],
            molecular_formula=row[names['FORMULA']],
            iupac_name=_text(row[names['IUPAC_NAME']]),
            systematic_name=_text(row[names['SYSTEMATIC_NAME']]),
            name=_text(row[names['COMMON_NAME']]),
            trade_name=_text(row[names['TRADE_NAME']]),
            hmdb_id=row[names['HMDB_ID']],
            lmdb_id=row[names['LMDB_ID']],
            humancyc_id=row[names['HUMANCYC_ID']],
            pubchem_id=row[names['PUBCHEM_CID']],
            chemspider_id=row[names['CHEMSPIDER_ID']],
            chebi_id=row[names['CHEBI_ID']],
            metlin_id=row[names['METLIN_ID']],
            kegg_id=row[names['KEGG_ID']],
            foodb_id=row[names['FooDB_ID']],
        )
        comps.append(comp)
        c += 1

    # final partial batch
    Compound.objects.bulk_create(comps)
示例#10
0
    def save_speakmeta_cpeak_frag_link(self):
        """Link fragmentation spectra (SPeakMeta) to chromatographic peaks
        and update per-group MS/MS counts via raw SQL.

        Returns 0 when the ``c_peak_X_s_peak_meta`` table is absent.
        """
        md = self.md
        cursor = self.cursor
        # NOTE(review): CPeakGroupMeta is assigned but never used below —
        # confirm whether it is a leftover from an earlier version
        CPeakGroupMeta = self.cpeakgroupmeta_class

        if not check_table_exists_sqlite(cursor, 'c_peak_X_s_peak_meta'):
            return 0

        cursor.execute('SELECT * FROM  c_peak_X_s_peak_meta')
        names = sql_column_names(cursor)

        # sqlite idi -> Django pk lookup tables for both ends of the link
        speakmeta = SPeakMeta.objects.filter(metabinputdata=md)
        speakmeta_d = {s.idi: s.pk for s in speakmeta}

        cpeaks = CPeak.objects.filter(xcmsfileinfo__metabinputdata=md)
        cpeak_d = {s.idi: s.pk for s in cpeaks}

        speakmeta_cpeak_frag_links = []

        for row in cursor:
            # flush every 500 links to bound memory use
            if len(speakmeta_cpeak_frag_links) % 500 == 0:
                SPeakMetaCPeakFragLink.objects.bulk_create(
                    speakmeta_cpeak_frag_links)
                speakmeta_cpeak_frag_links = []

            # this needs to be update after SQLite update in msPurity
            speakmeta_cpeak_frag_links.append(
                SPeakMetaCPeakFragLink(
                    speakmeta_id=speakmeta_d[row[names['pid']]],
                    cpeak_id=cpeak_d[row[names['cid']]],
                ))

        SPeakMetaCPeakFragLink.objects.bulk_create(speakmeta_cpeak_frag_links)

        # Add the number of msms events for grouped feature (not possible with django sql stuff)
        # NOTE(review): UPDATE ... INNER JOIN is MySQL-specific syntax; this
        # will fail on sqlite/postgres backends. The format() interpolation
        # is safe only because self.cpgm.id is a database-generated integer.
        sqlstmt = '''UPDATE mbrowse_cpeakgroup t
                        INNER JOIN (
                                (SELECT cpg.id, COUNT(cpgl.id) AS counter FROM mbrowse_cpeakgroup as cpg 
    	                          LEFT JOIN mbrowse_cpeakgrouplink as cpgl 
                                    ON cpgl.cpeakgroup_id=cpg.id
                                  LEFT JOIN mbrowse_speakmetacpeakfraglink as scfl 
                                    ON cpgl.cpeak_id=scfl.cpeak_id
                                    WHERE scfl.id is not NULL AND cpg.cpeakgroupmeta_id={}
                                  group by cpg.id)
                                  ) m ON t.id = m.id
                                SET t.msms_count = m.counter'''.format(
            self.cpgm.id)

        with connection.cursor() as cursor:
            cursor.execute(sqlstmt)
示例#11
0
    def save_s_peak_meta(self, runs, celery_obj):
        """Store scan-peak metadata (precursor + purity info) from the
        sqlite ``s_peak_meta`` table.

        :param runs: dict mapping sqlite ``fileid`` -> Run instance.
        :param celery_obj: optional celery task used for progress updates.
        """
        md = self.md
        cursor = self.cursor

        cursor.execute('SELECT * FROM  s_peak_meta')
        names = sql_column_names(cursor)

        speakmetas = []

        for row in cursor:
            # this needs to be update after SQLite update in msPurity
            # to stop ram memory runnning out
            if len(speakmetas) % 500 == 0:
                if celery_obj:
                    celery_obj.update_state(state='RUNNING',
                                            meta={
                                                'current':
                                                10,
                                                'total':
                                                100,
                                                'status':
                                                'Upload scan peak {}'.format(
                                                    len(speakmetas))
                                            })
                SPeakMeta.objects.bulk_create(speakmetas)
                speakmetas = []

            speakmetas.append(
                SPeakMeta(run=runs[row[names['fileid']]],
                          idi=row[names['pid']],
                          precursor_mz=row[names['precursorMZ']],
                          precursor_i=row[names['precursorIntensity']],
                          precursor_rt=row[names['precursorRT']],
                          precursor_scan_num=row[names['precursorScanNum']],
                          precursor_nearest=row[names['precursorNearest']],
                          # NOTE(review): scan_num reuses precursorScanNum,
                          # duplicating precursor_scan_num — confirm whether
                          # a dedicated scan-number column was intended
                          scan_num=row[names['precursorScanNum']],
                          a_mz=row[names['aMz']],
                          a_purity=row[names['aPurity']],
                          a_pknm=row[names['apkNm']],
                          i_mz=row[names['iMz']],
                          i_purity=row[names['iPurity']],
                          i_pknm=row[names['ipkNm']],
                          in_purity=row[names['inPurity']],
                          in_pknm=row[names['inPkNm']],
                          # these rows are fragmentation spectra headers
                          ms_level=2,
                          metabinputdata=md))

        # final partial batch
        SPeakMeta.objects.bulk_create(speakmetas)
示例#12
0
    def save_eics(self):
        """Store extracted-ion chromatogram points from the sqlite ``eics``
        table under a fresh EicMeta record.

        Returns 0 when the ``eics`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'eics'):
            return 0

        cursor.execute('SELECT * FROM  eics')
        names = sql_column_names(cursor)

        # one EicMeta groups all EICs created by this call
        eicmeta = EicMeta(metabinputdata=md)
        eicmeta.save()

        # sqlite idi -> Django pk lookup tables
        cpeak_lookup = {
            c.idi: c.pk
            for c in CPeak.objects.filter(xcmsfileinfo__metabinputdata=md)
        }
        group_lookup = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }

        pending = []
        for row in cursor:
            # flush every 1000 rows to save memory
            if len(pending) >= 1000:
                Eic.objects.bulk_create(pending)
                pending = []

            pending.append(
                Eic(idi=row[names['eicidi']],
                    scan=row[names['scan']],
                    intensity=row[names['intensity']],
                    rt_raw=row[names['rt_raw']],
                    # rt_corrected / purity columns only exist in newer
                    # result schemas
                    rt_corrected=row[names['rt_corrected']]
                    if 'rt_corrected' in names else None,
                    purity=row[names['purity']] if 'purity' in names else None,
                    cpeak_id=cpeak_lookup[row[names['c_peak_id']]],
                    cpeakgroup_id=group_lookup[row[names['grpid']]],
                    eicmeta_id=eicmeta.id))

        # final partial batch
        Eic.objects.bulk_create(pending)
示例#13
0
    def save_s_peaks(self, celery_obj):
        """Store individual scan peaks (m/z, intensity) from the sqlite
        ``s_peaks`` table, linked to previously saved SPeakMeta rows.

        :param celery_obj: optional celery task used for progress updates.
        :return: 0 when the ``s_peaks`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 's_peaks'):
            return 0

        # sqlite idi -> Django pk lookup for the parent spectra
        speakmeta = SPeakMeta.objects.filter(metabinputdata=md)
        speakmeta_d = {s.idi: s.pk for s in speakmeta}

        cursor.execute('SELECT * FROM  s_peaks')
        names = sql_column_names(cursor)
        speaks = []

        for row in cursor:

            speaks.append(
                SPeak(speakmeta_id=speakmeta_d[row[names['pid']]],
                      mz=row[names['mz']],
                      i=row[names['i']]))
            # to stop ram memory runnning out
            if len(speaks) > 1000:
                SPeak.objects.bulk_create(speaks)
                if celery_obj:
                    celery_obj.update_state(state='RUNNING',
                                            meta={
                                                'current':
                                                10,
                                                'total':
                                                100,
                                                'status':
                                                'Scan peaks upload, {}'.format(
                                                    len(speaks))
                                            })
                speaks = []

        # final partial batch
        if speaks:
            print('saving speak objects')
            SPeak.objects.bulk_create(speaks)
示例#14
0
    def save_neutral_masses(self):
        """Store CAMERA neutral-mass annotations from the sqlite
        ``neutral_masses`` table.

        :return: 0 when the ``neutral_masses`` table is absent.
        """
        md = self.md
        cursor = self.cursor
        if not check_table_exists_sqlite(cursor, 'neutral_masses'):
            return 0
        # update neutral mass
        cursor.execute('SELECT * FROM neutral_masses')
        names = sql_column_names(cursor)

        nms = []
        for row in cursor:
            # Bug fix: the original tested `len(row) % 500 == 0` — row
            # length is the (constant) column count, so batches were never
            # flushed; test the accumulator instead.
            if nms and len(nms) % 500 == 0:
                NeutralMass.objects.bulk_create(nms)
                nms = []

            nm = NeutralMass(idi=row[names['nm_id']],
                             nm=row[names['mass']],
                             ips=row[names['ips']],
                             metabinputdata=md)
            nms.append(nm)

        # final partial batch
        NeutralMass.objects.bulk_create(nms)
示例#15
0
    def save_sirius_csifingerid(self, celery_obj, csi_speed=True):
        """Store SIRIUS CSI:FingerID annotations (top-6 ranked candidates
        per spectrum) from the sqlite ``sirius_csifingerid_results`` table.

        Rows are grouped by their UID; when the UID changes, the collected
        candidates for the previous spectrum are re-scored via
        ``rank_score_sirius`` and bulk-created. This relies on the table
        being ordered by UID.

        :param celery_obj: optional celery task used for progress updates.
        :param csi_speed: currently unused — NOTE(review): confirm whether
            it should gate a fast path, or be removed.
        :return: 0 when the results table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'sirius_csifingerid_results'):
            return 0

        cursor.execute('SELECT * FROM  sirius_csifingerid_results')
        names = sql_column_names(cursor)

        # sqlite idi -> Django pk lookup for the parent spectra
        speakmeta_d = {
            c.idi: c.pk
            for c in SPeakMeta.objects.filter(metabinputdata=md)
        }

        # NOTE(review): `speaks` is appended to but never read — looks like
        # a leftover; confirm before removing
        speaks = []
        matches = []

        meta = CSIFingerIDMeta()
        meta.save()
        comp_d = {}

        UID_old = ''

        for i, row in enumerate(cursor):

            UID = row[names['UID']]
            # skip duplicated header rows embedded in the data
            if UID == 'UID':
                continue

            # UID format is <...>-<...>-<pid>; third element is the
            # spectrum id
            uid_l = UID.split('-')
            pid = uid_l[2]

            try:
                rank = int(row[names['Rank']])
            except ValueError as e:
                print(e)
                continue

            # only keep the top 6 candidates per spectrum
            if rank > 6:
                continue

            if TEST_MODE:
                if i > 3000:
                    break

            if celery_obj and i % 500 == 0:
                celery_obj.update_state(
                    state='RUNNING',
                    meta={
                        'current':
                        80,
                        'total':
                        100,
                        'status':
                        'SIRIUS CSI-FingerID upload, annotation {}'.format(i)
                    })

            # UID changed: flush the previous spectrum's candidates after
            # re-ranking them
            if UID_old and not UID == UID_old:
                print(i)
                print(UID_old, UID)
                matches = self.rank_score_sirius(matches)
                CSIFingerIDAnnotation.objects.bulk_create(matches)
                matches = []

            UID_old = UID

            match = CSIFingerIDAnnotation(
                idi=i + 1,
                s_peak_meta_id=speakmeta_d[int(pid)],
                inchikey2d=row[names['InChIkey2D']],
                molecular_formula=row[names['molecularFormula']],
                rank=rank,
                score=row[names['Score']],
                name=row[names['Name']],
                links=row[names['links']],
                smiles=row[names['smiles']],
                csifingeridmeta=meta)
            matches.append(match)

            # match.compound.add(*comps)

            speaks.append(speakmeta_d[int(pid)])

        # flush the final spectrum's candidates
        matches = self.rank_score_sirius(matches)

        CSIFingerIDAnnotation.objects.bulk_create(matches)
示例#16
0
    def save_probmetab(self, celery_obj):
        """Store ProbMetab annotations, resolving each KEGG compound id to
        a Compound row (creating it from KEGG/ChEBI if missing).

        NOTE(review): the fallback path (get_kegg_compound /
        get_inchi_from_chebi) appears to hit external services per row —
        confirm acceptable for large result sets.

        :param celery_obj: optional celery task used for progress updates.
        :return: 0 when the ``probmetab_results`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'probmetab_results'):
            return 0

        cursor.execute('SELECT * FROM  probmetab_results')
        names = sql_column_names(cursor)

        # sqlite idi -> Django pk lookup for peak groups
        cpeakgroups_d = {
            c.idi: c.pk
            for c in CPeakGroup.objects.filter(cpeakgroupmeta=self.cpgm)
        }

        matches = []

        for c, row in enumerate(cursor):
            if TEST_MODE:
                if c > 500:
                    break
            # rows without a group id cannot be linked
            if not row[names['grp_id']]:
                continue

            if celery_obj and len(matches) % 100 == 0:
                celery_obj.update_state(
                    state='RUNNING',
                    meta={
                        'current': 70,
                        'total': 100,
                        'status': 'Probabmetab upload, annotation {}'.format(c)
                    })

            # flush every 500 matches to bound memory use
            if len(matches) % 500 == 0:
                ProbmetabAnnotation.objects.bulk_create(matches)
                matches = []

            # Expect to have majority of KEGG in the Compound model already
            # mpc column is e.g. "cpd:C00031" -> take the KEGG id part
            kegg_id = row[names['mpc']].split(':')[1]
            comp_search = Compound.objects.filter(
                kegg_id__regex='(^|.*,|")({})("|,.*|$)'.format(kegg_id)
            )  # this needs to be update to be proper relational as the regex fails in some cases!
            if comp_search:
                comp = comp_search[0]
            else:
                # not stored locally: fetch from KEGG, enrich with an
                # InChIKey from ChEBI when available, then save
                kegg_compound = get_kegg_compound(kegg_id)
                if 'chebi_id_single' in kegg_compound and kegg_compound[
                        'chebi_id_single']:
                    inchikey = get_inchi_from_chebi(
                        kegg_compound['chebi_id_single'])
                    if inchikey:
                        kegg_compound['inchikey_id'] = inchikey

                comp = save_compound_kegg(kegg_compound)

            match = ProbmetabAnnotation(idi=c + 1,
                                        cpeakgroup_id=cpeakgroups_d[int(
                                            row[names['grp_id']])],
                                        compound=comp,
                                        prob=row[names['proba']])

            matches.append(match)

        # final partial batch
        ProbmetabAnnotation.objects.bulk_create(matches)
示例#17
0
    def save_metfrag(self, celery_obj):
        """Store MetFrag annotations (score >= 0.6), resolving compounds
        locally first and falling back to PubChem lookups.

        Compound resolution order: local Compound table by InChIKey ->
        local PubChem sqlite mirror by CID -> PubChem web by CID ->
        PubChem web by InChIKey; rows that cannot be resolved are skipped.

        :param celery_obj: optional celery task used for progress updates.
        :return: 0 when the ``metfrag_results`` table is absent.
        """
        md = self.md
        cursor = self.cursor

        if not check_table_exists_sqlite(cursor, 'metfrag_results'):
            return 0

        cursor.execute('SELECT * FROM  metfrag_results')
        names = sql_column_names(cursor)

        # sqlite idi -> Django pk lookup for the parent spectra
        speakmeta_d = {
            c.idi: c.pk
            for c in SPeakMeta.objects.filter(metabinputdata=md)
        }

        matches = []

        for i, row in enumerate(cursor):
            if TEST_MODE:
                if i > 500:
                    break

            UID = row[names['UID']]

            if UID == 'UID':
                # duplicate header name
                continue

            # UID format is <...>-<...>-<pid>; third element is the
            # spectrum id
            uid_l = UID.split('-')
            pid = uid_l[2]

            if not row[names['InChIKey']]:
                # currently only add compounds we can have a name for (should be all cases if PubChem was used)
                continue

            try:
                score = float(row[names['Score']])
            except ValueError as e:
                print(e)
                continue

            if score < 0.6:
                # no point storing anything less than 0.6
                continue

            if celery_obj and len(matches) % 100 == 0:
                celery_obj.update_state(
                    state='RUNNING',
                    meta={
                        'current': 50,
                        'total': 100,
                        'status': 'Metfrag upload, annotation {}'.format(i)
                    })

            # flush every 100 matches to bound memory use
            if len(matches) % 100 == 0:
                print(i)
                MetFragAnnotation.objects.bulk_create(matches)
                matches = []

            inchikey = row[names['InChIKey']]
            identifier = row[names['Identifier']]
            comp_search = Compound.objects.filter(inchikey_id=inchikey)

            # Takes too long to search pubchem
            if comp_search:
                comp = comp_search[0]
            else:
                print('CHECK LOCALLY')
                comp = get_pubchem_sqlite_local(identifier)

                if not comp:
                    print('CHECK CID')
                    pc_matches = get_pubchem_compound(identifier, 'cid')

                    if not pc_matches:
                        print('CHECK INCHIKEY')
                        pc_matches = get_pubchem_compound(inchikey, 'inchikey')
                        if not pc_matches:
                            # unresolvable compound: log and skip the row
                            print(row)
                            print(pc_matches)
                            print(inchikey)
                            continue

                    if len(pc_matches) > 1:
                        print('More than 1 match for inchi, taking the first match, should only really happen in rare cases' \
                              'and we have not got the power to distinguish between them anyway!')

                    pc_match = pc_matches[0]
                    comp = create_pubchem_comp(pc_match)
                    comp.save()

            match = MetFragAnnotation(
                idi=i + 1,
                s_peak_meta_id=speakmeta_d[int(pid)],
                compound=comp,
                explained_peaks=row[names['ExplPeaks']],
                formula_explained_peaks=row[names['FormulasOfExplPeaks']],
                fragmentor_score=row[names['FragmenterScore']],
                fragmentor_score_values=row[names['FragmenterScore_Values']],
                maximum_tree_depth=row[names['MaximumTreeDepth']],
                number_peaks_used=row[names['NumberPeaksUsed']],
                score=row[names['Score']])
            matches.append(match)

        # final partial batch
        MetFragAnnotation.objects.bulk_create(matches)
示例#18
0
    def save_xcms_file_info(self):
        """Create XCMSFileInfo rows (and any missing MFile/Run rows) for
        every file referenced in the xcms sqlite results.

        :return: tuple ``(xfi_d, mfile_d)`` — dicts keyed by the sqlite
                 fileid giving the created XCMSFileInfo and the
                 matched/created MFile.
        """
        md = self.md
        cursor = self.cursor
        mfiles = self.mfiles

        # newer schemas keep sample classes in a separate xset_classes
        # table; older ones store a sampleclass column on fileinfo
        if check_table_exists_sqlite(cursor, 'xset_classes'):

            cursor.execute('SELECT * FROM  xset_classes')
            names = sql_column_names(cursor)
            xset_classes = {}
            for row in self.cursor:
                xset_classes[row[names['row_names']]] = row[names['class']]

        else:
            xset_classes = {}

        cursor.execute('SELECT * FROM  fileinfo')

        names = sql_column_names(cursor)

        xfi_d = {}
        mfile_d = {}

        for row in self.cursor:
            idi = row[names['fileid']]
            fn = row[names['filename']]

            if xset_classes:
                sampleType = xset_classes[os.path.splitext(fn)[0]]
            else:
                # old database schema has this stored in the same table
                sampleType = row[names['sampleclass']]

            mfile_qs = mfiles.filter(original_filename=fn)

            if mfile_qs:
                mfile = mfile_qs[0]
            else:
                # add the file with the most basic of information,
                # guessing polarity from the filename
                prefix, suffix = os.path.splitext(fn)

                if re.match('.*(?:_POS_|_POSITIVE_).*', prefix):
                    polarity_qs = Polarity.objects.filter(polarity='positive')
                elif re.match('.*(?:_NEG_|_NEGATIVE_).*', prefix):
                    # Bug fix: this branch previously filtered on
                    # 'positive', mislabelling negative-mode files.
                    polarity_qs = Polarity.objects.filter(polarity='negative')
                else:
                    polarity_qs = Polarity.objects.filter(polarity='unknown')

                if polarity_qs:
                    run = Run(prefix=prefix, polarity=polarity_qs[0])
                else:
                    run = Run(prefix=prefix)

                run.save()

                mfile = MFile(
                    original_filename=fn,
                    run=run,
                    mfilesuffix=MFileSuffix.objects.filter(suffix=suffix)[0])
                mfile.save()

            xfi = XCMSFileInfo(idi=idi,
                               filename=fn,
                               classname=sampleType,
                               mfile=mfile,
                               metabinputdata=md)

            xfi.save()
            xfi_d[idi] = xfi
            mfile_d[idi] = mfile

        return xfi_d, mfile_d