Пример #1
0
def get_save_isotope_patterns(db_filename, db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution, charge, verbose, instrument):
    """Return isotope patterns for one database/adduct pair, using a disk cache.

    The cache file path is built from ``db_dump_folder``, ``db_name``, the
    class name of ``instrument``, ``adduct``, ``isocalc_sig`` and
    ``isocalc_resolution``.  If that file exists it is unpickled and returned.
    If it does not exist, or unpickling fails, the patterns are computed with
    ``calculate_isotope_patterns`` and handed to ``save_pattern``.

    Args:
        db_filename: tab-separated database file; only parsed when patterns
            have to be (re)generated.
        db_dump_folder: folder holding the ``.dbasedump`` cache files.
        db_name: database label used in the cache file name.
        adduct: adduct passed to ``calculate_isotope_patterns`` and used in
            the cache file name.
        isocalc_sig: forwarded to ``calculate_isotope_patterns`` and used in
            the cache file name.
        isocalc_resolution: forwarded to ``calculate_isotope_patterns`` and
            used in the cache file name.
        charge: forwarded to ``calculate_isotope_patterns``.
        verbose: when true, progress (and load errors) are printed.
        instrument: instrument object; its class name is part of the cache
            file name and it is forwarded to ``calculate_isotope_patterns``.

    Returns:
        The object unpickled from the cache file, or the value returned by
        ``calculate_isotope_patterns`` when the cache was missing/unreadable.
    """
    import pickle
    from pySM.parse_databases import parse_databases

    load_file = '{}/{}_{}_{}_{}_{}.dbasedump'.format(
        db_dump_folder, db_name, instrument.__class__.__name__, adduct, isocalc_sig, isocalc_resolution)

    if os.path.isfile(load_file):
        if verbose:
            print("{} -> loading".format(load_file))
        try:
            # NOTE: pickle executes arbitrary code on load; the dump folder
            # must only contain files written by this pipeline.
            # NOTE(review): mode 'r' is kept from the original; confirm it
            # matches the mode save_pattern writes with.
            with open(load_file, 'r') as cache_file:
                return pickle.load(cache_file)
        except (ValueError, EOFError, pickle.UnpicklingError) as e:
            # Unreadable/truncated cache: fall through and regenerate it.
            if verbose:
                print(str(e))

    # Cache miss (or unreadable cache): compute the patterns and store them.
    if verbose:
        print("{} -> generating".format(load_file))
    sum_formulae = parse_databases.read_generic_csv(db_filename, header=0, idcol=0, namecol=1, sfcol=2, sep='\t')
    mz_list_tmp = calculate_isotope_patterns(sum_formulae, adduct=adduct, instrument=instrument,
                                             isocalc_sig=isocalc_sig, isocalc_resolution=isocalc_resolution,
                                             charge=charge, verbose=False)
    save_pattern(load_file, db_dump_folder, mz_list_tmp)
    return mz_list_tmp
Пример #2
0
def generate_isotope_patterns(config, verbose=True):
    """Load or compute isotope patterns for every database and adduct in ``config``.

    Reads ``config['file_inputs']['database_file']`` (one path or a list of
    paths), ``config['file_inputs']['database_load_folder']`` and, under
    ``config['isotope_generation']``, the keys ``charge``, ``adducts``,
    ``instrument``, ``resolving_power`` and ``at_mz``.  Only the first entry
    of ``charge`` is used.

    Args:
        config: nested configuration dict as described above.
        verbose: when true, progress messages are printed.

    Returns:
        A tuple ``(sum_formulae, adducts, mz_list)``:
        ``sum_formulae`` is the merged result of ``read_generic_csv`` over all
        database files, minus the empty key and minus every formula for which
        no pattern was obtained; ``adducts`` is the set of adduct names;
        ``mz_list[sum_formula][adduct]`` is a two-item list holding elements 0
        and 1 of the stored pattern, restricted to the (at most 4) positions
        with the largest values in element 1, in descending order of that
        value (presumably m/z values and intensities -- confirm against
        ``calculate_isotope_patterns``).
    """
    #todo: verbose -> logging.debug
    from pySM.parse_databases import parse_databases
    from pySM.spatial_metabolomics import get_save_isotope_patterns
    # NOTE(review): this name is rebound to the instrument object below; the
    # import is kept in case loading the submodule is relied on elsewhere.
    from pyMSpec import instrument

    # Extract variables from config dict
    db_filenames = config['file_inputs']['database_file']
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str
    if isinstance(db_filenames, string_types):
        # A single path would otherwise be iterated character by character.
        db_filenames = [db_filenames]
    db_dump_folder = config['file_inputs']['database_load_folder']
    isotope_cfg = config['isotope_generation']

    charge_states = isotope_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    first_charge = charge_states[0]  # currently only supports first charge!!
    # Polarity sign and charge count are concatenated, then parsed as an int.
    charge = int('{}{}'.format(first_charge['polarity'], first_charge['n_charges']))

    adducts = set(entry['adduct'] for entry in isotope_cfg['adducts'])
    instrument = get_instrument(isotope_cfg["instrument"], isotope_cfg["resolving_power"], isotope_cfg["at_mz"])
    isocalc_sig = np.round(instrument.sigma_at_mz(200), decimals=4)
    isocalc_resolution = instrument.points_per_mz(isocalc_sig)

    # Get master list of sum formulae
    sum_formulae = {}
    for db_filename in db_filenames:
        sum_formulae = parse_databases.read_generic_csv(db_filename, header=0, idcol=0, namecol=1, sfcol=2,
                                                        sep='\t', sum_formulae=sum_formulae)
        if '' in sum_formulae:
            if verbose:
                print('empty sf removed from list')
            del sum_formulae['']

    # Get isotope patterns for all sum_formulae
    ## this limit of 4 is hardcoded to reduce the number of calculations todo: add to config file
    max_peaks = 4
    mz_list = {}
    for db_filename in db_filenames:
        db_name = os.path.splitext(os.path.basename(db_filename))[0]
        for adduct in adducts:
            _mz_list = get_save_isotope_patterns(db_filename, db_dump_folder, db_name, adduct, isocalc_sig,
                                                 isocalc_resolution, charge, verbose, instrument)
            # add patterns to total list
            for sum_formula in sum_formulae:
                if sum_formula not in _mz_list:  # could be missing if [M-a] would have negative atoms
                    continue
                per_adduct = mz_list.setdefault(sum_formula, {})
                pattern = _mz_list[sum_formula][adduct]
                n = min(max_peaks, len(pattern[0]))
                # Indices of the n largest values of pattern[1], largest first.
                sort_idx = np.argsort(pattern[1])[-n:][::-1]
                per_adduct[adduct] = [pattern[0][sort_idx], pattern[1][sort_idx]]

    # Clean up
    # poorly formatted formulae may not receive an isotope pattern
    rm_list = [sum_formula for sum_formula in sum_formulae if sum_formula not in mz_list]
    if verbose:
        print("{} formula to remove".format(len(rm_list)))
    for sum_formula in rm_list:
        sum_formulae.pop(sum_formula, None)
    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
    isocalc_resolution = instrument.points_per_mz(isocalc_sig)
    db_dump_folder = config['file_inputs']['database_load_folder']
    if len(config['isotope_generation']['charge']) > 1:
        print 'Warning: only first charge state currently accepted'
    charge = int('{}{}'.format(config['isotope_generation']['charge'][0]['polarity'],
                               config['isotope_generation']['charge'][0][
                                   'n_charges']))  # currently only supports first charge!!
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    load_file = '{}/{}_{}_{}_{}_{}.dbasedump'.format(db_dump_folder, db_name, instrument.__class__.__name__, adduct, isocalc_sig, isocalc_resolution)
    print "{} -> generating".format(load_file)

    mz_list_tmp = calculate_isotope_patterns(sum_formulae, adduct=adduct, isocalc_sig=isocalc_sig,
                                                    isocalc_resolution=isocalc_resolution, charge=charge, verbose=False, instrument=instrument)
    spatial_metabolomics.save_pattern(load_file, db_dump_folder, mz_list_tmp)
    return True


if __name__ == '__main__':
    # Script entry point: for one hard-coded JSON config, call
    # generate_isotope_pattern once per (database file, adduct) pair.
    json_filename = "/home/palmer/Documents/tmp_data/MB/URenn/16135s1-3_mousebrain_dhbsub.json"
    config = spatial_metabolomics.get_variables(json_filename)
    adducts = [a['adduct'] for a in config['isotope_generation']['adducts']]
    db_filenames = config['file_inputs']['database_file']
    # A single database path may be given as a bare string; wrap it in a list.
    if isinstance(db_filenames, basestring):
        db_filenames = [db_filenames]
    for db_filename in db_filenames:
        sum_formulae = parse_databases.read_generic_csv(
            db_filename, header=1, idcol=0, namecol=1, sfcol=2, sep='\t')
        # Discard the entry keyed by an empty sum formula, if present.
        if '' in sum_formulae:
            del sum_formulae['']
        for a in adducts:
            generate_isotope_pattern(sum_formulae, a, config, db_filename)
        adduct=adduct,
        isocalc_sig=isocalc_sig,
        isocalc_resolution=isocalc_resolution,
        charge=charge,
        verbose=False,
        instrument=instrument)
    spatial_metabolomics.save_pattern(load_file, db_dump_folder, mz_list_tmp)
    return True


if __name__ == '__main__':
    # Script entry point: read the hard-coded JSON config, then call
    # generate_isotope_pattern for each adduct of each listed database file.
    json_filename = "/home/palmer/Documents/tmp_data/MB/URenn/16135s1-3_mousebrain_dhbsub.json"
    config = spatial_metabolomics.get_variables(json_filename)
    adducts = [a['adduct'] for a in config['isotope_generation']['adducts']]
    db_filenames = config['file_inputs']['database_file']
    if isinstance(db_filenames, basestring):
        # One path given as a plain string: treat it as a one-item list.
        db_filenames = [db_filenames]
    for db_filename in db_filenames:
        sum_formulae = parse_databases.read_generic_csv(
            db_filename,
            header=1,
            idcol=0,
            namecol=1,
            sfcol=2,
            sep='\t',
        )
        # Remove the entry whose sum formula is the empty string, if any.
        if '' in sum_formulae:
            del sum_formulae['']
        for a in adducts:
            generate_isotope_pattern(sum_formulae, a, config, db_filename)