def mass_spec_factory(self, rt, datadict):
    """Build a low-resolution centroid mass spectrum from one scan's raw data.

    Parameters
    ----------
    rt : float
        Retention time assigned to the spectrum.
    datadict : dict
        Must contain parallel lists under 'scan_number', 'mz' and 'abundance'.

    Returns
    -------
    MassSpecCentroidLowRes
    """
    scan_index = datadict['scan_number'][0]

    # sort m/z and abundance together, ascending by m/z
    mz_list, abundance_list = zip(*sorted(zip(datadict['mz'], datadict['abundance'])))

    data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list}

    d_params = default_parameters(self._ms[scan_index]._filename)

    d_params["rt"] = rt
    d_params["scan_number"] = scan_index
    d_params['label'] = Labels.gcms_centroid
    d_params["polarity"] = self._ms[scan_index].polarity
    d_params['analyzer'] = self._ms[scan_index].analyzer
    d_params['instrument_label'] = self._ms[scan_index].instrument_label
    # BUG FIX: filename_path was previously (copy-paste) set to instrument_label;
    # use the same filename passed to default_parameters above
    d_params["filename_path"] = self._ms[scan_index]._filename

    ms = MassSpecCentroidLowRes(data_dict, d_params)

    return ms
def get_output_parameters(self, polarity, scan_index=0):
    """Assemble the output-parameter dict for the scan at *scan_index*.

    Pulls per-scan values from the 'MassSpecAttrs' group and file-level
    values from the high-level attributes.
    """
    d_params = default_parameters(self.file_location)

    d_params["filename_path"] = self.file_location
    d_params["scan_number"] = int(self.scans[scan_index])

    # per-scan attributes stored under the 'MassSpecAttrs' group
    # (note: 'baselise_noise' spelling matches the stored attribute names)
    per_scan_attrs = ('polarity', 'rt', 'tic', 'mobility_scan', 'mobility_rt',
                      'Aterm', 'Bterm', 'Cterm',
                      'baselise_noise', 'baselise_noise_std')
    for attr_name in per_scan_attrs:
        d_params[attr_name] = self.get_raw_data_attr_data(
            scan_index, 'MassSpecAttrs', attr_name)

    # file-level attributes
    for attr_name in ('analyzer', 'instrument_label', 'sample_name'):
        d_params[attr_name] = self.get_high_level_attr_data(attr_name)

    return d_params
def get_output_parameters(self):
    """Build the output-parameter dict for booster profile data."""
    d_params = default_parameters(self.file_location)

    d_params["polarity"] = self.polarity
    d_params["filename_path"] = self.file_location
    d_params["mobility_scan"] = 0
    d_params["mobility_rt"] = 0
    d_params["scan_number"] = 0
    d_params["rt"] = self.get_attr_data(0, 'r_h_start_time')
    d_params['label'] = Labels.booster_profile

    # calibration terms are packed together under 'r_cparams'
    cparams = self.get_attr_data(0, 'r_cparams')
    d_params["Aterm"] = cparams[0]
    d_params["Bterm"] = cparams[1]

    return d_params
def get_output_parameters(polarity, file_location):
    """Output-parameter dict for a generic simulated data source."""
    d_params = default_parameters(file_location)

    d_params.update({
        'analyzer': 'Generic Simulated',
        'instrument_label': 'Generic Simulated',
        "polarity": polarity,
        "filename_path": file_location,
        "mobility_scan": 0,
        "mobility_rt": 0,
        "scan_number": 0,
        "rt": 0,
        Labels.label: Labels.simulated_profile,
    })

    return d_params
def set_metadata(self, firstScanNumber=0, lastScanNumber=0, scans_list=False, label=Labels.thermo_profile):
    """Collect metadata to be ingested in the mass spectrum object.

    scans_list: list[int] or False
    lastScanNumber: int
    firstScanNumber: int
    """
    d_params = default_parameters(self.file_path)

    # assumes scans is full scan or reduced profile scan
    d_params['label'] = label

    if scans_list:
        # explicit list of scans: record it and read polarity from the first
        d_params['scan_number'] = scans_list
        d_params['polarity'] = self.get_polarity_mode(scans_list[0])
    else:
        # contiguous range: record it as a "first-last" string
        d_params['scan_number'] = '{}-{}'.format(firstScanNumber, lastScanNumber)
        d_params['polarity'] = self.get_polarity_mode(firstScanNumber)

    d_params['analyzer'] = self.iRawDataPlus.GetInstrumentData().Model
    d_params['instrument_label'] = self.iRawDataPlus.GetInstrumentData().Name

    return d_params
def get_output_parameters(self, polarity, scan_index=0):
    """Output-parameter dict for Bruker data.

    Parameters
    ----------
    polarity : int
        Ion polarity to record in the parameters.
    scan_index : int, optional
        Scan number to record (defaults to 0).

    Returns
    -------
    dict
    """
    # TODO pull attrs from json settings file in load_settings function
    # MassSpecAttrs group and analyzer, instrument_label and sample_name
    # NOTE: removed unused local import (`from copy import deepcopy`)
    output_parameters = default_parameters(self.file_location)

    if self.isCentroid:
        output_parameters['label'] = Labels.corems_centroid
    else:
        output_parameters['label'] = Labels.bruker_profile

    output_parameters['analyzer'] = self.analyzer
    output_parameters['instrument_label'] = self.instrument_label
    output_parameters['sample_name'] = self.sample_name

    # calibration terms are not available from this reader
    output_parameters["Aterm"] = None
    output_parameters["Bterm"] = None
    output_parameters["Cterm"] = None

    output_parameters["polarity"] = polarity

    '''scan_number and rt will be need to lc ms'''

    output_parameters["mobility_scan"] = 0
    output_parameters["mobility_rt"] = 0
    output_parameters["scan_number"] = scan_index
    output_parameters["rt"] = 0

    return output_parameters
def run(self):
    """Populate the gcms obj."""
    params = default_parameters(self.file_location)
    self.import_mass_spectra(params)
def get_mass_spectra(self, auto_process=True):
    """Import the mass spectra and return the populated LC-MS object."""
    params = default_parameters(self.file_location)
    self._import_mass_spectra(params, auto_process=auto_process)
    return self.lcms
def run(self):
    """Thread entry point: import and automatically process each mass spectrum.

    Use the get_mass_spectra class to import without processing
    mass spectrum.
    """
    params = default_parameters(self.file_location)
    self._import_mass_spectra(params)
def get_summed_mass_spectrum(self, initial_scan_number, final_scan_number=None,
                             auto_process=True, pd_method=True, pd_merge_n=100):
    """Sum profile scans over a scan range into a single MassSpecProfile.

    Parameters
    ----------
    initial_scan_number : int or list[int]
        First scan of the range, or an explicit list of scans to sum.
    final_scan_number : int, optional
        Last scan of the range; defaults to the file's final scan.
    auto_process : bool
        If True, performs peak picking and noise thresholding on creation.
    pd_method : bool
        If True, uses pandas to align and sum data; otherwise accumulates
        abundances per exact m/z in a plain dict.

    Returns
    -------
    MassSpecProfile
    """
    d_params = default_parameters(self.file_location)

    # assumes scans is full scan or reduced profile scan
    d_params["label"] = Labels.thermo_profile

    if isinstance(initial_scan_number, list):
        d_params["polarity"] = self.get_polarity_mode(initial_scan_number[0])
        scanrange = initial_scan_number
    else:
        d_params["polarity"] = self.get_polarity_mode(initial_scan_number)
        if final_scan_number is None:
            final_scan_number = self._final_scan_number
        scanrange = range(initial_scan_number, final_scan_number + 1)

    if pd_method:

        def sort_sum_df(df):
            """Sort by index and sum rows with exact matching indexes (m/z)."""
            return df.sort_index().groupby(level=0).sum()

        # initialise empty Pandas series
        big_df = pd.Series(index=[], dtype='float64')

        for scan_number in tqdm(scanrange):
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)
            segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan_number, scanStatistics)

            tmp_df = pd.Series(index=list(segmentedScan.Positions),
                               dtype='float64',
                               data=list(segmentedScan.Intensities))
            # BUG FIX: Series.append was removed in pandas 2.0; use pd.concat
            big_df = pd.concat([big_df, tmp_df])

            # merging/summing earlier slows things down a lot with limited
            # benefit, but for complex data it is necessary to stop the
            # iterations getting too slow (and memory-heavy)
            if scan_number % pd_merge_n == 0:
                big_df = sort_sum_df(big_df)

        big_df = sort_sum_df(big_df)
        data_dict = {
            Labels.mz: list(big_df.index.values),
            Labels.abundance: list(big_df.values),
        }
    else:
        # assumes data is aligned: accumulate abundance per exact m/z value
        all_mz = dict()

        for scan_number in tqdm(scanrange):
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)
            segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan_number, scanStatistics)

            len_data = segmentedScan.Positions.Length
            for i in range(len_data):
                mz = segmentedScan.Positions[i]
                abundance = segmentedScan.Intensities[i]
                if mz in all_mz:
                    all_mz[mz] = all_mz[mz] + abundance
                else:
                    all_mz[mz] = abundance

        mz_all = []
        abun_all = []
        for mz in sorted(all_mz):
            mz_all.append(mz)
            abun_all.append(all_mz[mz])

        data_dict = {
            Labels.mz: mz_all,
            Labels.abundance: abun_all,
        }

    print('Summed. Now Processing.')
    mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    return mass_spec
def run(self):
    """Creates the lcms obj."""
    params = default_parameters(self.file_location)
    self._import_mass_spectra(params)
def deconvolution(self, peaks_entity_data, maximum_tic):
    """Turn deconvoluted peak entities into GCPeak objects and plot the result.

    Parameters
    ----------
    peaks_entity_data : dict
        Maps an apex retention time to a dict of {rt: scan-data dict} plus a
        "ref_apex_rt" entry.
    maximum_tic : float
        Maximum TIC used to normalize the smoothed TIC threshold.

    Side effects
    ------------
    Appends to self.gcpeaks and draws matplotlib plots.
    """
    i = 0
    tic_list = []
    rt_list = []

    for apex_rt, datadict in sorted(peaks_entity_data.items()):

        if apex_rt in datadict.keys():

            apex_data = datadict[apex_rt]
            ref_apex_rt = datadict["ref_apex_rt"]

            tic = sum(apex_data.get('abundance'))
            norm_smooth_tic = (tic / maximum_tic) * 100

            # keep only peaks above the height threshold with enough points
            if norm_smooth_tic > self.chromatogram_settings.peak_height_min_percent and len(
                    apex_data['mz']) > 3:

                scan_index = apex_data['scan_number'][0]
                # sort m/z and abundance together, ascending by m/z
                mz_list, abundance_list = zip(
                    *sorted(zip(apex_data['mz'], apex_data['abundance'])))

                data_dict = {
                    Labels.mz: mz_list,
                    Labels.abundance: abundance_list
                }

                d_params = default_parameters(self._ms[scan_index]._filename)

                d_params["rt"] = apex_rt
                d_params["scan_number"] = scan_index
                d_params['label'] = Labels.gcms_centroid
                d_params["polarity"] = self._ms[scan_index].polarity
                d_params['analyzer'] = self._ms[scan_index].analyzer
                d_params['instrument_label'] = self._ms[scan_index].instrument_label
                # BUG FIX: filename_path was previously (copy-paste) set to
                # instrument_label; use the filename passed to default_parameters
                d_params["filename_path"] = self._ms[scan_index]._filename

                ms = MassSpecCentroidLowRes(data_dict, d_params)

                # needs to define peak start and end, passing just minus and
                # plus one from apex pos for now
                gc_peak = GCPeak(ms, (i - 1, i, i + 1))
                i += 1
                self.gcpeaks.append(gc_peak)

                tic_list.append(tic)
                rt_list.append(ref_apex_rt)

                # collect the per-scan TIC trace across this peak's rt window
                peak_rt = []
                peak_tic = []
                for rt, each_datadict in datadict.items():
                    if rt != "ref_apex_rt":
                        peak_rt.append(rt)
                        peak_tic.append(sum(each_datadict["abundance"]))

                peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic)))

                # (removed dead commented-out stem-plot scaffolding)
                plt.plot(peak_rt, peak_tic)

    # overlay the detected apexes on the processed TIC chromatogram
    plt.plot(self.retention_time, self._processed_tic, c='black')
    plt.plot(rt_list, tic_list, c='black', marker='^', linewidth=0)
    plt.show()
def get_summed_mass_spectrum(self, auto_process=True, pd_method=True, pd_merge_n=100) -> MassSpecProfile:
    '''
    Manually sum mass spectrum over a scan range
    start_scan: int
    end_scan: int
    auto_process: bool
        If true performs peak picking, and noise threshold calculation
        after creation of mass spectrum object
    pd_method: bool
        If true uses pandas to align and sum data
        Else: Assumes data is aligned and sum each data point across
        all mass spectra
    Returns:
        MassSpecProfile
    '''
    d_params = default_parameters(self.file_path)

    # assumes scans is full scan or reduced profile scan
    d_params['label'] = Labels.thermo_profile

    if isinstance(self.start_scan, list):
        d_params['polarity'] = self.get_polarity_mode(self.start_scan[0])
        scanrange = self.start_scan
    else:
        d_params['polarity'] = self.get_polarity_mode(self.start_scan)
        scanrange = range(self.start_scan, self.end_scan + 1)

    if pd_method:

        def sort_sum_df(df):
            '''Sort by index and sum rows with exact matching indexes (m/z).'''
            return df.sort_index().groupby(level=0).sum()

        # initialise empty Pandas series
        big_df = pd.Series(index=[], dtype='float64')

        for scan_number in tqdm(scanrange):
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)
            segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan_number, scanStatistics)

            tmp_df = pd.Series(index=list(segmentedScan.Positions),
                               dtype='float64',
                               data=list(segmentedScan.Intensities))
            # BUG FIX: Series.append was removed in pandas 2.0; use pd.concat
            big_df = pd.concat([big_df, tmp_df])

            # merging/summing earlier slows things down a lot with limited
            # benefit, but for complex data it is necessary to stop the
            # iterations getting too slow (and memory-heavy)
            if scan_number % pd_merge_n == 0:
                big_df = sort_sum_df(big_df)

        big_df = sort_sum_df(big_df)
        data_dict = {
            Labels.mz: list(big_df.index.values),
            Labels.abundance: list(big_df.values),
        }
    else:
        # assumes data is aligned: accumulate abundance per exact m/z value
        all_mz = dict()

        for scan_number in tqdm(scanrange):
            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
                scan_number)
            segmentedScan = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
                scan_number, scanStatistics)

            len_data = segmentedScan.Positions.Length
            for i in range(len_data):
                mz = segmentedScan.Positions[i]
                abundance = segmentedScan.Intensities[i]
                if mz in all_mz:
                    all_mz[mz] = all_mz[mz] + abundance
                else:
                    all_mz[mz] = abundance

        mz_all = []
        abun_all = []
        for mz in sorted(all_mz):
            mz_all.append(mz)
            abun_all.append(all_mz[mz])

        data_dict = {
            Labels.mz: mz_all,
            Labels.abundance: abun_all,
        }

    print('Summed. Now Processing.')
    mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    return mass_spec
def get_transient(self, scan_number=1):
    """Read one transient from the Bruker data directory and wrap it in a
    Transient object.

    Parameters
    ----------
    scan_number : int
        1-based scan number, used to seek within a multi-scan 'ser' file.

    Returns
    -------
    Transient
    """
    file_d_params = self.parse_parameters(self.parameter_filename_location)

    self.fix_freq_limits(file_d_params)

    from sys import platform
    if platform == "win32":
        # Windows: C long is 32 bit
        dt = dtype("l")
    else:
        dt = dtype("i")

    # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values
    output_parameters = deepcopy(
        default_parameters(self.d_directory_location))

    if self.transient_data_path.name == 'ser':
        # BUG FIX: Path.exists is a method — the previous attribute access
        # (`self.scan_attr.exists`) was always truthy, so the fallback to
        # the 0 default values could never trigger
        if self.scan_attr.exists():
            dict_scan_rt_tic = self.get_scan_attr()

            output_parameters["scan_number"] = scan_number
            output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0]
            output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1]

    output_parameters["analyzer"] = "ICR"
    output_parameters["label"] = "Bruker_Frequency"

    output_parameters["Aterm"] = float(file_d_params.get("ML1"))
    output_parameters["Bterm"] = float(file_d_params.get("ML2"))
    output_parameters["Cterm"] = float(file_d_params.get("ML3"))

    output_parameters["exc_high_freq"] = float(
        file_d_params.get("EXC_Freq_High"))
    output_parameters["exc_low_freq"] = float(
        file_d_params.get("EXC_Freq_Low"))

    output_parameters["bandwidth"] = float(file_d_params.get("SW_h"))
    output_parameters["number_data_points"] = int(file_d_params.get("TD"))
    output_parameters["polarity"] = str(file_d_params.get("Polarity"))

    data_points = int(file_d_params.get("TD"))
    scan = output_parameters["scan_number"]

    if self.transient_data_path.name == 'ser':
        with open(self.transient_data_path, 'rb') as databin:
            # seek to the start of this scan's data (4 bytes per point)
            databin.seek((scan - 1) * 4 * data_points)
            # read scan data and parse to 32-bit ints
            data = frombuffer(databin.read(4 * data_points), dtype=dt)
    else:
        data = fromfile(self.transient_data_path.open(), dtype=dt)

    return Transient(data, output_parameters)