def nist_ms_comparison(sample_name, mass_list, mass_spec):
    """
    Search the NIST mainlib database for the named sample's spectrum and
    return details of the top 5 hits.

    :param sample_name: Name of the sample; the file "MSP/<sample_name>.MSP"
        must already exist for the NIST search to read.
    :param mass_list: Unused here; kept for interface compatibility with callers.
    :param mass_spec: Unused here; kept for interface compatibility with callers.

    :return: Mapping of hit label ("Hit1" ... "Hit5") to a dict with
        "Name", "MF", "RMF", "CAS" and "Lib" entries for that hit.
    :rtype: dict

    On any error the NIST configuration is restored and the process exits
    with status 1 (matching the original behaviour).
    """
    try:
        pynist.generate_ini(nist_path, "mainlib", 5)

        raw_output = pynist.nist_db_connector(
            nist_path, "MSP/{}.MSP".format(sample_name))

        # Normalise hit markers ("Hit 1 : " etc.) to "Hit1;" so each hit
        # row becomes a ";"-separated record.
        for i in range(1, 6):
            raw_output = raw_output.replace(
                "Hit {} : ".format(i), "Hit{};".format(i))

        raw_output = raw_output.replace("<<", '"').replace(">>", '"').split("\n")

        matches_dict = {}

        # Rows 1..5 of the output each describe one hit; field positions
        # within the ";"-split row are fixed by the NIST output format.
        for i in range(1, 6):
            row = raw_output[i].split(";")
            matches_dict[row[0]] = {
                "Name": row[1],
                "MF": row[3].replace("MF:", '').replace(" ", ''),
                "RMF": row[4].replace("RMF:", '').replace(" ", ''),
                "CAS": row[6].replace("CAS:", '').replace(" ", ''),
                "Lib": row[8].replace("Lib:", '').replace(" ", ''),
            }

    except Exception:
        traceback.print_exc()  # print the error
        # Restore the NIST configuration before bailing out.
        pynist.reload_ini(nist_path)
        sys.exit(1)

    print("\n")
    pynist.reload_ini(nist_path)
    return matches_dict
def nist_ms_comparison(self, sample_name, n_hits=5):
    """
    Search the NIST mainlib database for the named sample's spectrum and
    return details of the top ``n_hits`` hits.

    :param sample_name: Name of the sample; "<msp_dir>/<sample_name>.MSP"
        must already exist for the NIST search to read.
    :type sample_name: str
    :param n_hits: Number of hits to request from the NIST search.
    :type n_hits: int

    :return: Mapping of hit label ("Hit1" ... "Hit<n_hits>") to a dict
        with "Name", "MF", "RMF" and "CAS" entries for that hit.
    :rtype: dict

    On any error the NIST configuration is restored and the process exits
    with status 1 (matching the original behaviour).
    """
    try:
        pynist.generate_ini(internal_config.nist_path, "mainlib", n_hits)

        raw_output = pynist.nist_db_connector(
            internal_config.nist_path,
            os.path.join(internal_config.msp_dir, "{}.MSP".format(sample_name)))

        # Process output: normalise hit markers ("Hit 1 : " or "Hit 1: ")
        # to "Hit1;" so each hit row becomes a ";"-separated record.
        # NB: the original chained the "Hit {} : " replacement twice; the
        # second application was a guaranteed no-op and has been removed.
        for i in range(n_hits + 1):
            raw_output = raw_output \
                .replace("Hit {} : ".format(i), "Hit{};".format(i)) \
                .replace("Hit {}: ".format(i), "Hit{};".format(i))

        raw_output = raw_output.replace("<<", '"').replace(">>", '"').split("\n")

        matches_dict = {}

        # Rows 1..n_hits each describe one hit; parse with csv so quoted
        # compound names containing ";" are not split apart.
        for i in range(1, n_hits + 1):
            row = list(
                csv.reader([raw_output[i]], delimiter=";", quotechar='"'))[0]

            matches_dict[row[0]] = {
                "Name": row[1],
                "MF": row[3].replace("MF:", '').replace(" ", ''),
                "RMF": row[4].replace("RMF:", '').replace(" ", ''),
                "CAS": row[6].replace("CAS:", '').replace(" ", ''),
            }

    except Exception:
        traceback.print_exc()  # print the error
        # Restore the NIST configuration before bailing out.
        pynist.reload_ini(internal_config.nist_path)
        sys.exit(1)

    print("\r\033[KSearch Complete")  # , end='')
    pynist.reload_ini(internal_config.nist_path)
    return matches_dict
def run(self):
    """
    Run the configured post-processing pipeline.

    Loads the per-prefix ``.expr`` experiment files, then — depending on
    the ``do_*`` flags on ``self.config`` — performs qualitative
    processing, merging, duplicate-compound aggregation into chart data,
    spectra CSV export and chart generation; finally writes a ``.info``
    file listing the prefixes and restores the NIST configuration.

    NOTE(review): relies on module-level ``rt_alignment`` and
    ``ms_alignment`` mappings keyed by prefix — presumably produced by an
    earlier alignment step; confirm they are populated before calling.
    """
    # Indicate which steps to perform
    print(f"do_qualitative: {self.config.do_qualitative}")
    print(f"do_merge: {self.config.do_merge}")
    print(f"do_counter: {self.config.do_counter}")
    print(f"do_spectra: {self.config.do_spectra}")
    print(f"do_charts: {self.config.do_charts}")

    # Loads the experiment file created during Quantitative Processing
    for prefix in self.config.prefixList:
        file_name = os.path.join(self.config.expr_dir, prefix + ".expr")
        self.expr_list.append(load_expr(file_name))

    if self.config.do_qualitative:
        print("Qualitative Processing in Progress...")
        for prefix in self.config.prefixList:
            # print(list(rt_alignment[prefix]))
            self.qualitative_processing(prefix, list(rt_alignment[prefix]))

    if self.config.do_merge:
        self.merge()

    if self.config.do_counter:
        # Build the chart data table, indexed by compound name.
        chart_data = self.match_counter(self.ms_comparisons(ms_alignment))
        chart_data = chart_data.set_index("Compound", drop=True)

        # remove duplicate compounds:
        # chart_data_count = Counter(chart_data["Compound"])
        chart_data_count = Counter(chart_data.index)

        # Rows that replace duplicated compounds: summed peak areas plus
        # a standard deviation across prefixes.
        replacement_data = {
                "Compound": [], f"{self.lot_name} Peak Area": [],
                f"{self.lot_name} Standard Deviation": []
                }
        for prefix in self.config.prefixList:
            replacement_data[prefix] = []

        for compound in chart_data_count:
            if chart_data_count[compound] > 1:
                # Collapse the duplicate rows for this compound into one
                # aggregate row, then drop the originals.
                replacement_data["Compound"].append(compound)
                replacement_data[f"{self.lot_name} Peak Area"].append(
                        sum(chart_data.loc[compound, f"{self.lot_name} Peak Area"]))

                peak_data = []
                for prefix in self.config.prefixList:
                    replacement_data[prefix].append(
                            sum(chart_data.loc[compound, prefix]))
                    peak_data.append(sum(chart_data.loc[compound, prefix]))

                replacement_data[
                        f"{self.lot_name} Standard Deviation"].append(
                        numpy.std(peak_data))

                chart_data = chart_data.drop(compound, axis=0)

        replacement_data = pandas.DataFrame(replacement_data)
        replacement_data = replacement_data.set_index("Compound", drop=False)
        # NOTE(review): DataFrame.append is deprecated in modern pandas;
        # pandas.concat is the usual replacement — confirm pandas version.
        chart_data = chart_data.append(replacement_data, sort=False)
        chart_data.sort_index(inplace=True)
        chart_data = chart_data.drop("Compound", axis=1)
        chart_data['Compound Names'] = chart_data.index

        chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)), sep=";")
    else:
        # Counter step skipped: reload previously-saved chart data.
        chart_data = pandas.read_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)),
                sep=";", index_col=0)
        # chart_data = chart_data.set_index("Compound", drop=True)

    if self.config.do_spectra:
        self.generate_spectra_from_alignment(rt_alignment, ms_alignment)

        # Write Mass Spectra to OpenChrom-like CSV files
        def generate_spectra_csv(rt_data, ms_data, name):
            # Write Mass Spectra to OpenChrom-like CSV files
            # Header row uses the mass list of the first spectrum;
            # presumably all spectra share one mass list — verify.
            ms = ms_data[0]  # first mass spectrum
            spectrum_csv_file = os.path.join(
                    self.config.spectra_dir, self.lot_name, f"{name}_data.csv")
            spectrum_csv = open(spectrum_csv_file, 'w')
            spectrum_csv.write(
                    'RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
            spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
            spectrum_csv.write("\n")

            # One row per retention time: RT in ms, RT in minutes, RI=0,
            # then the intensity values.
            for rt, ms in zip(rt_data, ms_data):
                spectrum_csv.write(
                        f"{int(rt * 60000)};{rounders(rt, '0.0000000000')};0;")
                spectrum_csv.write(';'.join(
                        str(intensity) for intensity in ms.mass_spec))
                spectrum_csv.write('\n')
            spectrum_csv.close()

        for prefix in self.config.prefixList:
            print(prefix)
            # print(rt_alignment[prefix])
            # print(ms_alignment[prefix])
            generate_spectra_csv(
                    rt_alignment[prefix], ms_alignment[prefix], prefix)

    if self.config.do_charts:
        print("\nGenerating Charts")

        # Persist the chart data alongside the generated charts.
        chart_data.to_csv(os.path.join(
                self.config.csv_dir,
                "{}_CHART_DATA.csv".format(self.lot_name)), sep=";")

        maybe_make(os.path.join(self.config.charts_dir, self.lot_name))

        if chart_data.empty:
            print("ALERT: No peaks were found for compounds that have")
            print(" previously been reported in literature.")
            print(" Check the results for more information\n")
        else:
            # Imported lazily so chart dependencies are only needed when
            # charts are actually generated.
            from GSMatch.GSMatch_Core.charts import box_whisker_wrapper, radar_chart_wrapper, \
                mean_peak_area_wrapper, \
                peak_area_wrapper
            # from GSMatch.GSMatch_Core.charts import peak_area_wrapper, radar_chart_wrapper

            radar_chart_wrapper(
                    chart_data, [self.lot_name], use_log=10, legend=False,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "radar_log10_peak_area"))
            radar_chart_wrapper(
                    chart_data, [self.lot_name], use_log=False, legend=False,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "radar_peak_area"))
            mean_peak_area_wrapper(
                    chart_data, [self.lot_name],
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "mean_peak_area"))
            peak_area_wrapper(
                    chart_data, self.lot_name, self.config.prefixList,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "peak_area_percentage"))
            peak_area_wrapper(
                    chart_data, self.lot_name, self.config.prefixList,
                    percentage=False,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "peak_area"))
            peak_area_wrapper(
                    chart_data, self.lot_name, self.config.prefixList,
                    use_log=10,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "log10_peak_area_percentage"))

            samples_to_compare = [(self.lot_name, self.config.prefixList)]

            box_whisker_wrapper(
                    chart_data, samples_to_compare,
                    mode=os.path.join(self.config.charts_dir, self.lot_name,
                                      "box_whisker"))

    # Record the prefixes processed in this run.
    with open(
            os.path.join(self.config.results_dir, f"{self.lot_name}.info"),
            "w") as info_file:
        for prefix in self.config.prefixList:
            info_file.write(f"{prefix}\n")

    # TODO: self.make_archive()

    pynist.reload_ini(self.config.nist_path)

    print("\nComplete.")