def create_splash(input_file, output_file, separator, spectrum_type, spectrum_col, origin_col):
    """Compute a splash code for every row of a delimited text file.

    Each input line is stripped, split on ``separator``, and the field in
    column ``spectrum_col`` is wrapped in a ``Spectrum`` and handed to
    ``Splash.splash``.  The resulting code is written as a new first column,
    followed by all original fields, joined with the same ``separator``.
    Progress and timing statistics are reported on ``sys.stderr``.

    Args:
        input_file: Path of the text file to read, one spectrum per line.
        output_file: Path of the file to write, or ``None`` to write to
            ``sys.stdout``.
        separator: Field delimiter used both to split input rows and to
            join output rows.
        spectrum_type: Passed unchanged as the second argument of
            ``Spectrum``.
        spectrum_col: 1-based column number holding the spectrum string.
        origin_col: 1-based column number of the origin field.  The field
            is looked up but does not otherwise affect the output.

    Raises:
        IndexError: If a row has fewer fields than ``spectrum_col`` or
            ``origin_col`` requires.
        OSError: If ``input_file`` or ``output_file`` cannot be opened.
    """
    # Imported locally so the function does not depend on the module's
    # top-level import list.
    import contextlib

    start_time = time.time()
    splasher = Splash()

    # Pre-initialised so the summary below is well-defined for empty input.
    processed = 0

    with contextlib.ExitStack() as stack:
        f = stack.enter_context(open(input_file, 'r'))
        if output_file is not None:
            fout = stack.enter_context(open(output_file, 'w'))
        else:
            # sys.stdout is deliberately NOT registered with the stack: it
            # belongs to the interpreter and must stay open for the caller.
            fout = sys.stdout

        for processed, raw_line in enumerate(f, start=1):
            # Handle input
            fields = raw_line.strip().split(separator)

            # The origin value itself is unused; the lookup is kept so rows
            # that lack the origin column still fail with IndexError.
            _ = fields[origin_col - 1]
            spectrum_string = fields[spectrum_col - 1]

            spectrum = Spectrum(spectrum_string, spectrum_type)
            splash_code = splasher.splash(spectrum)

            # Print the calculated splash id followed by the original fields
            print(splash_code, *fields, sep=separator, file=fout)

            if processed % 10000 == 0:
                print('processed %d spectra, %.2f ms average time to splash a spectrum' % (processed, 1000 * (time.time() - start_time) / processed), file=sys.stderr)

    elapsed = time.time() - start_time

    print('finished processing, processing took: %.2f s' % elapsed, file=sys.stderr)
    print('processed %d spectra' % processed, file=sys.stderr)

    # An average is meaningless (and a division by zero) with no spectra.
    if processed:
        print('average time including io to splash a spectra is %.2f ms' % (1000 * elapsed / processed), file=sys.stderr)