示例#1
0
def create_splash(input_file, output_file, separator, spectrum_type, spectrum_col, origin_col):
    """Compute a splash code for every line of a delimited spectrum file.

    Each input line is stripped and split on ``separator``. The field at
    ``spectrum_col`` is wrapped in a ``Spectrum`` together with
    ``spectrum_type`` and handed to ``Splash.splash``. The resulting code is
    written first, followed by every field of the input line, joined with
    ``separator``. Progress (every 10000 lines) and a final timing summary
    are written to ``sys.stderr``.

    Args:
        input_file: Path of the text file to read, one record per line.
        output_file: Path of the file to write, or ``None`` to write to
            ``sys.stdout``.
        separator: Field delimiter used for both splitting input lines and
            joining output fields.
        spectrum_type: Passed through unchanged as the second argument of
            ``Spectrum``.
        spectrum_col: 1-based index of the column holding the spectrum string.
        origin_col: 1-based index of the origin column. Accepted for
            interface compatibility; the whole input line is echoed to the
            output, so the origin field is not read separately.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    start_time = time.time()
    splasher = Splash()

    # Pre-bind so the summary below works even if the input file is empty.
    count = 0

    with ExitStack() as stack:
        f = stack.enter_context(open(input_file, 'r'))

        # Only a file opened here is registered for closing; sys.stdout is
        # borrowed and must stay open for the rest of the program.
        if output_file is None:
            fout = sys.stdout
        else:
            fout = stack.enter_context(open(output_file, 'w'))

        for count, line in enumerate(f, start=1):
            # NOTE(review): strip() removes all surrounding whitespace, so
            # with a whitespace separator (e.g. tab) empty leading/trailing
            # fields are dropped and column positions shift. Confirm whether
            # that is intended before changing it.
            fields = line.strip().split(separator)

            spectrum = Spectrum(fields[spectrum_col - 1], spectrum_type)
            splash_code = splasher.splash(spectrum)

            # Emit the splash code followed by the original fields.
            print(splash_code, *fields, sep=separator, file=fout)

            if count % 10000 == 0:
                print('processed %d spectra, %.2f ms average time to splash a spectrum' % (count, 1000 * (time.time() - start_time) / count), file=sys.stderr)

    elapsed = time.time() - start_time
    print('finished processing, processing took: %.2f s' % elapsed, file=sys.stderr)
    print('processed %d spectra' % count, file=sys.stderr)
    if count:
        # An average is undefined when nothing was processed.
        print('average time including io to splash a spectra is %.2f ms' % (1000 * elapsed / count), file=sys.stderr)