def write_event_durations_to_file(directories, destination, sample_fraction=0.25):
    """Sample fast5 reads from each directory and dump event fields to CSV files.

    Three files are created (and truncated if they already exist) by plain
    string concatenation onto ``destination``:
    ``template_durations.csv``, ``complement_durations.csv`` and
    ``all_durations.csv``.  For every sampled read that exposes both a
    ``template_event_table`` and a ``complement_event_table``, one CSV row is
    written per event holding the fields at index 3 and 6 of the event row
    followed by a strand tag (``t`` or ``c``).
    NOTE(review): the meaning of columns 3 and 6 is not visible here --
    confirm against the event-table schema.

    Args:
        directories: iterable of directory prefixes; ``"*.fast5"`` is appended
            verbatim, so each entry should end with a path separator.
        destination: prefix prepended verbatim to the output file names.
        sample_fraction: fraction of the files in each directory to sample
            (rounded down).  Must not exceed 1, otherwise ``random.sample``
            raises ``ValueError``.
    """
    # ``map(None, a, b)`` is Python 2 only; zip_longest pads the shorter
    # table with None in exactly the same way and exists on both versions.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2
        from itertools import izip_longest as zip_longest

    # Context managers guarantee the CSVs are flushed and closed even if a
    # read fails part-way through.
    with open(destination + "template_durations.csv", 'w') as template_file, \
            open(destination + "complement_durations.csv", 'w') as complement_file, \
            open(destination + "all_durations.csv", 'w') as all_together_file:
        # go through each directory and gather up durations
        for directory in directories:
            # glob once so the count and the sampled population agree
            fast5_paths = glob(directory + "*.fast5")
            nb_files = len(fast5_paths)
            sample_set = int(floor(sample_fraction * nb_files))
            print("Found {nb_files} in folder {folder} sampling {nb_sample_set}".format(nb_files=nb_files,
                                                                                        folder=directory,
                                                                                        nb_sample_set=sample_set),
                  file=sys.stderr)

            # random sample of ``sample_fraction`` of the files in the directory
            for f in sample(fast5_paths, sample_set):
                npRead = NanoporeRead(f)
                try:
                    # only reads carrying both strands' event tables are used
                    if not (hasattr(npRead, 'template_event_table') and
                            hasattr(npRead, 'complement_event_table')):
                        continue

                    file_name = f.split('/')[-1]
                    print(file_name, file=sys.stderr)

                    # walk both tables in lockstep; the shorter one yields None
                    for t, c in zip_longest(npRead.template_event_table,
                                            npRead.complement_event_table):
                        if t is not None:
                            print(t[3], t[6], 't', sep=',', end='\n', file=template_file)
                            print(t[3], t[6], 't', sep=',', end='\n', file=all_together_file)
                        if c is not None:
                            print(c[3], c[6], 'c', sep=',', end='\n', file=complement_file)
                            print(c[3], c[6], 'c', sep=',', end='\n', file=all_together_file)
                finally:
                    # release the read even when it is skipped or writing fails
                    npRead.close()
def main():
    """Dump one nanopore read to a six-line, space-separated text file.

    Expects exactly two command-line arguments: the path of the input fast5
    file and the path of the output file.  With any other argument count the
    usage message is printed and nothing is written.

    Output layout:
        line 1: 2D read length, number of template events, number of
                complement events, then scale, shift, var, scale_sd and
                var_sd for the template followed by the same five values for
                the complement
        line 2: the 2D read sequence
        line 3: template event map
        line 4: template events as repeated ``mean stdev length`` triples
        line 5: complement event map
        line 6: complement events as repeated ``mean stdev length`` triples

    Lines 3-6 keep a trailing space before the newline, as each item is
    written followed by a single space.
    """
    if len(sys.argv) != 3:
        print("USAGE: python thisScript.py /path/to/file.fast5 /path/to/destination")
        return

    # setup: the context manager makes sure the output is flushed and closed
    with open(sys.argv[2], 'w') as out_file:
        # load and transform
        npRead = NanoporeRead(sys.argv[1])
        try:
            npRead.get_2d_event_map()
            npRead.transform_events(npRead.template_events, npRead.template_drift)
            npRead.transform_events(npRead.complement_events, npRead.complement_drift)

            # line 1: read/event counts followed by the per-strand parameters
            header = (
                len(npRead.twoD_read_sequence),   # 2D read length
                len(npRead.template_events),      # nb of template events
                len(npRead.complement_events),    # nb of complement events
                npRead.template_scale,            # template scale
                npRead.template_shift,            # template shift
                npRead.template_var,              # template var
                npRead.template_scale_sd,         # template scale_sd
                npRead.template_var_sd,           # template var_sd
                npRead.complement_scale,          # complement scale
                npRead.complement_shift,          # complement shift
                npRead.complement_var,            # complement var
                npRead.complement_scale_sd,       # complement scale_sd
                npRead.complement_var_sd,         # complement var_sd
            )
            print(*header, sep=' ', end='\n', file=out_file)

            # line 2
            print(npRead.twoD_read_sequence, end='\n', file=out_file)

            # line 3
            for entry in npRead.template_event_map:
                print(entry, end=' ', file=out_file)
            print("", end="\n", file=out_file)

            # line 4 (the event's start value is deliberately not written)
            for mean, start, stdev, length in npRead.template_events:
                print(mean, stdev, length, sep=' ', end=' ', file=out_file)
            print("", end="\n", file=out_file)

            # line 5
            for entry in npRead.complement_event_map:
                print(entry, end=' ', file=out_file)
            print("", end="\n", file=out_file)

            # line 6 (the event's start value is deliberately not written)
            for mean, start, stdev, length in npRead.complement_events:
                print(mean, stdev, length, sep=' ', end=' ', file=out_file)
            print("", end="\n", file=out_file)
        finally:
            # release the underlying read, matching how other callers in
            # this file finish with a NanoporeRead
            npRead.close()