Пример #1
0
 def load_data_inference(self):
     """Lazily produce padded signal windows for inference.

     Reads the signal at ``self.file_path`` with ``normalize=True``, drops
     everything before ``self.start_index`` and walks the remainder in
     strides of ``self.step``. Each window holds at most ``self.seq_len``
     samples (the last ones may be shorter), is passed through
     ``self.padding`` and is yielded as a ``self.inference_labels`` record.

     Yields:
         ``self.inference_labels(input=..., seq_len=...)`` where ``input``
         is the padded window as an array and ``seq_len`` is the length of
         the window before padding.
     """
     signal = read_signal(self.file_path, normalize=True)[self.start_index:]
     for offset in range(0, len(signal), self.step):
         window = signal[offset:offset + self.seq_len]
         # Measure before padding so the record keeps the true length.
         valid_len = len(window)
         padded = self.padding(window, self.seq_len)
         yield self.inference_labels(input=np.asarray(padded),
                                     seq_len=np.asarray(valid_len))
Пример #2
0
def trim_signal(signal_file, label_file, outdir):
    """Write the label-aligned slice of a signal file into ``outdir``.

    The signal is read with ``normalize=False`` and sliced from
    ``label.start[0]`` up to (not including) ``label.start[-1]``. The slice
    is written as space-separated values to
    ``<outdir>/<stem>.trim.signal``, where ``<stem>`` is the signal file's
    base name up to its first ``'.'``.

    Args:
        signal_file: path of the signal file to trim.
        label_file: path of the label file passed to ``read_label``.
        outdir: directory in which the trimmed file is created.

    Returns:
        The path of the trimmed signal file that was written.
    """
    # os.path.basename rather than splitting on '/' so paths written with
    # the platform's own separator are handled too.
    stem = os.path.basename(signal_file).split('.')[0]
    outpath = os.path.join(outdir, stem + ".trim.signal")
    signal = read_signal(signal_file, normalize=False)
    label = read_label(label_file)
    final_signal = signal[label.start[0]:label.start[-1]]
    # The file is only written, never read back, so plain 'w' is enough.
    with open(outpath, 'w') as f_signal:
        f_signal.write(" ".join(str(val) for val in final_signal))
    return outpath
Пример #3
0
 def trim_complement_signal(self, outdir):
     """Trim the signal file to the span covered by the label file.

     Reads ``self.signal_file`` with ``normalize=False``, slices it from
     ``label.start[0]`` up to (not including) ``label.start[-1]`` of the
     label read from ``self.label_file``, and hands the slice to
     ``self.write_signal`` together with the target path
     ``<outdir>/<stem>.trim.signal`` (``<stem>`` is the signal file's base
     name up to its first ``'.'``).

     Args:
         outdir: existing directory that receives the trimmed file.

     Returns:
         Whatever ``self.write_signal`` returns for the trimmed signal.

     Raises:
         AssertionError: if ``outdir`` is not a directory or
             ``self.label_file`` is not a file.
     """
     # Raise explicitly instead of using ``assert`` so the checks still run
     # under ``python -O``; AssertionError is kept for existing callers.
     if not os.path.isdir(outdir):
         raise AssertionError("{} does not exist".format(outdir))
     if not os.path.isfile(self.label_file):
         raise AssertionError("{} does not exist".format(self.label_file))
     # os.path.basename rather than splitting on '/' so paths written with
     # the platform's own separator are handled too.
     stem = os.path.basename(self.signal_file).split('.')[0]
     out_path = os.path.join(outdir, stem + ".trim.signal")
     signal = read_signal(self.signal_file, normalize=False)
     label = read_label(self.label_file)
     final_signal = signal[label.start[0]:label.start[-1]]
     return self.write_signal(final_signal, out_path)
Пример #4
0
 def load_data(self):
     """Read in data from signal files and create specific motif comparisons.

     For every entry of ``self.file_list`` ending in ``.signal`` the signal
     is read, a ``SignalLabel`` is built from the signal path and the
     sibling ``.label`` path, and each item produced by ``trim_to_motif``
     is passed to ``read_raw`` (with ``self.seq_len`` and ``short=True``).
     The returned events, event lengths, labels and label lengths are
     accumulated across files. A file that raises ``ValueError`` is
     reported on stdout and skipped; anything accumulated from it before
     the error is kept.

     Returns:
         ``self.training_labels(input=..., seq_len=..., label=...)`` where
         ``label`` is a list of label arrays right-padded with ``-1`` to
         the longest label length.

     Raises:
         ValueError: if no labels were collected from ``self.file_list``.
     """
     event = []
     event_length = []
     label = []
     label_length = []
     count = 0
     file_count = 0
     motifs = ["CCAGG", "CCTGG", "CEAGG", "CETGG"]
     for name in self.file_list:
         if not name.endswith(".signal"):
             continue
         try:
             file_pre = os.path.splitext(name)[0]
             f_signal = read_signal(name)
             label_name = file_pre + '.label'
             signal_label = SignalLabel(name, label_name)
             motif_generator = signal_label.trim_to_motif(motifs,
                                                          prefix_length=0,
                                                          suffix_length=0,
                                                          methyl_index=1,
                                                          blank=self.blank)
             for motif in motif_generator:
                 tmp_event, tmp_event_length, tmp_label, tmp_label_length = read_raw(
                     f_signal, motif, self.seq_len, short=True)
                 event += tmp_event
                 event_length += tmp_event_length
                 label += tmp_label
                 label_length += tmp_label_length
                 count = len(event)
             # Progress report once every ten successfully read files.
             if file_count % 10 == 0:
                 sys.stdout.write("%d lines read.   \n" % (count))
             file_count += 1
         except ValueError:
             print("Error Reading Data from file {}".format(name))
     if not label_length:
         # max() on an empty list would raise an opaque ValueError; keep the
         # exception type but say what actually went wrong.
         raise ValueError("No training data could be loaded from file_list")
     pad_len = max(label_length)
     # np.pad is the public entry point; the np.lib.pad alias is not
     # available in NumPy >= 2.0.
     padded_labels = [
         np.pad(lab, (0, pad_len - lab_len), 'constant', constant_values=-1)
         for lab, lab_len in zip(label, label_length)
     ]
     return self.training_labels(input=np.asarray(event), seq_len=np.asarray(event_length),
                                 label=padded_labels)
Пример #5
0
    def load_data(self):
        """Read in signal/label file pairs and assemble padded training data.

        For every entry of ``self.file_list`` ending in ``.signal`` the
        signal is read, the sibling ``.label`` file is read with
        ``skip_start=10``, ``window_n=(self.kmer - 1) // 2`` and
        ``alphabet=self.len_y``, and both are passed to ``read_raw``
        together with ``self.seq_len`` and ``self.max_event_len``. The
        returned events, event lengths, labels and label lengths are
        accumulated across files. A file that raises ``ValueError`` is
        reported on stdout and skipped.

        Returns:
            ``self.training_labels(input=..., seq_len=..., label=...)``
            where ``label`` is a list of label arrays right-padded with
            ``-1`` to the longest label length.

        Raises:
            ValueError: if no labels were collected from ``self.file_list``.
        """
        event = []
        event_length = []
        label = []
        label_length = []
        count = 0
        file_count = 0
        for name in self.file_list:
            if not name.endswith(".signal"):
                continue
            try:
                file_pre = os.path.splitext(name)[0]
                f_signal = read_signal(name)
                label_name = file_pre + '.label'
                # Floor division keeps window_n an integer on Python 3
                # (plain '/' would hand read_label a float).
                f_label = read_label(label_name, skip_start=10, window_n=(self.kmer - 1) // 2,
                                     alphabet=self.len_y)
                tmp_event, tmp_event_length, tmp_label, tmp_label_length = read_raw(
                    f_signal, f_label, self.seq_len,
                    max_event_length=self.max_event_len)
                event += tmp_event
                event_length += tmp_event_length
                label += tmp_label
                label_length += tmp_label_length
                count = len(event)
                # Progress report once every ten successfully read files;
                # the number shown is the accumulated event count, not a
                # file count, so the message says "lines" rather than "files".
                if file_count % 10 == 0:
                    sys.stdout.write("%d lines read.   \n" % (count))
                file_count += 1
            except ValueError:
                print("Error Reading Data from file {}".format(name))
        if not label_length:
            # max() on an empty list would raise an opaque ValueError; keep
            # the exception type but say what actually went wrong.
            raise ValueError("No training data could be loaded from file_list")
        pad_len = max(label_length)
        # np.pad is the public entry point; the np.lib.pad alias is not
        # available in NumPy >= 2.0.
        padded_labels = [
            np.pad(lab, (0, pad_len - lab_len), 'constant', constant_values=-1)
            for lab, lab_len in zip(label, label_length)
        ]

        return self.training_labels(input=np.asarray(event), seq_len=np.asarray(event_length),
                                    label=padded_labels)
Пример #6
0
 def read_signal(self, normalize):
     """Read this object's signal file via the module-level ``read_signal``.

     Args:
         normalize: forwarded unchanged as the ``normalize`` keyword.

     Returns:
         Whatever the module-level ``read_signal`` returns for
         ``self.signal_file``.
     """
     signal_path = self.signal_file
     return read_signal(signal_path, normalize=normalize)