class FisherRunner(object):
    """Classify the positions of a joint-counts file and write them as TSV.

    For every chromosome reported by the reader, the counts are wrapped in
    ``self.data_class``, passed to ``self.model.classify`` in fixed-size
    batches, and each input row is written out with the chromosome name
    prepended and the predicted class name appended.

    NOTE(review): ``self.model`` is never assigned in this class; presumably
    a subclass or the caller sets it before ``run`` is called -- confirm.
    """

    # Maximum number of positions handed to the model in one batch.
    chunk_size = int(1e5)

    def __init__(self):
        self.data_class = JointData
        # Indexed by the integer label returned by the model.
        self.classes = ('Reference', 'Germline', 'Somatic', 'LOH', 'Unknown')

    def run(self, args):
        """Classify every chromosome and write the results.

        ``args`` must provide ``jcnt_file_name`` (input) and
        ``tsv_file_name`` (output). Chromosomes are processed in sorted
        name order. Both files are closed even if classification fails.
        """
        self.reader = JointCountsReader(args.jcnt_file_name)
        try:
            # The output file is closed explicitly so buffered rows are
            # always flushed to disk.
            with open(args.tsv_file_name, 'w') as tsv_file:
                self.writer = csv.writer(tsv_file, delimiter='\t')

                for chr_name in sorted(self.reader.get_chr_list()):
                    self._classify_chromosome(chr_name)
        finally:
            self.reader.close()

    def _classify_chromosome(self, chr_name):
        """Classify one chromosome in batches of ``chunk_size`` positions."""
        counts = self.reader.get_counts(chr_name)
        jcnt_rows = self.reader.get_rows(chr_name)
        end = self.reader.get_chr_size(chr_name)

        n = self.chunk_size
        start = 0
        stop = min(n, end)

        while start < end:
            sub_counts = counts[start:stop]
            sub_rows = jcnt_rows[start:stop]

            data = self.data_class(sub_counts)
            labels = self.model.classify(data)

            self._write_rows(chr_name, sub_rows, labels)

            # Advance the window; the last batch is clipped to ``end``.
            start = stop
            stop = min(stop + n, end)

    def _write_rows(self, chr_name, rows, labels):
        """Write one output row per input row.

        Each output row is ``[chr_name] + row fields + [class name]``, where
        the class name is ``self.classes[int(labels[i])]``. Rows classified
        as 'Somatic' are also echoed to stdout.
        """
        for i, row in enumerate(rows):
            out_row = [chr_name]
            out_row.extend(row)

            class_name = self.classes[int(labels[i])]
            out_row.append(class_name)

            if class_name == 'Somatic':
                # Single parenthesised argument: prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print(out_row)

            self.writer.writerow(out_row)
def jcnt_to_cncnt(args):
    """Split joint-count rows into a ConanCountsFile by segment status.

    ``args`` must provide ``jcnt_file_name`` (input counts),
    ``segment_file_name`` (tab-delimited segments) and ``cncnt_file_name``
    (output).

    Each segment row is read as ``chromosome, start, stop, status``. Rows of
    the counts file whose ``'position'`` lies in ``[start, stop]`` (both
    ends inclusive) are added to the output under the recoded status.
    Segments on chromosomes absent from the counts file, or covering no
    positions, are skipped. Every segment row is echoed to stdout.

    All three files are closed even if a segment row fails to parse.
    """
    # Numeric chromosome codes used by the segment file for sex chromosomes.
    chr_aliases = {'23': 'X', '24': 'Y'}
    # Recoding of segment status codes; any other code passes through as-is.
    cn_status_recode = {'7': '1', '8': '2', '9': '4', '10': '5', '11': '6'}

    reader = JointCountsReader(args.jcnt_file_name)
    cncnt_file = None

    try:
        chr_list = reader.get_chr_list()
        cncnt_file = ConanCountsFile(args.cncnt_file_name, 'w')

        with open(args.segment_file_name) as segment_file:
            segment_reader = csv.reader(segment_file, delimiter='\t')

            for row in segment_reader:
                if not row:
                    # csv yields [] for blank lines (e.g. a trailing newline).
                    continue

                # Single parenthesised argument: same output on Python 2/3.
                print(row)

                chr_name = chr_aliases.get(row[0], row[0])
                start = int(row[1])
                stop = int(row[2])
                cn_status = cn_status_recode.get(row[3], row[3])

                if chr_name not in chr_list:
                    continue

                rows = reader.get_rows(chr_name)

                in_segment = np.logical_and(rows['position'] >= start,
                                            rows['position'] <= stop)
                segment_rows = rows[in_segment]

                # Explicit length check: array truthiness is ambiguous.
                if len(segment_rows) == 0:
                    continue

                cncnt_file.add_rows(cn_status, chr_name, segment_rows)
    finally:
        # Keep the original close order: reader first, then the output.
        try:
            reader.close()
        finally:
            if cncnt_file is not None:
                cncnt_file.close()