def get_gc(some_gbk):
    """
    Get GC content for contigs in genbank file, output file for Circos line plot

    Every record in `some_gbk` is sampled in windows centred on positions
    500, 1500, 2500, ... and the GC value of each window is written to
    'circos/GC/gc_<name>.txt', where <name> is the base name of `some_gbk`
    with its last 13 characters removed.

    Output lines have the form '<strain><record name> <start> <end> <gc>',
    preceded by three '#' header lines holding the summary statistics.

    Returns a tuple (min, max, average, standard deviation) of the window
    GC values.

    Raises ValueError if the file yields no windows (no records, or every
    record is 500 bases or shorter).
    """
    gc_points = []
    with open(some_gbk, 'r') as file_handle:
        for record in SeqIO.parse(file_handle, 'gb'):
            for midpoint in range(500, len(record), 1000):
                start, end = midpoint - 500, midpoint + 499
                # NOTE(review): the slice end is exclusive, so GC is computed
                # over at most 999 bases while `end` is written as-is; the
                # last window of a record may also run past its length.
                gc_cont = SeqUtils.GC(record.seq[start:end])
                gc_points.append((record.name, start, end, gc_cont))

    # Fail early with a clear message instead of leaving an empty output file
    # behind and tripping over min() on an empty list.
    if not gc_points:
        raise ValueError(
            "no GC windows could be computed from {}".format(some_gbk)
        )

    # The name depends only on the file path, so parse it once rather than
    # once per record.
    sp_strain = kv.parse_genbank_name(some_gbk)[2]

    values = [point[3] for point in gc_points]
    stats = (min(values), max(values), np.average(values), np.std(values))

    # Create the output directory on first use, as get_karyotype does for its
    # own directory.
    if not os.path.isdir('circos/GC/'):
        os.makedirs('circos/GC/')

    # [:-13] drops a fixed-length suffix from the file name -- presumably the
    # extension plus a tag added upstream; TODO confirm against callers.
    out_path = 'circos/GC/gc_{}.txt'.format(os.path.basename(some_gbk)[:-13])
    with open(out_path, 'w+') as out_handle:
        out_handle.write(
            "# Min: {}\n# Max: {}\n# Avg, Std: {}, {}\n".format(*stats)
        )
        for name, start, end, gc_cont in gc_points:
            out_handle.write('{0}{1} {2} {3} {4}\n'.format(
                sp_strain, name, start, end, gc_cont
            ))

    return stats
def get_karyotype(some_gbk):
    """
    Convert Genbank file into Karyotype file for Circos
        - Each contig is a "chromosome"
        - format: 'chr - ID LABEL START END COLOR'

    The file is written to 'circos/karyotypes/karyotype_<name>.txt', where
    <name> is the base name of `some_gbk` with its last 13 characters
    removed. The directory is created if it does not exist. All contigs in
    one file share a single randomly chosen RGB color.

    Raises ValueError if `some_gbk` contains no records.
    """
    with open(some_gbk, 'r') as file_handle:
        contigs = [
            (record.name, len(record))
            for record in SeqIO.parse(file_handle, 'gb')
        ]

    # Without records there is nothing to draw; say so explicitly instead of
    # failing later on a missing species name.
    if not contigs:
        raise ValueError("no records found in {}".format(some_gbk))

    # The name depends only on the file path, so parse it once rather than
    # once per record.
    sp_strain = kv.parse_genbank_name(some_gbk)[2]

    if not os.path.isdir('circos/karyotypes/'):
        os.makedirs('circos/karyotypes/')

    # [:-13] drops a fixed-length suffix from the file name -- presumably the
    # extension plus a tag added upstream; TODO confirm against callers.
    out_path = 'circos/karyotypes/karyotype_{}.txt'.format(
        os.path.basename(some_gbk)[:-13]
    )
    with open(out_path, 'w+') as karyotype:
        # randint's upper bound is exclusive, so each channel is in 0..254.
        color = [
            np.random.randint(0, 255),
            np.random.randint(0, 255),
            np.random.randint(0, 255),
        ]
        for name, length in contigs:
            # Stop at the first contig of 1000 bases or fewer.
            # NOTE(review): this keeps every longer contig only if records
            # are ordered longest first -- confirm that the input is sorted.
            if length <= 1000:
                break
            karyotype.write('chr - {0}{1} {2} {3} {4} {5},{6},{7}\n'.format(
                sp_strain, name, name, '1', length, *color
            ))