def listTable(self, inPath):
    """Print a one-line summary of every entry in the TAB file at ``inPath``.

    The file is opened through ``BinaryFile`` in ``'rb'`` mode and wrapped in
    a ``TabFile``.  A header line is printed first, then one row per entry
    holding its index, a compression marker (``Y`` or ``-``), its flags as
    two hex digits and its offset as six hex digits.
    """
    table = TabFile(BinaryFile(inPath, 'rb'))
    print("Item C Flg Offset (C=compressed?)")
    for index, entry in enumerate(table.getEntries()):
        if entry['compressed']:
            marker = 'Y'
        else:
            marker = '-'
        printf("%5d: %s %02X %06X\n", index, marker,
               entry['flags'], entry['offset'])
def __init__(self, dimensions):
    """Record the tile dimensions, derive the image paths and run setup.

    ``dimensions`` is indexed as ``(dimX, dimY)``; both components are
    formatted with ``%d`` to build the per-size image directory name.
    """
    self.dimensions = dimensions

    # Tile description table and the symbol -> tile-info mapping built from it.
    self.file = TabFile('input/tile_info.txt')
    self.tile_map = {}

    # Image locations: <maindir><dimX>_<dimY>/ holds the size-specific images.
    self.tile_img_ext = '.png'
    self.tile_img_maindir = 'input/images/'
    dim_x, dim_y = self.dimensions[0], self.dimensions[1]
    self.tile_img_dimname = '%d_%d' % (dim_x, dim_y)
    self.tile_img_targetdir = '%s%s/' % (self.tile_img_maindir,
                                         self.tile_img_dimname)

    self.setup()
def __init__(self, filename):
    """Initialise the ``TabFile`` base with ``filename`` and load the data.

    After the base initialiser runs, ``gpa_4`` is set from
    ``get_dict(omsas)``, the mark containers start out empty, and
    ``self.get()`` is called last.
    NOTE(review): ``self.get()`` presumably fills ``marks_by_year`` and
    ``all_marks`` -- confirm against its definition.
    """
    TabFile.__init__(self, filename)
    self.gpa_4 = get_dict(omsas)

    # Empty containers; created before get() runs.
    self.all_marks = []
    self.marks_by_year = {}

    self.get()
class TileMaster:
    """Catalogue of map tiles read from ``input/tile_info.txt``.

    ``tile_map`` maps a symbol taken from ``SYMBOLS`` to a dict of that
    tile's properties, keyed by the column headers of the tile table.
    """

    def __init__(self, dimensions):
        """Record the tile dimensions, derive image paths and load the table.

        ``dimensions`` is indexed as ``(dimX, dimY)``.
        """
        self.dimensions = dimensions
        self.file = TabFile('input/tile_info.txt')
        self.tile_map = {}
        self.tile_img_ext = '.png'
        self.tile_img_maindir = 'input/images/'
        self.tile_img_dimname = '%d_%d' % (self.dimensions[0], self.dimensions[1])
        self.tile_img_targetdir = self.tile_img_maindir+self.tile_img_dimname+'/'
        self.setup()

    def setup(self):
        """Fill ``tile_map`` from the tile table.

        Blank lines are skipped.  The k-th remaining line is stored under
        ``SYMBOLS[k]``.  Within a line, every field is reduced to its
        letters, digits and ``-`` characters, paired with the header at the
        same position, and stored as an ``int`` when it parses as one and as
        the cleaned string otherwise.

        Raises:
            ValueError: if a field is empty after cleaning.
        """
        symbol_num = -1
        for line in self.file.parse():
            if line.isspace() or line == '':
                continue
            symbol_num += 1
            tile_info = {}
            fields = self.file.parse_tabline(line, skip_emptyinfo=True)
            for n, raw_info in enumerate(fields):
                info = ''.join(
                    char for char in raw_info
                    if char.isalpha() or char.isdigit() or char == '-'
                )
                if info == '':
                    raise ValueError('Line: "%s" provided empty info: "%s".' % (line, raw_info))
                header = self.file.header[n]
                try:
                    tile_info[header] = int(info)
                except ValueError:
                    # Not numeric: keep the cleaned text.
                    tile_info[header] = info
            self.tile_map[SYMBOLS[symbol_num]] = tile_info

    def det_sealevel(self):
        """Return the ``maxE`` value of the shore tile.

        Tile ``'0'`` must be named ``ocean`` and tile ``'1'`` must be named
        ``shore``.

        Raises:
            ValueError: if either tile carries a different name.
            KeyError: if either symbol is missing from ``tile_map``.
        """
        ocean = self.tile_map['0']
        if ocean['name'] != 'ocean':
            raise ValueError(
                'Tile "0" must be named "ocean", found "%s".' % (ocean['name'],)
            )
        shore = self.tile_map['1']
        if shore['name'] != 'shore':
            raise ValueError(
                'Tile "1" must be named "shore", found "%s".' % (shore['name'],)
            )
        return shore['maxE']

    def setup_images(self):
        """Attach an ``ImageTk.PhotoImage`` to every tile under ``'image'``.

        For each tile the source image ``<maindir>/<name><ext>`` and an
        output id ``<name>.<dimname><ext>`` are handed to
        ``check_if_resized``; the path it returns is opened and wrapped.
        NOTE(review): ``check_if_resized`` presumably creates the resized
        copy when it is missing -- confirm against its definition.
        """
        for tile in self.tile_map.values():
            output_id = '%s.%s%s' % (tile['name'], self.tile_img_dimname, self.tile_img_ext)
            filename = os.path.join(self.tile_img_maindir, tile['name']+self.tile_img_ext)
            resized_path = check_if_resized(self.tile_img_targetdir, output_id, self.dimensions, filename)
            img = Image.open(resized_path)
            tile['image'] = ImageTk.PhotoImage(image=img)

    def choose_tile(self, elevation, temperature):
        """Pick a tile symbol suitable for ``elevation`` and ``temperature``.

        A tile qualifies when ``minE <= elevation <= maxE`` and
        ``minT <= temperature <= maxT``.  The qualifying symbols, mapped to
        their ``value`` entry, are passed to ``WeightedChoice`` and the
        result of its ``get()`` is returned.
        NOTE(review): ``value`` is presumably the selection weight; the
        behaviour with no qualifying tile depends on ``WeightedChoice``.
        """
        possibilities = {
            key: tile['value']
            for key, tile in self.tile_map.items()
            if tile['minE'] <= elevation <= tile['maxE']
            and tile['minT'] <= temperature <= tile['maxT']
        }
        wc = WeightedChoice(possibilities)
        return wc.get()
def find_sites(
    peaks_file,
    fasta_file,
    motif,
    bed=True,
    xls=False,
    output_dir=None,
    motif_type="MEME",
    src_fnc="find_sites",
    bysummit=False,
    **kwargs
):
    """Scan the sequence of every peak for motif instances and write reports.

    Peaks are read from ``peaks_file`` (through ``BedFile`` when ``bed`` is
    true, otherwise through ``MacsFile`` when ``xls`` is true) and paired, in
    order, with the records of ``fasta_file``.  Each pair is handed to
    ``search_peak``; the rows it returns are appended to four output files
    named after the peaks file (extension stripped, optionally placed in
    ``output_dir``):

    ``<prefix>.peaks.info``
        Starts with two comment lines and this tab-separated header:
        chr, start, end, peak_ID, peak_intensity, site_count, total_Ri,
        best_Ri, best_seq, best_offset, best_strand, clean_peak_length,
        peak_summit, peak_misc.
    ``<prefix>.peaks.bed``
        Peak rows, no header.
    ``<prefix>.sites.info``
        Starts with one comment line and this tab-separated header:
        chr, start, end, site_ID, Ri, strand, offset, motif_seq, peak_ID,
        peak_length, reported_peak_length, peak_summit.
    ``<prefix>.sites.bed``
        Site rows, no header.

    Row contents are produced by ``search_peak``.  NOTE(review): the original
    notes state that all coordinates are 0-based, half-open (BED convention)
    with MACS coordinates corrected accordingly -- confirm in ``search_peak``.

    A summary plus a dump of the motif is written to ``<prefix>.log``.

    Args:
        peaks_file: path of the peak file.
        fasta_file: path of a FASTA file with one record per peak, in the
            same order as the peaks.
        motif: a motif object, or the path of a motif file that is read with
            ``Bio.Motif.read`` using ``motif_type``.
        bed: read ``peaks_file`` with ``BedFile``.
        xls: read ``peaks_file`` with ``MacsFile`` (only when ``bed`` is
            false).
        output_dir: directory for the output files; defaults to the current
            directory.
        motif_type: format name passed to ``Bio.Motif.read``.
        src_fnc: name reported in the "generated by" comment lines.
        bysummit: forwarded to ``search_peak``.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        The summary sentence (without the motif dump).

    Raises:
        ValueError: if both ``bed`` and ``xls`` are false.
        StopIteration: if ``fasta_file`` holds fewer records than there are
            peaks.
    """
    if isinstance(motif, str):
        # Close the motif file as soon as it has been parsed.
        with open(motif) as motif_handle:
            motif = Bio.Motif.read(motif_handle, motif_type)

    # start the output file
    prefix = os.path.splitext(os.path.basename(peaks_file))[0]
    if output_dir is not None:
        prefix = os.path.join(output_dir, prefix)

    nosites = 0
    peaknumber = 0
    # Every successfully opened output is tracked here so that all of them
    # are closed even when a later step raises.
    outputs = []
    try:
        sites_info = TabFile(os.extsep.join([prefix, "sites", "info"]), "w")
        outputs.append(sites_info)
        sites_bed = TabFile(os.extsep.join([prefix, "sites", "bed"]), "w")
        outputs.append(sites_bed)
        peaks_info = TabFile(os.extsep.join([prefix, "peaks", "info"]), "w")
        outputs.append(peaks_info)
        peaks_bed = TabFile(os.extsep.join([prefix, "peaks", "bed"]), "w")
        outputs.append(peaks_bed)

        peaks_cols = [
            "chr",
            "start",
            "end",
            "peak_ID",
            "peak_intensity",
            "site_count",
            "total_Ri",
            "best_Ri",
            "best_seq",
            "best_offset",
            "best_strand",
            "clean_peak_length",
            "peak_summit",
            "peak_misc",
        ]
        peaks_msg = os.linesep.join(
            [
                "# This file was generated by " + src_fnc,
                "# comments are retained from original file",
                "\t".join(peaks_cols),
                "",
            ]
        )
        peaks_info.write(peaks_msg)

        sites_cols = [
            "chr",
            "start",
            "end",
            "site_ID",
            "Ri",
            "strand",
            "offset",
            "motif_seq",
            "peak_ID",
            "peak_length",
            "reported_peak_length",
            "peak_summit",
        ]
        sites_msg = os.linesep.join(
            ["# This file was generated by " + src_fnc, "\t".join(sites_cols), ""]
        )
        sites_info.write(sites_msg)

        if bed:
            peak_generator = BedFile(peaks_file)
        elif xls:
            peak_generator = MacsFile(peaks_file)
        else:
            raise ValueError("Neither bed nor xls")

        # Plain "r": the "U" flag is rejected by Python 3.11+ and text mode
        # already translates newlines on Python 3.
        with open(fasta_file, "r") as fasta_handle:
            # peak_seqs is a lazy generator, one sequence per FASTA record
            peak_seqs = (r.seq for r in Bio.SeqIO.parse(fasta_handle, "fasta"))
            for peaknumber, peak in enumerate(peak_generator, 1):
                seq = next(peak_seqs)

                # Generate a peak ID
                try:
                    peak_ID = peak.name()
                except (NameError, AttributeError):
                    # A missing ``name`` attribute surfaces as AttributeError;
                    # NameError is kept for peak classes that raise it.
                    peak_ID = "{!s}_{!s}".format(prefix, peaknumber)

                # Change behavior to use sequences centered at summit
                (peak_info, peak_bed, sites_info_rows, sites_bed_rows) = search_peak(
                    peak_ID, peak, seq, motif, bysummit=bysummit
                )
                peaks_info.write_row(peak_info)
                peaks_bed.write_row(peak_bed)
                sites_info.write_rows(sites_info_rows)
                sites_bed.write_rows(sites_bed_rows)
                if len(sites_info_rows) == 0:
                    nosites += 1
    finally:
        for output in outputs:
            output.close()

    # Adjacent literals instead of a backslash continuation, which embedded
    # the source indentation in the message.
    message = (
        "There were {!s} of {!s} peaks with no identifiable "
        "sites in {!s} using a cutoff of 0 bits"
    ).format(nosites, peaknumber, fasta_file)
    stdout_buffer = message

    # get the motif
    try:
        motif_str = os.linesep.join(
            [
                ", ".join(["".join([str(base), ": ", str(odds)]) for base, odds in position.items()])
                for position in motif.log_odds()
            ]
        )
    except AttributeError:
        # Motif objects without log_odds() are logged via str().
        motif_str = str(motif)
    message = os.linesep.join([message, "The following motif was used", motif_str])

    # write the full message to a log
    with open(prefix + ".log", "w") as log_handle:
        log_handle.write(message)
    return stdout_buffer