def main(inputfile, outputfile, gfffile, gffmin, gffmax, takeStop, upstream, downstream, verbose):
    """Write the sites that ``anno.isAround`` accepts to ``outputfile``.

    Sites are loaded from ``inputfile`` into a ``ParclipSiteContainer``.
    Annotations are loaded from ``gfffile``, filtered with
    ``filterSize(gffmin, gffmax)`` and prepared with
    ``getChromosomePositions()``. Every site for which
    ``anno.isAround(...)[1]`` is truthy is copied into a new container,
    which is then saved to ``outputfile``.

    Args:
        inputfile: Path handed to ``ParclipSiteContainer.loadFromFile``.
        outputfile: Path handed to ``ParclipSiteContainer.save2File``.
        gfffile: Path handed to ``gff.GFF``.
        gffmin: First argument of ``anno.filterSize``.
        gffmax: Second argument of ``anno.filterSize``.
        takeStop: If truthy, ``isAround`` is called with its start flag set
            to ``False``; otherwise the flag is ``True``.
        upstream: Forwarded unchanged to ``anno.isAround``.
        downstream: Forwarded unchanged to ``anno.isAround``.
        verbose: If truthy, report progress via ``functions.showProgress``.
    """
    take_start = not takeStop

    sites = ParclipSiteContainer()
    sites.loadFromFile(inputfile)

    anno = gff.GFF(gfffile)
    anno.filterSize(gffmin, gffmax)
    anno.getChromosomePositions()
    if anno.size() < 10:
        print('Warning: Low number of annotation enries! ' + str(anno.size()))

    fsites = ParclipSiteContainer()
    # The input container is not modified below, so its size is constant.
    n_sites = sites.size()
    percent_old = 0
    for i in range(n_sites):
        # NOTE(review): only element [1] of the isAround result is used here;
        # it is presumably the hit flag - confirm against gff.GFF.isAround.
        if anno.isAround(sites.chrs[i], sites.pos[i], sites.strand[i],
                         take_start, upstream, downstream)[1]:
            fsites.addSite(sites.chrs[i], sites.pos[i], sites.m[i], sites.r[i],
                           sites.result[i], sites.strand[i], sites.occ[i])
        percent_new = round(i / n_sites * 100)
        if percent_new > percent_old:
            if verbose:
                # Progress is measured over the sites being iterated, so the
                # total must be the number of sites, not of annotations.
                functions.showProgress(i, n_sites, 'selecting sites')
            percent_old = percent_new
    fsites.save2File(outputfile)
def main(inputfile, outputfile):
    """Recompute every site's ``occ`` value as ``m / r`` and save the result.

    If ``inputfile`` is not an existing regular file, a message is printed
    and the process exits with status ``-1``. Otherwise the sites are loaded
    into a ``ParclipSiteContainer``, each ``occ`` entry is overwritten with
    the quotient of the matching ``m`` and ``r`` entries, and the container
    is written to ``outputfile``.

    Args:
        inputfile: Path of the site file to read.
        outputfile: Path the updated sites are written to.
    """
    if not os.path.isfile(inputfile):
        print('Inputfile: '+inputfile+' does not exist')
        sys.exit(-1)

    container = ParclipSiteContainer()
    container.loadFromFile(inputfile)

    n_sites = container.size()
    for idx in range(n_sites):
        numerator = container.m[idx]
        denominator = container.r[idx]
        container.occ[idx] = numerator/denominator

    container.save2File(outputfile)
def main(input_file, output_file, q):
    """Cap site occupancies at their ``q``-quantile and save the sites.

    ``q`` must satisfy ``0 <= q < 1``; otherwise a message is printed and the
    process exits with status ``1``. The sites are read with
    ``ParclipSiteContainer.from_file``, the cap is obtained from
    ``functions.getQuantile`` over all occupancies, and every record whose
    occupancy exceeds the cap is replaced by a copy with the occupancy set to
    the cap. The resulting records are written to ``output_file``.

    Args:
        input_file: Path of the site file to read.
        output_file: Path the capped sites are written to.
        q: Quantile passed to ``functions.getQuantile``.
    """
    if not 0 <= q < 1:
        print('q must lie between 0 and 1 - got %s' % q)
        sys.exit(1)

    sites = ParclipSiteContainer.from_file(input_file)

    occupancies = [site.occupancy for site in sites]
    # Workaround for empty files: the quantile is only computed when there is
    # at least one value. With no sites the loop below never reads the cap.
    if occupancies:
        cap = functions.getQuantile(occupancies, q)

    capped = []
    for site in sites:
        # Records expose ``_replace`` (presumably namedtuple-like), which
        # returns a modified copy and leaves the original record untouched.
        capped.append(site._replace(occupancy=cap) if site.occupancy > cap else site)

    ParclipSiteContainer(capped).save2File(output_file)