# Command-line script: for every line of the input BED file, emit a new line
# whose interval runs from the original start to start + windowSize, truncated
# at the chromosome length read from the genome file.
# NOTE(review): the description string below talks about a middle point and
# extension to both sides, but the code anchors on the start coordinate and
# only extends towards the end -- confirm which behaviour is intended.
parser = argparse.ArgumentParser(
    description='takes bed as input, get the middle point and extend it to both sides')
parser.add_argument('-i', required=True, help='input')
parser.add_argument('-o', required=True, help='output')
parser.add_argument('-g', required=True, help='genomeFile')
parser.add_argument('-w', required=True, help='windowSize')
args = parser.parse_args()

bedFile = args.i
output = args.o
genome = args.g
windowSize = int(args.w)

# Chromosome name -> chromosome length, taken from the first two tab-separated
# columns of the genome file.
chrDict = {}
# Use a context manager so the genome file handle is closed deterministically.
with open(genome, 'r') as genomeHandle:
    for line in genomeHandle:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing with an IndexError.
            continue
        ll = line.split('\t')
        chrDict[ll[0]] = int(ll[1])


def line2newLine(line):
    """Return the BED line rewritten to a window that begins at its start.

    Args:
        line: one line of the input BED file.

    Returns:
        Whatever ``bedLine.newline(start, newEnd)`` produces, where ``newEnd``
        is ``start + windowSize`` capped at the length of the line's
        chromosome.

    Raises:
        KeyError: if the line's chromosome is not listed in the genome file.
    """
    bedLine = bed.bedline(line)
    chromosome = bedLine.chromosome()
    start = bedLine.start()
    # Never let the window run past the end of the chromosome.
    newEnd = min(start + windowSize, chrDict[chromosome])
    return bedLine.newline(start, newEnd)


# Presumably applies line2newLine to each input line and writes the results to
# the output file -- helper is defined in generalUtils, confirm there.
generalUtils.lineBasedFileOperation(bedFile, output, line2newLine, [])
# Command-line script: for every line of the input file, compute the signed
# distance between the midpoints of two intervals found on that line.
# The description passed to argparse used to read 'fix fragment length if
# possible', which does not describe what this script does.
parser = argparse.ArgumentParser(
    description='compute the distance between the midpoints of two intervals on each line')
parser.add_argument('-i', required=True, help='<Required> input')
parser.add_argument('-c1', required=True, help='<Required> start position tab for first file')
parser.add_argument('-c2', required=True, help='<Required> start position tab for second file')
parser.add_argument('-o', required=True, help='<Required> output')
args = parser.parse_args()

# Named inputFile rather than ``input`` so the builtin is not shadowed.
inputFile = args.i
output = args.o
# The command-line column numbers are 1-based; convert to 0-based indices.
s1tab = int(args.c1) - 1
s2tab = int(args.c2) - 1


def bedClosest2distance(line, s1tab, s2tab):
    """Return the distance between two interval midpoints as a string.

    Args:
        line: one tab-separated line holding both intervals.
        s1tab: 0-based index of the first interval's start column; its end is
            read from the column immediately after.
        s2tab: 0-based index of the second interval's start column; its end is
            read from the column immediately after.

    Returns:
        ``str(pos2 - pos1)`` where each position is ``generalUtils.mean`` of
        the interval's start and end.
    """
    ll = line.split('\t')
    start1 = int(ll[s1tab])
    end1 = int(ll[s1tab + 1])
    start2 = int(ll[s2tab])
    end2 = int(ll[s2tab + 1])
    pos1 = generalUtils.mean([start1, end1])
    pos2 = generalUtils.mean([start2, end2])
    # Signed: positive when the second interval's midpoint lies further along.
    distance = pos2 - pos1
    return str(distance)


# Presumably calls bedClosest2distance(line, s1tab, s2tab) for each input line
# and writes the results to the output file -- confirm in generalUtils.
generalUtils.lineBasedFileOperation(inputFile, output, bedClosest2distance, [s1tab, s2tab])
# Command-line script section: replaces each BED interval with a window of
# windowSize on either side of its midpoint.
# NOTE(review): this section starts mid-script; `parser` and the '-i' option
# read below are presumably defined just before it -- confirm.
parser.add_argument('-o', required=True, help='output')
parser.add_argument('-w', required=True, help='windowSize')
parser.add_argument('-g', required=False, default=False, help='genomeFile')
# NOTE(review): --randomMid is parsed but never consulted below, and
# getInterval's `randomness` parameter is likewise unused -- looks like the
# option was meant to be forwarded to the midpoint computation; confirm.
parser.add_argument('--randomMid', required=False, action='store_true', help='for cases of .5 middle point, randomly select between positions 0 or 1')
args = parser.parse_args()

bedFile = args.i
output = args.o
windowSize = int(args.w)

if args.g:
    # Chromosome name -> chromosome length, taken from the first two
    # tab-separated columns of the genome file.
    chromosomeSizes = {}
    # Use a context manager so the genome file handle is closed
    # deterministically.
    with open(args.g, 'r') as genomeHandle:
        for line in genomeHandle:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of failing with an IndexError.
                continue
            ll = line.split('\t')
            chromosomeSizes[ll[0]] = int(ll[1])


def getInterval(line, randomness=False):
    """Return the BED line rewritten to a window centred on its midpoint.

    Args:
        line: one line of the input BED file.
        randomness: currently unused.

    Returns:
        ``bedLine.newline(start, end)`` with ``start``/``end`` lying
        ``windowSize`` below/above ``bedLine.midpoint()``. When a genome file
        was supplied, ``False`` is returned instead unless
        ``start > 0 and end < chrEnd`` for the line's chromosome.

    Raises:
        KeyError: if a genome file was supplied and the line's chromosome is
            not listed in it.
    """
    bedLine = bed.bedline(line)
    middlePoint = bedLine.midpoint()
    start = middlePoint - windowSize
    end = middlePoint + windowSize
    if args.g:
        chromosome = bedLine.chromosome()
        chrEnd = chromosomeSizes[chromosome]
        # NOTE(review): both bounds are strict, so windows that touch
        # position 0 or the chromosome end exactly are rejected -- confirm
        # this is intended.
        if start > 0 and end < chrEnd:
            return bedLine.newline(start, end)
        return False
    # Without a genome file no bounds check is done; start may be negative.
    return bedLine.newline(start, end)


# Presumably applies getInterval to each input line and writes the results to
# the output file -- helper is defined in generalUtils, confirm there
# (including how a False return value is treated).
generalUtils.lineBasedFileOperation(bedFile, output, getInterval, [])
#!/usr/bin/env python
# Command-line script: hands every line of the input file, together with the
# requested field name, to gff.getGeneInformationFromGFFline through
# generalUtils.lineBasedFileOperation, targeting the given output file.
import argparse

import generalUtils
import gff

parser = argparse.ArgumentParser(
    description='prints a meta field from gff by order')

# All three options are mandatory; register them from one table.
for flag, helpText in (
    ('-i', '<Required> input'),
    ('-o', '<Required> output'),
    ('-f', '<Required> field of interest'),
):
    parser.add_argument(flag, required=True, help=helpText)

args = parser.parse_args()

# The field of interest is passed along as the single extra argument.
extraArguments = [args.f]
generalUtils.lineBasedFileOperation(
    args.i, args.o, gff.getGeneInformationFromGFFline, extraArguments)
# Command-line script: turns each tab-separated input line into a five-column
# line (chromosome, start, end, strand, FPKM) using the locus and FPKM columns.
import sys
import argparse
import generalUtils
import fasta
from sequence import DNA

parser = argparse.ArgumentParser(
    description='convert cufflinks output to bed file with counts')
for flag, helpText in (
    ('-i', '<Required> input'),
    ('-s', '<Required> strand'),
    ('-o', '<Required> output'),
):
    parser.add_argument(flag, required=True, help=helpText)
args = parser.parse_args()


def cuffLinksLine2bedLine(line, strand):
    """Build a tab-joined chromosome/start/end/strand/FPKM line.

    Args:
        line: tab-separated input line; column index 6 holds the locus in
            ``chromosome:start-end`` form and column index 9 holds the FPKM.
        strand: strand string placed in the fourth output column.

    Returns:
        The five values joined with tabs. Start, end and FPKM are copied as
        text without conversion.

    Raises:
        IndexError: if the line has too few columns or the locus lacks the
            ``:`` or ``-`` separator.
    """
    columns = line.split('\t')
    locusParts = columns[6].split(':')
    chromosome = locusParts[0]
    coordinates = locusParts[1].split('-')
    start = coordinates[0]
    end = coordinates[1]
    return '\t'.join([chromosome, start, end, strand, columns[9]])


# The strand given on the command line is passed along as the single extra
# argument for every converted line.
generalUtils.lineBasedFileOperation(
    args.i, args.o, cuffLinksLine2bedLine, [args.s])