def load_genomes(UTRfilestring, twobitfile):
    """
    Load the UTR table and the 2bit genome in one place.

    Kept as a separate function so that these only need to be loaded a
    single time and can then be shared by every caller.

    Args:
        UTRfilestring: path of the UTR file passed to rph.readindict.
        twobitfile: path of the file passed to twobitreader.TwoBitFile.

    Returns:
        Tuple (UTRdict, genome): the value returned by rph.readindict and
        the TwoBitFile object.
    """
    # Scope the handle so it is released even if parsing raises.
    # NOTE(review): assumes rph.readindict consumes the whole file before
    # returning -- confirm against rphelper.
    with open(UTRfilestring, "rU") as utr_handle:
        UTRdict = rph.readindict(utr_handle)
    genome = twobitreader.TwoBitFile(twobitfile)  # do we actually need to load this in here?
    return UTRdict, genome
def main():
    """
    Command-line entry point: build the count tables and write them to csv.

    Parses the workflow arguments, loads the transcript counts and the UTR
    table through rph, calls build_count_tables and passes its result to
    write_countTable_to_csv together with --outfilestring.

    --cdsDenThresh is compared against the literal string 'True'; any other
    value (including a missing one) disables the density filter.
    --raw_dense_thresh and --rpm_dense_thresh are converted with float().
    """
    ## using argparser to load arguments from workflow
    parser = argparse.ArgumentParser()
    cli_options = (
        ('--trspdictfilestring', 'input transcript density files'),
        ('--UTRfilestring', 'UTRs file'),
        ('--cdsDenThresh', 'boolean value- should density filter be used?'),
        ('--norm_type', 'type of normalizaion, should be raw reads or rpm'),
        ('--raw_dense_thresh',
         'threshold for CDS density for raw normalization'),
        ('--rpm_dense_thresh',
         'threshold for CDS density for rpm normalization'),
        ('--inset_choice',
         'inset values to be used to avoid start and stop codon peaks'),
        ('--outfilestring', 'output file name'),
    )
    for flag, help_text in cli_options:
        parser.add_argument(flag, help=help_text)
    args = parser.parse_args()

    # argparse delivers strings, so the flag is turned into a real boolean
    # by comparing against the literal 'True'.
    args.cdsDenThresh = args.cdsDenThresh == 'True'

    # Same output as the former `print "ARGS: ", value` statement, but valid
    # under both Python 2 and Python 3.
    print("%s %s" % ("ARGS: ", args.cdsDenThresh))

    trspdict = rph.readcountsf(args.trspdictfilestring)
    # Scope the handle so it is released even if parsing raises.
    # NOTE(review): assumes rph.readindict consumes the whole file before
    # returning -- confirm against rphelper.
    with open(args.UTRfilestring, "rU") as utr_handle:
        UTRdict = rph.readindict(utr_handle)

    countsIDlist, countsOutdict = build_count_tables(
        trspdict, UTRdict, args.inset_choice, args.cdsDenThresh,
        args.norm_type, float(args.raw_dense_thresh),
        float(args.rpm_dense_thresh), args.outfilestring)
    write_countTable_to_csv(countsIDlist, countsOutdict, args.outfilestring)
Пример #3
0
def load_genomes(UTRfilestring, firstStopsCSV, twobitfile):
    """
    Load the UTR table, the stop-codon table and the 2bit genome in one place.

    Kept as a separate function so that these only need to be loaded a
    single time and can then be shared by every caller.

    Args:
        UTRfilestring: path of the UTR file passed to rph.readindict.
        firstStopsCSV: path of the csv read with pandas; its first column
            becomes the DataFrame index.
        twobitfile: path of the file passed to twobitreader.TwoBitFile.

    Returns:
        Tuple (UTRdict, utr3adj, genome): the value returned by
        rph.readindict, the DataFrame read from firstStopsCSV and the
        TwoBitFile object.
    """
    # Scope the handle so it is released even if parsing raises.
    # NOTE(review): assumes rph.readindict consumes the whole file before
    # returning -- confirm against rphelper.
    with open(UTRfilestring, "rU") as utr_handle:
        UTRdict = rph.readindict(utr_handle)
    utr3adj = pd.read_csv(firstStopsCSV, index_col=0)
    genome = twobitreader.TwoBitFile(twobitfile)  # do we actually need to load this in here?
    return UTRdict, utr3adj, genome
Пример #4
0
def main():
    """
    Command-line entry point: build the count tables and write them to csv.

    Parses the workflow arguments, reads the stop-codon csv with pandas,
    loads the transcript counts and the UTR table through rph, calls
    build_count_tables and passes its result to write_countTable_to_csv
    together with --outfilestring.

    --cdsDenThresh arrives as a string. It disables the density filter when
    it is missing, empty, or one of 'false', '0', 'no', 'none'
    (case-insensitive); every other value enables it.
    --raw_dense_thresh / --rpm_dense_thresh are converted with float() and
    --totreads with int().
    """
    ## using argparser to load arguments from workflow
    parser = argparse.ArgumentParser()
    cli_options = (
        ('--trspdictfilestring', 'input transcript density files'),
        ('--UTRfilestring', 'UTRs file'),
        ('--cdsDenThresh', 'boolean value- should density filter be used?'),
        ('--norm_type', 'type of normalizaion, should be raw reads or rpm'),
        ('--raw_dense_thresh',
         'threshold for CDS density for raw normalization'),
        ('--rpm_dense_thresh',
         'threshold for CDS density for rpm normalization'),
        ('--inset_choice',
         'inset values to be used to avoid start and stop codon peaks'),
        ('--outfilestring', 'output file name'),
        ('--totreads',
         'total number of reads used after raw densebuilder run'),
        ('--stopcodons',
         'csv file with positions of all stopcodons, riboseq_stopcodon_finder.py'),
    )
    for flag, help_text in cli_options:
        parser.add_argument(flag, help=help_text)
    args = parser.parse_args()

    # bool() on a command-line string is True for every non-empty value,
    # including 'False', so the filter could never be turned off. Parse the
    # text instead; values that are not an explicit "off" keep enabling the
    # filter exactly as before.
    cds_filter_enabled = str(args.cdsDenThresh).strip().lower() not in (
        '', 'false', '0', 'no', 'none')

    utr3adj = pd.read_csv(args.stopcodons, index_col=0)

    trspdict = rph.readcountsf(args.trspdictfilestring)
    # Scope the handle so it is released even if parsing raises.
    # NOTE(review): assumes rph.readindict consumes the whole file before
    # returning -- confirm against rphelper.
    with open(args.UTRfilestring, "rU") as utr_handle:
        UTRdict = rph.readindict(utr_handle)

    # NOTE(review): minUtr3len is not assigned in this function; it has to
    # be available at module level -- confirm.
    countsIDlist, countsOutdict = build_count_tables(
        trspdict, UTRdict, utr3adj, args.inset_choice, cds_filter_enabled,
        args.norm_type, float(args.raw_dense_thresh),
        float(args.rpm_dense_thresh), args.outfilestring, int(args.totreads),
        minUtr3len)
    write_countTable_to_csv(countsIDlist, countsOutdict, args.outfilestring)
Пример #5
0
    def codonaverage(self):
        """
        Run the occupancy calculation for this sample and write the result.

        Builds one motif file path per entry of self.motifs
        (args.motiffilerootpath + motif + "_1.csv"), records the run
        parameters in args.outfileparams, loads the read counts, the
        optional exclusion table and the UTR table, calls self.occupancy,
        and finally writes the transposed [headers, codon_occu] table with
        self.writerows to self.outlistfile.

        Reads the module-level names `args` and `exclusionfiles`; the first
        entry of exclusionfiles is the exclusion file path, with the string
        '0' meaning "no exclusion file".
        """
        motifs = list(self.motifs)
        motiffilelist = [
            args.motiffilerootpath + motif + "_1.csv" for motif in motifs
        ]
        headers = ["motif"] + motifs

        # NOTE(review): presumably self.occupancy fills this list in place
        # after the sample name -- confirm against occupancy().
        codon_occu = [self.sample_name]

        # The parameter log stays open while occupancy() runs because it is
        # handed to it; `with` guarantees it is closed even if a later step
        # raises.
        with open(args.outfileparams, "w") as f_output:
            f_output.write("Density file is " + str(self.sample_name) + "\n")
            f_output.write("cds5trim is " + str(args.cds5trim) + "\n")
            f_output.write("cds3trim is " + str(args.cds3trim) + "\n")
            f_output.write("Seqwin is " + str(args.seqwin) + "\n")
            f_output.write("Motiflist is " + str(motiffilelist) + "\n")

            readcountsdict = rph.readcountsf(args.trspdictfilestring)
            exclusionmodule = exclusionfiles[0]

            if exclusionmodule != '0':
                # NOTE(review): assumes readindict consumes the whole file
                # before returning -- confirm.
                with open(exclusionmodule, "rU") as exclusion_handle:
                    exclusiondict = self.readindict(exclusion_handle)
            else:
                exclusiondict = '0'
            # Single-argument print: identical output on Python 2 and 3.
            print("Exclusion file is " + str(exclusionmodule))

            with open(args.UTRfilestring, "rU") as utr_handle:
                UTRdict = rph.readindict(utr_handle)
            # The return value was never used; dropping the assignment also
            # stops a local from shadowing the method name.
            self.occupancy(readcountsdict, motiffilelist,
                           exclusiondict, codon_occu, UTRdict,
                           f_output)

        outlist = [headers, codon_occu]

        co = np.asarray(outlist)  # convert outlist to a np.array
        output = co.T  # transposed so headers and values become columns
        self.writerows(output, self.outlistfile)  # write these rows to a csv
Пример #6
0
    # Finish the free-text description that heads the parameter log.
    comments += "Threshold signifies minimal rpkm needed in coding region for gene to be in the average.\n"
    comments += "alignpos =1 anchors average around the start codon and only includes 5'UTRs. alignpos =2 is the same for stop codon."
    # Record the description and every run parameter in
    # <outfilebase>_<alignpos>_output.txt.
    fc = open(args.outfilebase + "_" + str(args.alignpos) + "_output.txt", "w")
    fc.write(comments)
    fc.write("\n")
    fc.write("Avggene was called with parameters:\n")
    fc.write("transcripts= " + str(args.trspdictfilestring) + "\n")
    fc.write("filtermodule= " + str(args.filtermodule) + "\n")
    fc.write("exclusionmodule= " + str(args.exclusionmodule) + "\n")
    fc.write("threshold= " + str(args.threshold) + "\n")
    fc.write("regionlength5= " + str(args.regionlength5) + "\n")
    fc.write("regionlength3= " + str(args.regionlength3) + "\n")
    fc.write("equalweight= " + str(args.equalweight) + "\n")
    fc.write("alignpos= " + str(args.alignpos) + "\n")
    fc.close()

    # The string '0' is the sentinel for "no file given": the corresponding
    # dict is then also the string '0' rather than a loaded table.
    if args.filtermodule != '0':
        filterdict = rph.readindict(open(args.filtermodule, "rU"))
    else:
        filterdict = '0'
    if args.exclusionmodule != '0':
        exclusiondict = rph.readindict(open(args.exclusionmodule, "rU"))
    else:
        exclusiondict = '0'
    # NOTE(review): the handles opened inline here are never closed by this
    # code; whether rph.readindict closes them is not visible from here.
    trspdict = rph.readcountsf(args.trspdictfilestring)
    UTRdict = rph.readindict(open(args.UTRfilestring, "rU"))
    # Hand everything to Avggene and run its totalavg() step.
    metagene = Avggene(args.regionlength5, args.regionlength3, trspdict,
                       UTRdict, filterdict, exclusiondict, args.threshold,
                       args.alignpos, args.equalweight, args.outfilebase)
    metagene.totalavg()
# Copy every attribute of libset whose name does not start with "_" into
# this module's global namespace, so those settings can be used unqualified.
for attr in dir(libset):
	if not attr.startswith("_"):
		globals()[attr] = getattr(libset, attr)
# Keep the thread count as a string.
# NOTE(review): `args` is not assigned in the visible code; presumably it is
# one of the names injected from libset above -- confirm.
threadNumb = str(args.threadNumb)
import rphelper as rph



### function inputs

# Module-level setting for the inset choice; fixed to 'zero' here.
inset_choice = 'zero'
# samplelist = samplelist


### load UTRdict to be used for all samples
# Loaded once at import time; it is also bound as the default value of the
# UTRdict parameter of build_count_table_single.
# NOTE(review): UTRfilestring is not assigned in the visible code and the
# handle opened here is never explicitly closed -- confirm both.
UTRdict= rph.readindict(open(UTRfilestring, "rU"))
# countsIDlist, countsOutdict = build_count_tables(trspdict, UTRdict, utr3adj, args.inset_choice, 
# 												bool(args.cdsDenThresh), args.norm_type, float(args.raw_dense_thresh), 
# 												float(args.rpm_dense_thresh), args.outfilestring, int(args.totreads),
# 												minUtr3len)

### sample inputs:




# def build_count_table_single(sample, UTRdict = UTRdict, inset_choice = 'default'):
def build_count_table_single(sample, UTRdict = UTRdict,):

	"""
	This is the function that returns total counts within the following regions: