def main():
    """Command-line entry point: report reactivity differences around motifs.

    Parses the command line, reads the control and experimental <.react>
    files and the <.fasta>, optionally keeps only the transcripts named in
    the -restrict file, and then, for every motif returned by read_motif,
    builds a MotifReport and writes it as a <.csv> into the out directory.
    With -fastaout / -reactout the matching <.fasta> and <.react> files are
    written into the same directory.

    Output file names are derived from the *file names* of the control and
    experimental inputs (any directory part is dropped), so every output
    lands inside the out directory even when the inputs live elsewhere.
    """
    parser = argparse.ArgumentParser(description='Searches for reactivity differences around given motifs')
    parser.add_argument('control', type=str, help='control <.react> file')
    parser.add_argument('experimental', type=str, help='experimental <.react> file')
    parser.add_argument('fasta', type=str, help='<.fasta> to pull sequences from')
    parser.add_argument('motif', type=str, help='Input file or motif')
    parser.add_argument('-fp', default=5, type=int, help='[default = 5] Bases to include 5\' of the motif')
    parser.add_argument('-tp', default=5, type=int, help='[default = 5] Bases to include 3\' of the motif')
    parser.add_argument('-restrict', default=None, help='<.txt > Limit analysis to these specific transcripts')
    parser.add_argument('-outdir', type=str, default='motif_out', help='[default = motif_out] Out Directory')
    parser.add_argument('-fastaout', action='store_true', default=False, help='Write windows to <.fasta> format as well')
    parser.add_argument('-reactout', action='store_true', default=False, help='Write accompanying <.react> files as well')
    args = parser.parse_args()

    #Create the out directory; makedirs also creates missing parent
    #directories, so a nested -outdir such as 'results/motifs' works
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    #Read in reactivities and fasta
    control, experimental = map(structure_io.read_react, [args.control, args.experimental])
    sequences = structure_io.read_fasta(args.fasta)

    #Apply filter if input
    if args.restrict:
        covered = structure_io.read_restrict(args.restrict)
        sequences = {name: seq for name, seq in sequences.items() if name in covered}

    #Read in motifs
    motifs = read_motif(args.motif)

    #Out Nomenclature: use only the file name of each input, otherwise a
    #path separator in the input would leak into the output file name and
    #the joined path would point outside (or into a missing part of) outdir
    control_base = os.path.basename(args.control).replace('.react', '')
    exp_base = os.path.basename(args.experimental).replace('.react', '')
    flank_tags = [str(args.fp) + 'fp', str(args.tp) + 'tp']

    #Iterate through motif(s)
    for motif in motifs:

        #Create a full MotifReport
        report = MotifReport(motif, sequences, control, experimental, args.fp, args.tp)

        #Generate name for each outfile
        #NOTE(review): the <.csv>/<.fasta> names use report.motif while the
        #<.react> names below use the loop variable - confirm these always match
        out_name = '_'.join([control_base, exp_base, report.motif] + flank_tags) + '.csv'

        #Write out motif <.csv>
        write_motif_csv(report, os.path.join(args.outdir, out_name))

        #Write out motif <.fasta>
        if args.fastaout:
            out_fasta_name = out_name.replace('.csv', '.fasta')
            fasta_dict = {r.generate_name(): r.generate_seq() for r in report.records.values()}
            structure_io.write_fasta(fasta_dict, os.path.join(args.outdir, out_fasta_name))

        #Write out motif <.react>, one file per condition
        if args.reactout:
            control_out = {c.generate_name(): c.A_react() for c in report.records.values()}
            exp_out = {e.generate_name(): e.B_react() for e in report.records.values()}
            control_new = '_'.join([control_base, motif] + flank_tags) + '.react'
            exp_new = '_'.join([exp_base, motif] + flank_tags) + '.react'
            structure_io.write_react(control_out, os.path.join(args.outdir, control_new))
            structure_io.write_react(exp_out, os.path.join(args.outdir, exp_new))
def main():
    """Command-line entry point: downscale the RT stops of <.rtsc> files.

    Operates on the files given with -f, or on every '*.rtsc' in the current
    directory when -f is omitted (sorted by name in both cases). Each file is
    read, optionally limited to the transcripts from the -restrict file, run
    entry by entry through the downscaling function selected by ``mode``
    together with ``-ratio``, and written to a new <.rtsc> file. The new name
    replaces '.rtsc' with '_<MODE>_<ratio without the dot>.rtsc'.
    """
    parser = argparse.ArgumentParser(description='Downscales <.rtsc> files.')
    parser.add_argument('-f',
                        default=None,
                        nargs='+',
                        help='Specific <.rtsc> to operate on')
    parser.add_argument('mode',
                        type=str.upper,
                        choices=['FRACTIONAL', 'RANDOMREAD', 'RANDOMPOSITION'])
    parser.add_argument('-ratio',
                        type=float,
                        default=.50,
                        help='[default = 0.50] Fraction of RT stops to retain')
    parser.add_argument(
        '-restrict',
        default=None,
        help='Limit downscaling to these specific transcripts <.txt> ')
    parser.add_argument('-sort',
                        action='store_true',
                        default=False,
                        help='Sort output by transcript name')
    args = parser.parse_args()

    #Files to operate on, dictionary of functions
    fyle_lyst = sorted(glob.glob('*.rtsc')) if args.f is None else sorted(args.f)
    downscale_methods = {
        'FRACTIONAL': keep_static_percentage,
        'RANDOMPOSITION': keep_random_downsample,
        'RANDOMREAD': random_per_stop
    }

    #Loop invariants: the chosen method and the suffix of every new file name.
    #argparse 'choices' guarantees args.mode is one of the dictionary keys.
    downscale = downscale_methods[args.mode]
    suffix = '_' + args.mode + '_' + str(args.ratio).replace('.', '') + '.rtsc'

    #Read the restrict list once instead of once per file; it is skipped
    #entirely when there are no files to process
    restricting = args.restrict is not None
    covered = read_restrict(args.restrict) if restricting and fyle_lyst else None

    #Iterate through file(s)
    for fyle in fyle_lyst:

        #Read in the <.rtsc>, generate new name
        data = read_rtsc(fyle)
        new_fyle = fyle.replace('.rtsc', suffix)

        #You could be doing random for a long time if you do not filter by coverage.
        if restricting:
            data = {
                name: stops
                for name, stops in data.items() if name in covered
            }

        new_data = {k: downscale(v, args.ratio) for k, v in data.items()}
        write_rtsc(new_data, new_fyle, args.sort)
# Example #3
# 0
def main():
    """Command-line entry point: compare reactivities in composition windows.

    Parses the command line, loads the control and experimental <.react>
    files and the <.fasta>, optionally keeps only the transcripts named in
    the -restrict file, builds a CompositionReport from the window options
    and writes it as a <.csv>. With -fastaout / -reactout the matching
    <.fasta> and per-condition <.react> files are written as well. All output
    names are assembled from the input names (with '.react' removed) and the
    chosen parameters.
    """
    cli = argparse.ArgumentParser(
        description='Finds reactivity differences in windows of a given size and composition')
    cli.add_argument('control', type=str, help='control <.react> file')
    cli.add_argument('experimental', type=str, help='experimental <.react> file')
    cli.add_argument('fasta', type=str, help='<.fasta> to pull sequences from')
    cli.add_argument('bases', type=str.upper, help="Bases to query (i.e.\'GC\' or \'AT\')")
    cli.add_argument('-size', default=8, type=int, help='[default = 8] Size of window')
    cli.add_argument('-perc', default=1.0, type=float, help='[default = 1.0] Percent specified bases')
    cli.add_argument('-unique', action='store_false', default=True, help='[default = True] Remove overlapping windows')
    cli.add_argument('-fp', default=5, type=int, help='[default = 5] Bases to include 5\' of the motif')
    cli.add_argument('-tp', default=5, type=int, help='[default = 5] Bases to include 3\' of the motif')
    cli.add_argument('-restrict', default=None, help='<.txt > Limit analysis to these specific transcripts')
    cli.add_argument('-fastaout', action='store_true', default=False, help='Write windows to <.fasta> format as well')
    cli.add_argument('-reactout', action='store_true', default=False, help='Write accompanying <.react> files as well')
    opts = cli.parse_args()

    #Load both reactivity sets (control first) and the sequences
    ctrl_react = structure_io.read_react(opts.control)
    exp_react = structure_io.read_react(opts.experimental)
    seqs = structure_io.read_fasta(opts.fasta)

    #Keep only the requested transcripts when a restrict file was given
    if opts.restrict:
        wanted = structure_io.read_restrict(opts.restrict)
        seqs = {name: seq for name, seq in seqs.items() if name in wanted}

    #Pieces shared by every output name
    ctrl_stem = opts.control.replace('.react', '')
    exp_stem = opts.experimental.replace('.react', '')
    flank_tags = [str(opts.fp) + 'fp', str(opts.tp) + 'tp']

    #Generate a full CompositionReport
    report = CompositionReport(opts.size, opts.bases, opts.perc, opts.unique,
                               seqs, ctrl_react, exp_react, opts.fp, opts.tp)

    #NOTE(review): -unique is a store_false flag, so opts.unique is True unless
    #the flag is passed and the default label is 'shared' - confirm the polarity
    if opts.unique:
        detail = 'shared'
    else:
        detail = 'unique'
    window_tags = [str(opts.size), str(opts.bases), str(opts.perc), str(detail)]
    out_name = '_'.join([ctrl_stem, exp_stem] + window_tags + flank_tags) + '.csv'

    #Write out motif <.csv>
    write_comp_csv(report, out_name)

    #Write out motif <.fasta>
    if opts.fastaout:
        fasta_name = out_name.replace('.csv', '.fasta')
        window_seqs = {
            rec.generate_name(): rec.generate_seq(True)
            for rec in report.records.values()
        }
        structure_io.write_fasta(window_seqs, fasta_name)

    #Write out one <.react> per condition
    if opts.reactout:
        ctrl_windows = {
            rec.generate_name(): rec.A_react(True)
            for rec in report.records.values()
        }
        exp_windows = {
            rec.generate_name(): rec.B_react(True)
            for rec in report.records.values()
        }
        name_tail = window_tags + flank_tags
        ctrl_react_name = '_'.join([ctrl_stem] + name_tail) + '.react'
        exp_react_name = '_'.join([exp_stem] + name_tail) + '.react'
        structure_io.write_react(ctrl_windows, ctrl_react_name)
        structure_io.write_react(exp_windows, exp_react_name)
# Example #4
# 0
def main():
    """Command-line entry point: compare reactivities around multi-motifs.

    Parses the command line, loads the control and experimental <.react>
    files and the <.fasta>, optionally keeps only the transcripts named in
    the -restrict file, builds a MetaMotifReport from the motif components
    and window options, and writes it as a <.csv>. With -fastaout /
    -reactout the matching <.fasta> and per-condition <.react> files are
    written as well. All output names are assembled from the input names
    (with '.react' removed), the numeric options and the motif components.
    """
    cli = argparse.ArgumentParser(
        description='Searches for reactivity differences around given multi-motifs')
    cli.add_argument('control', type=str, help='control <.react> file')
    cli.add_argument('experimental', type=str, help='experimental <.react> file')
    cli.add_argument('fasta', type=str, help='<.fasta> to pull sequences from')
    cli.add_argument('motifs', nargs='+', help='multi-motif components')
    cli.add_argument('-unique', action='store_false', default=True, help='[default = True] Remove overlapping multi-motifs')
    cli.add_argument('-mn', default=3, type=int, help='[default = 3] Number of multi-motifs required')
    cli.add_argument('-mw', default=50, type=int, help='[default = 50] Query window size for multi-motifs')
    cli.add_argument('-FP', default=30, type=int, help='[default = 30] Bases 5\' of multi-motifs')
    cli.add_argument('-TP', default=30, type=int, help='[default = 30] Bases 3\' of multi-motifs')
    cli.add_argument('-restrict', default=None, help='<.txt > Limit analysis to these specific transcripts')
    cli.add_argument('-fastaout', action='store_true', default=False, help='Write windows to <.fasta> format as well')
    cli.add_argument('-reactout', action='store_true', default=False, help='Write accompanying <.react> files as well')
    opts = cli.parse_args()

    #Load both reactivity sets (control first) and the sequences
    ctrl_react = structure_io.read_react(opts.control)
    exp_react = structure_io.read_react(opts.experimental)
    seqs = structure_io.read_fasta(opts.fasta)

    #Keep only the requested transcripts when a restrict file was given
    if opts.restrict:
        wanted = structure_io.read_restrict(opts.restrict)
        seqs = {name: seq for name, seq in seqs.items() if name in wanted}

    #Pieces shared by every output name
    ctrl_stem = opts.control.replace('.react', '')
    exp_stem = opts.experimental.replace('.react', '')
    option_tags = [
        str(opts.mn) + 'mn',
        str(opts.mw) + 'mw',
        str(opts.FP) + 'FP',
        str(opts.TP) + 'TP',
    ]

    #NOTE(review): -unique is a store_false flag, so opts.unique is True unless
    #the flag is passed and the default label is 'shared' - confirm the polarity
    if opts.unique:
        detail = 'shared'
    else:
        detail = 'unique'

    #Everything after the input name(s): options, overlap label, motif components
    name_tail = option_tags + [detail] + opts.motifs
    out_name = '_'.join([ctrl_stem, exp_stem] + name_tail) + '.csv'

    #Generate a full MetaReport
    report = MetaMotifReport(opts.motifs, opts.mn, opts.mw, opts.FP, opts.TP,
                             opts.unique, seqs, ctrl_react, exp_react)

    #Write out motif <.csv>
    write_meta_csv(report, out_name)

    #Write out motif <.fasta>
    if opts.fastaout:
        fasta_name = out_name.replace('.csv', '.fasta')
        window_seqs = {
            rec.generate_name(): rec.generate_seq()
            for rec in report.records.values()
        }
        structure_io.write_fasta(window_seqs, fasta_name)

    #Write out one <.react> per condition, control first
    if opts.reactout:
        ctrl_windows = {
            rec.generate_name(): rec.A_react()
            for rec in report.records.values()
        }
        exp_windows = {
            rec.generate_name(): rec.B_react()
            for rec in report.records.values()
        }
        ctrl_react_name = '_'.join([ctrl_stem] + name_tail) + '.react'
        structure_io.write_react(ctrl_windows, ctrl_react_name)
        exp_react_name = '_'.join([exp_stem] + name_tail) + '.react'
        structure_io.write_react(exp_windows, exp_react_name)