def main():
    """Command-line entry point: plot a correlation bar chart across conditions.

    Parses ``--condition_ls``, ``--correlation_ls`` and ``--savefn``, echoes
    them through ``util.print_args`` and forwards them unchanged to
    ``plot_corr_bar``. The list options are handed over as the raw strings
    (the defaults are colon-separated).
    """
    title = 'Plot correlation bar of multiple condition'
    parser = argparse.ArgumentParser(description=title)

    # An earlier default set compared 'seq+shape:seq:shape' with correlations
    # '0.78:0.305:0.343' and saved to
    # /home/gongjing/project/shape_imputation/results/condition_compare_correlation.pdf
    option_table = (
        ('--condition_ls',
         'HEK293(wc, vivo):HEK293(wc, vitro):HEK293(ch, vivo):HEK293(np, vivo):HEK293(cy, vivo):mES(wc, vivo)',
         'Condition list'),
        ('--correlation_ls',
         '0.78:0.702:0.704:0.761:0.723:0.675',
         'Correlation list'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/results/condition_compare_correlation.vivo_vitro_WcChNpCy.pdf',
         'Path to plot file'),
    )
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    plot_corr_bar(
        condition_ls=args.condition_ls,
        correlation_ls=args.correlation_ls,
        savefn=args.savefn,
    )
示例#2
0
def main():
    """Command-line entry point: plot the dms-seq m6AC score distribution.

    Parses ``--shape_ls``, ``--label_ls`` and ``--savefn``, echoes them via
    ``util.print_args`` and passes them as-is to ``plot_m6AC``. The default
    shape and label lists are colon-separated strings.
    """
    title = 'Plot dms-seq m6AC score dist'
    parser = argparse.ArgumentParser(description=title)

    # Default shape list: two files joined with ':'.
    default_shape_ls = ':'.join((
        '/home/gongjing/project/shape_imputation/data/RBMbase/download_20191204/RMBase_hg38_all_m6A_site.tran.e1.tx_has_shape_base_valid.bed.shape100.txt',
        '/home/gongjing/project/shape_imputation/exper/d06_DMSseq_K562_vitro_trainRandmask0.3x50_vallownull100_lossDMSloss_all/prediction.dmsseq_k562_vivo_m6A_null.txt',
    ))
    default_savefn = '/home/gongjing/project/shape_imputation/results/dmsseq_m6AC_valid_vs_nullpredict.pdf'

    parser.add_argument('--shape_ls', type=str, default=default_shape_ls,
                        help='List of shape file')
    parser.add_argument('--label_ls', type=str, default='valid:null_predict',
                        help='Label list')
    parser.add_argument('--savefn', type=str, default=default_savefn,
                        help='Savefn pdf')

    args = parser.parse_args()
    util.print_args(title, args)

    plot_m6AC(
        shape_ls=args.shape_ls,
        label_ls=args.label_ls,
        savefn=args.savefn,
    )
示例#3
0
def main():
    """Command-line entry point: compare AUCs for single known structures.

    Parses ``--AUC_txt_ls``, ``--AUC_label_ls`` and ``--savefn``, echoes them
    via ``util.print_args`` (using the parser description as title) and
    forwards them to ``compare_AUCs``. The default lists are colon-separated.
    """
    parser = argparse.ArgumentParser(
        description='Plot AUC for single known structure')

    # Default AUC files, one per entry of the default label list.
    default_auc_files = ':'.join((
        '/home/gongjing/project/shape_imputation/exper/b28_trainLossall_GmultiplyX_randomNperfragmentpct0.3L20x10_randomNperValidate/prediction.rfam.AUCs.txt',
        '/home/gongjing/project/shape_imputation/exper/b92_trainLossall_shapeOnly_x10/prediction.rfam.AUCs.txt',
        '/home/gongjing/project/shape_imputation/exper/b91_trainLossall_seqOnly_x10/prediction.rfam.AUCs.txt',
    ))
    option_table = (
        ('--AUC_txt_ls', default_auc_files, 'AUC file list'),
        ('--AUC_label_ls', 'seq+shape:shape:seq', 'AUC label list'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/results/condition_compare_AUC_rfam.pdf',
         'Pdf file to save plot'),
    )
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    util.print_args(parser.description, args)

    compare_AUCs(
        AUC_txt_ls=args.AUC_txt_ls,
        AUC_label_ls=args.AUC_label_ls,
        savefn=args.savefn,
    )
def main():
    """Command-line entry point: correlation bars for SHAPEImpute vs ShaKer.

    Parses ``--condition_ls``, ``--SHAPEImpute_ls``, ``--ShaKer_ls`` and
    ``--savefn``, echoes them via ``util.print_args`` and forwards them to
    ``plot_corr_bar``. All defaults are colon-separated strings with one
    entry per condition.
    """
    title = 'Plot correlation bar of multiple condition'
    parser = argparse.ArgumentParser(description=title)

    option_table = (
        ('--condition_ls',
         'hek_wc_vivo:hek_wc_vitro:hek_ch_vivo:hek_np_vivo:hek_cy_vivo:mes_wc_vivo',
         'Condition list'),
        ('--SHAPEImpute_ls',
         '0.766:0.715:0.699:0.755:0.710:0.666',
         'Correlation list'),
        ('--ShaKer_ls',
         '0.274:0.256:0.226:0.228:0.242:0.264',
         'Correlation list'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/results/condition_compare_correlation.shapeimpute_vs_shaker_multiple_set.pdf',
         'Path to plot file'),
    )
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    plot_corr_bar(
        condition_ls=args.condition_ls,
        SHAPEImpute_ls=args.SHAPEImpute_ls,
        ShaKer_ls=args.ShaKer_ls,
        savefn=args.savefn,
    )
def main():
    """Command-line entry point: compare true vs predicted shape across replicates.

    For each of two replicates the options give a shape.out file, a fragment
    file and a fragment prediction; ``--tx_null_pct`` (default 0.3) is a
    null-percentage cutoff and ``--savefn`` the output PDF. Options without
    a default are ``None`` when omitted. Everything is echoed via
    ``util.print_args`` and forwarded to ``rep_compare``.
    """
    parser = argparse.ArgumentParser(
        description='Compare replicates correlation between true and predicted shape')

    replicate_options = (
        ('--rep1_out', 'Rep1 shape.out'),
        ('--rep1_validate', 'Rep1 fragment'),
        ('--rep1_predict', 'Rep1 fragment prediction'),
        ('--rep2_out', 'Rep2 shape.out'),
        ('--rep2_validate', 'Rep2 fragment'),
        ('--rep2_predict', 'Rep2 fragment prediction'),
    )
    for flag, help_text in replicate_options:
        parser.add_argument(flag, type=str, help=help_text)
    parser.add_argument('--tx_null_pct', type=float, default=0.3,
                        help='Cutoff filtering fragment with null pct')
    parser.add_argument('--savefn', type=str, help='Pdf file to save plot')

    args = parser.parse_args()
    util.print_args(parser.description, args)

    rep_compare(
        rep1_out=args.rep1_out,
        rep1_validate=args.rep1_validate,
        rep1_predict=args.rep1_predict,
        rep2_out=args.rep2_out,
        rep2_validate=args.rep2_validate,
        rep2_predict=args.rep2_predict,
        tx_null_pct=args.tx_null_pct,
        savefn=args.savefn,
    )
def main():
    """Command-line entry point: split fragments into train and validation sets.

    Parses ``--blastn`` (blastn result file), ``--txt`` (file with all
    fragments) and ``--validation_pct`` (float, default 0.2), echoes them via
    ``util.print_args`` and forwards them to ``split``.
    """
    description = 'Split into train & validation based on blastn'
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        '--blastn',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/seq_similarity/windowLen100.sliding10.all2.outputfile_E10',
        help='Path to blastn file')
    parser.add_argument(
        '--txt',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding10.txt',
        help='Path to all fragment file')
    # help text previously read 'Valiation percentage' (typo)
    parser.add_argument('--validation_pct',
                        type=float,
                        default=0.2,
                        help='Validation percentage')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)
    split(blastn=args.blastn, txt=args.txt, validation_pct=args.validation_pct)
示例#7
0
def main():
    """Run the external command at every T-th step and save the parsed results.

    Arguments come from ``util.get_args``. For each step ``t`` in
    ``range(args.num_test)`` that is a multiple of ``args.T`` the command
    built by ``util.get_repetita_args`` is executed through ``util.call``.
    When the call yields output it is parsed with ``util.parse_result``:
    ``mlu`` is kept only if it has exactly ``args.T`` entries and ``rc`` only
    if it is not ``None``. The collected lists go to ``util.save``.
    """
    args = util.get_args()
    util.print_args(args)

    mlu_history, rc_history = [], []

    for step in range(args.num_test):
        # only multiples of T trigger a run
        if step % args.T != 0:
            continue

        command = util.get_repetita_args(args, step)
        print('command:', ' '.join(command))
        output = util.call(command)
        if not output:
            # nothing to parse for this step
            continue

        print('stdout:', output)
        mlu, rc = util.parse_result(step, output, args)
        if len(mlu) == args.T:
            mlu_history.append(mlu)
        if rc is not None:
            rc_history.append(rc)

    util.save(mlu_history, rc_history, args)
示例#8
0
def main():
    """Command-line entry point: sort shape files by NULL count.

    Parses ``--shape1``, ``--shape2`` and ``--value_col1`` (int, default 7),
    echoes them via ``util.print_args`` and forwards them to ``sort``.
    """
    parser = argparse.ArgumentParser(description='Sort shape by NULL count')

    # The help texts of --shape1/--shape2 were swapped; each now names its
    # own option.
    parser.add_argument(
        '--shape1',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip/CLIPDB20162_IGF2BP1_HEK293_trx.tx_has_shape_region_null_ok.fimo/fimo.new.IGF2BP1_11.txt.shape100.txt',
        help='Path to shape1')
    parser.add_argument(
        '--shape2',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip/CLIPDB20162_IGF2BP1_HEK293_trx.tx_has_shape_region_null_ok.fimo/fimo.new.IGF2BP1_11.txt.shape',
        help='Path to shape2')
    parser.add_argument('--value_col1',
                        type=int,
                        default=7,
                        help='Which column index to sort')

    # get args
    args = parser.parse_args()
    util.print_args(parser.description, args)

    sort(shape1=args.shape1, shape2=args.shape2, value_col1=args.value_col1)
示例#9
0
def main():
    """Command-line entry point: RNA modification site analysis.

    Parses ``--modification_bed``, ``--icshape`` and ``--label`` (default
    ``'e1'``), echoes them via ``util.print_args`` and calls
    ``modification_extend_1_has_shape`` with the bed file, the icSHAPE out
    file and the label.
    """
    title = 'RNA modification site analysis'
    default_bed = '/home/gongjing/project/shape_imputation/data/RBMbase/download_20191204/RMBase_hg38_all_PseudoU_site.tran.bed'
    default_icshape = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out'

    parser = argparse.ArgumentParser(description=title)
    parser.add_argument('--modification_bed', type=str, default=default_bed,
                        help='Modification bed file')
    parser.add_argument('--icshape', type=str, default=default_icshape,
                        help='icSHAPE out file')
    parser.add_argument('--label', type=str, default='e1')

    args = parser.parse_args()
    util.print_args(title, args)

    # modification_has_shape(bed=..., out=...) is the non-extended variant
    # and is not run here; for dms-seq, check xxACx, C=>e1.
    modification_extend_1_has_shape(
        bed=args.modification_bed,
        out=args.icshape,
        label=args.label,
    )
def main():
    """Command-line entry point: work on fragments common to two depths.

    ``--process_type`` selects exactly one step:

    * ``generate_common_fragment`` calls ``fragment_for_test`` with
      ``--fragment_high`` and ``--fragment_low``;
    * ``corr_of_common_fragment`` calls ``fragment_compare_corr`` with
      ``--fragment_common`` and ``--savefn``.

    The default value of ``--process_type`` lists both names separated by
    ``|`` and therefore selects neither step; in that case (or for any other
    unknown value) nothing is run and a notice is printed.
    """
    description = 'Retain common fragments in two validation file with different depth'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--fragment_high',
                        type=str,
                        help='Path to fragment file with depth1')
    parser.add_argument('--fragment_low',
                        type=str,
                        help='Path to fragment file with depth2')
    # help text was a copy of the --fragment_low one
    parser.add_argument('--fragment_common',
                        type=str,
                        help='Path to common fragment file')
    parser.add_argument('--savefn', type=str, help='Path to save plot file')
    parser.add_argument(
        '--process_type',
        type=str,
        default='generate_common_fragment|corr_of_common_fragment',
        help='Step to run: generate_common_fragment or corr_of_common_fragment')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)

    if args.process_type == 'generate_common_fragment':
        fragment_for_test(fragment_high=args.fragment_high,
                          fragment_low=args.fragment_low)
    elif args.process_type == 'corr_of_common_fragment':
        fragment_compare_corr(fragment_common=args.fragment_common,
                              savefn=args.savefn)
    else:
        # Previously a silent no-op; keep doing nothing but tell the user why.
        print('No step run: --process_type must be '
              'generate_common_fragment or corr_of_common_fragment')
示例#11
0
def extract_process_kmers(name):
    """Extract k-mers for project *name* and run the initial processing steps.

    Loads the project arguments and writes three files under ``<name>/``:
    ``<name>_kmers.txt.gz`` (extracted k-mers),
    ``<name>_kmers_shuffled.txt.gz`` (the same k-mers, shuffled) and
    ``<name>_kmers_counts.txt.gz`` (k-mer occurrence counts).

    The value returned by ``extract_kmers`` is stored as ``args['genome']``
    and persisted with ``util.save_args`` before the statistics are printed.

    Args:
    name: project name, used to get project args and in all output

    Returns:
    True after all steps have run.
    """
    log = util.print_log
    project = (name, name)  # directory and file prefix share the project name

    log('start extract_process_kmers()')
    log('load arguments...')
    args = util.load_args(name)
    util.print_args(args)
    log('done')

    # --- load the genome sequence ---
    log('load FASTA...')
    log('load from %s' % args['fasta'])
    fasta = load_fasta(args['fasta'])
    log('done')

    # --- extract k-mers for the main PAM plus all alternative PAMs ---
    log('extract k-mers...')
    kmers_path = '%s/%s_kmers.txt.gz' % project
    pam_list = [args['pam']] + args['altpam']
    log('write in file %s' % kmers_path)
    genome = extract_kmers(
        name=name,
        fasta=fasta,
        length=args['length'],
        pams=pam_list,
        pampos=args['pampos'],
        filename=kmers_path,
        chroms=args['chrom'],
        minchrlen=args['minchrlen'],
        processes=args['processes'],
    )
    sys.stdout.write('genome: %s' % genome)
    log('save genome info')
    args['genome'] = genome
    util.save_args(args)
    log('calculate k-mer statistics')
    print_stats_kmers(kmers_path, gnupath=args['gnupath'])
    log('done')

    # --- shuffle the extracted k-mers ---
    log('shuffle k-mers...')
    shuffled_path = '%s/%s_kmers_shuffled.txt.gz' % project
    log('write in file %s' % shuffled_path)
    shuffle_kmers(fileinput=kmers_path,
                  fileoutput=shuffled_path,
                  gnupath=args['gnupath'])
    log('done')

    # --- count k-mer occurrences; the threshold comes from 'maxoffpos' ---
    log('count k-mers...')
    counts_path = '%s/%s_kmers_counts.txt.gz' % project
    log('write in file %s' % counts_path)
    sort_count_kmers(fileinput=kmers_path,
                     fileoutput=counts_path,
                     mincount=args['maxoffpos'],
                     gnupath=args['gnupath'])
    log('done')
    return True
示例#12
0
def produce_bams_main(kmers_trie, name):
    """Produce BAM files with all guideRNAs and info about their off-targets.

    Meant to run after kmers.extract_process_kmers() and
    guides.analyze_guides() have generated their files and the trie.

    Guides are read from ``<name>/classifiedfiles/guides/<i>.txt.gz`` for
    ``i`` in ``range(4 ** args['greateroffdist'])``. Two passes of
    ``produce_bam_custom`` are made:

    * label ``nooff`` with ``offdist=-1`` (guideRNAs only, no off-target
      info; produced much faster) -- always run;
    * label ``offdist<N>`` with ``offdist=args['offdist']`` -- only when
      ``args['offdist']`` is not -1.

    Produced files, per the original description:
    ``<name>/<name>_guides.bam`` with ``.bai`` index (off-target info) and
    ``<name>/<name>_guides_nooff.bam`` with ``.bai`` index.

    Args:
    kmers_trie: trie.trie object as produced by guides.analyze_guides()
    name: project name, used to get project args and in all output
    """
    log = util.print_log

    log('start produce_bam()')
    log('load arguments...')
    args = util.load_args(name)
    util.print_args(args)
    log('done')

    log('produce SAM file with guideRNAs only (no off-targets)...')
    n = args['greateroffdist']
    parts = 4 ** n
    guides_dir = '%s%s' % (name,'/classifiedfiles/guides')
    guides_filenames = ['%s/%s.txt.gz' % (guides_dir, part)
                        for part in range(parts)]

    def run_pass(label, offdist):
        # maxoffcount/processes are looked up when the pass actually runs
        produce_bam_custom(kmers_trie=kmers_trie,
                           name=name,
                           label=label,
                           guides_filename=guides_filenames,
                           args=args,
                           offdist=offdist,
                           maxoffcount=args['maxoffcount'],
                           processes=args['processes'],
                           n=n,
                           parts=parts)

    log('read guides from %s' % guides_dir)
    run_pass('nooff', -1)  # -1 for no off-targets
    log('done')

    if args['offdist'] == -1:
        # off-target pass disabled
        return

    log('produce SAM file with guideRNAs'
        ' and off-target info...')
    log('read guides from %s' % guides_dir)
    run_pass('offdist%s' % args['offdist'], args['offdist'])
    log('done')
示例#13
0
def main():
    """Run the external command once, for a single fixed step, and print its output.

    Arguments come from ``util.get_args``; the command is built by
    ``util.get_repetita_args`` for step 1995 and executed with ``util.call``.
    """
    args = util.get_args()
    util.print_args(args)

    # one hard-coded step instead of a sweep over steps
    step = 1995
    command = util.get_repetita_args(args, step)
    print('command:', ' '.join(command))
    output = util.call(command)
    print('stdout:', output)
示例#14
0
def main():
    """Command-line entry point: parse the bpRNA dataset of the SPOT-RNA project.

    Parses ``--d`` (input directory) and ``--savefn`` (output file), both
    ``None`` when omitted, echoes them via ``util.print_args`` and forwards
    them to ``read_dir_rfam``.
    """
    parser = argparse.ArgumentParser(
        description='Parse dataset of bpRNA in SPOT-RNA project')

    for flag, help_text in (('--d', 'Dir of bpRNA'),
                            ('--savefn', 'File to save parsed info')):
        parser.add_argument(flag, type=str, help=help_text)

    args = parser.parse_args()
    util.print_args(parser.description, args)

    read_dir_rfam(d=args.d, savefn=args.savefn)
示例#15
0
def main():
    """Command-line entry point: plot NULL percentage along iterations.

    Parses ``--stat_ls``, ``--label_ls``, ``--corr_ls`` and ``--savefn``,
    echoes them via ``util.print_args`` and forwards them to
    ``plot_iteration``. The default stat and label lists are
    comma-separated strings.
    """
    title = 'Plot NULL percentage along iterations'
    parser = argparse.ArgumentParser(description=title)

    option_table = (
        ('--stat_ls',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/t.out.predict/iteration.stat.txt,/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/hek_wc.out.predict/iteration.stat.txt',
         'List of stat file'),
        ('--label_ls',
         'test,hek_wc_vivo',
         'Label list'),
        ('--corr_ls',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/hek_wc.out.c80.newwithNULL.nominus.predict/corr.txt',
         'Correlation text list'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/results/null_pct_interation.pdf',
         'Savefn pdf'),
    )
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    plot_iteration(
        stat_ls=args.stat_ls,
        label_ls=args.label_ls,
        savefn=args.savefn,
        corr_ls=args.corr_ls,
    )
示例#16
0
def main():
    """Command-line entry point: compare predictions against validation files.

    Parses ``--validation_ls``, ``--predict_ls``, ``--label_ls`` (all
    ``None`` when omitted) and ``--savefn``, echoes them via
    ``util.print_args`` and forwards them to ``compare_predict``.
    """
    description = 'Plot correlation bar of multiple condition'
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument('--validation_ls', type=str, help='Validation file list')
    parser.add_argument('--predict_ls', type=str, help='Predict file list')
    # help text previously read 'Lable list' (typo)
    parser.add_argument('--label_ls', type=str, help='Label list')
    parser.add_argument('--savefn', type=str, default='/home/gongjing/project/shape_imputation/results/condition_compare_correlation.track.wc_vs_cy.pdf', help='Path to plot file')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)
    compare_predict(validation_ls=args.validation_ls, predict_ls=args.predict_ls, label_ls=args.label_ls, savefn=args.savefn)
def main():
    """Command-line entry point: plot a correlation bar chart across conditions.

    Parses ``--condition_ls``, ``--correlation_ls`` and ``--savefn``, echoes
    them via ``util.print_args`` and forwards them to ``plot_corr_bar``.
    Both list defaults are colon-separated and have five entries each.
    """
    description = 'Plot correlation bar of multiple condition'
    parser = argparse.ArgumentParser(description=description)

    # The default used to end in a stray ')' ('...:0.5)'), which made the
    # last condition label '0.5)'.
    parser.add_argument('--condition_ls', type=str, default='0.1:0.2:0.3:0.4:0.5', help='Condition list')
    parser.add_argument('--correlation_ls', type=str, default='0.940:0.915:0.885:0.810:0.755', help='Correlation list')
    parser.add_argument('--savefn', type=str, default='/home/gongjing/project/shape_imputation/results/condition_compare_correlation.18S.pdf', help='Path to plot file')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)

    plot_corr_bar(condition_ls=args.condition_ls, correlation_ls=args.correlation_ls, savefn=args.savefn)
示例#18
0
def main():
    """Command-line entry point: extract shape around start/stop codons.

    Parses ``--species``, ``--icshape`` and ``--savefn``, echoes them via
    ``util.print_args`` (titled with the parser description) and calls
    ``extract_start_codon_shape`` with the icSHAPE file passed as ``shape``.
    """
    # description previously misspelled 'codon' as 'condon'
    parser = argparse.ArgumentParser(description='Extract start/stop codon shape')

    # help text was the literal default 'human'; other scripts in this
    # project describe the option as 'Species'
    parser.add_argument('--species', type=str, default='human', help='Species')
    parser.add_argument('--icshape', type=str, default='/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out', help='icSHAPE out file')
    parser.add_argument('--savefn', type=str, help='Pdf file to save plot', default='/home/gongjing/project/shape_imputation/results/start_stop_codon/hek_wc.shape')

    # get args
    args = parser.parse_args()
    util.print_args(parser.description, args)

    extract_start_codon_shape(species=args.species, shape=args.icshape, savefn=args.savefn)
示例#19
0
def main():
    """Command-line entry point: compare two shape columns of a fragment file.

    Parses ``--fragment`` and ``--savefn`` (both ``None`` when omitted),
    echoes them via ``util.print_args`` and forwards them to ``compare``.
    """
    parser = argparse.ArgumentParser(
        description='Compare two shape column in a fragment file')

    for flag, help_text in (('--fragment', 'fragment file'),
                            ('--savefn', 'Pdf file to save plot')):
        parser.add_argument(flag, type=str, help=help_text)

    args = parser.parse_args()
    util.print_args(parser.description, args)

    compare(fragment=args.fragment, savefn=args.savefn)
示例#20
0
def main():
    """Command-line entry point: complete an input shape.out file.

    Declares the options listed in the table below (flag, type, default,
    help), echoes the parsed values via ``util.print_args`` and forwards
    every one of them, under the same name, to
    ``complete_shape_out_nullpct``.
    """
    title = 'Complete a input shape.out'
    parser = argparse.ArgumentParser(description=title)

    option_table = (
        ('--icshape', str,
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
         'icSHAPE out file'),
        ('--species', str, 'human', 'Species'),
        ('--predict_label', str, 'wc_all_fragment', 'Predict_label'),
        ('--predict_model', str,
         'b28_trainLossall_GmultiplyX_randomNperfragmentpct0.3L20x10_randomNperValidate2',
         'Model used to predict'),
        ('--pct', float, 0.5, 'Max NULL percentage in fragment to predict'),
        ('--window_len', int, 100, 'window_len'),
        ('--sliding', int, 10, 'sliding'),
        ('--shape_null_pct', float, 0.3,
         'Stop predict when remains pct(NULL) <= cutoff'),
        ('--gpu_id', str, "1", 'GPU id'),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default_value,
                            help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    complete_shape_out_nullpct(
        icshape=args.icshape,
        species=args.species,
        predict_label=args.predict_label,
        predict_model=args.predict_model,
        pct=args.pct,
        window_len=args.window_len,
        sliding=args.sliding,
        shape_null_pct=args.shape_null_pct,
        gpu_id=args.gpu_id,
    )
示例#21
0
def main():
    """Command-line entry point: CLIP data analysis in batch mode.

    Declares the options in the table below, echoes the parsed values via
    ``util.print_args`` and calls ``iclip_has_shape_batch`` with the CLIP
    table list, the bed directory, the minimum peak length, the icSHAPE out
    file (as ``out``) and the maximum NULL percentage. ``--clip_bed`` is
    parsed but not used by the batch call.
    """
    title = 'CLIP data analysis'
    parser = argparse.ArgumentParser(description=title)

    option_table = (
        ('--clip_bed', str,
         '/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip/STARBASE20007_DGCR8_HEK293T_trx.bed',
         'Bed file of CLIP data'),
        ('--clip_bed_dir', str,
         '/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip',
         'Dir to bed file'),
        ('--bed_peak_len', int, 10, 'Min peak length to keep'),
        ('--icshape', str,
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
         'icSHAPE out file'),
        ('--max_null_pct', float, 0.4,
         'Max percentage of NULL values in peak regions'),
        ('--clip_table_list', str,
         '/home/gongjing/project/shape_imputation/data/CLIP/human.RBP.CLIP.combined.tbl',
         'All CLIP table list'),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default_value,
                            help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    # Single-file alternatives (read_bed / iclip_has_shape on --clip_bed)
    # are not run here; only the batch variant is.
    iclip_has_shape_batch(
        clip_table_list=args.clip_table_list,
        clip_bed_dir=args.clip_bed_dir,
        bed_peak_len=args.bed_peak_len,
        out=args.icshape,
        max_null_pct=args.max_null_pct,
    )
def main():
    """Command-line entry point: plot shape of one transcript across iterations.

    Parses ``--icshape``, ``--predict_dir``, ``--validation``,
    ``--max_iterations`` (int, default 100), ``--tx`` and ``--icshape_true``,
    echoes them via ``util.print_args`` and hands the whole namespace to
    ``plot_tx_shape_iterations``.
    """
    description = 'Plot NULL corr across iterations'
    parser = argparse.ArgumentParser(description=description)

    # --icshape and --icshape_true previously carried the copy-pasted help
    # 'predict file dir', although their defaults point at an icSHAPE out
    # file rather than at the prediction directory.
    parser.add_argument(
        '--icshape',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/hek_wc.out.c80',
        help='icSHAPE out file')
    parser.add_argument(
        '--predict_dir',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/hek_wc.out.predict',
        help='predict file dir')
    parser.add_argument(
        '--validation',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/validation_randomnullfragment/windowLen100.sliding100.validation.randomNperfragmentNullPct0.3.maxL20.S1234.txt',
        help='validation file')
    parser.add_argument('--max_iterations',
                        type=int,
                        default=100,
                        help='plot <= max_iterations')
    parser.add_argument('--tx',
                        type=str,
                        default='ENST00000331434',
                        help='shape plot of tx')
    parser.add_argument(
        '--icshape_true',
        type=str,
        default=
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction/hek_wc.out.c80',
        help='True icSHAPE out file')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)

    # plot_dir_null_corr(args) and
    # generate_new_shape_out_with_validation_null(args) take the same
    # namespace but are not run here.
    plot_tx_shape_iterations(args)
示例#23
0
def main():
    """Command-line entry point: generate augmented (DA) data for a train set.

    Parses ``--txt``, ``--seed`` (int, default 1234), ``--times`` (int,
    default 20) and ``--strategy`` (default ``'random'``), echoes them via
    ``util.print_args``, seeds the ``random`` module with ``--seed`` and
    calls ``data_agumentation`` with the file, times and strategy.
    """
    description = 'Generate N DA data for a train set'
    parser = argparse.ArgumentParser(description=description)

    # help text previously read 'Path to blastn file' (copied from another
    # script); the default points at a train fragment file
    parser.add_argument('--txt', type=str, default='/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/train_randomnullfragment_DA/windowLen100.sliding100.train.txt', help='Path to train fragment file')
    parser.add_argument('--seed', type=int, default=1234, help='random seed')
    parser.add_argument('--times', type=int, default=20, help='DA times')
    parser.add_argument('--strategy', type=str, default='random', help='DA strategy: random|shadow_null_shuffle')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)
    # seed before any augmentation so runs are reproducible for a given seed
    random.seed(args.seed)

    data_agumentation(txt=args.txt, times=args.times, strategy=args.strategy)
def main():
    """Command-line entry point: search a motif in a fasta.

    Parses ``--species`` (default ``'human'``) and ``--savefn``, echoes them
    via ``util.print_args`` and forwards them to ``search``.
    """
    title = 'Search motif from a fasta'
    default_savefn = '/home/gongjing/project/shape_imputation/data/RBMbase/download_20191204/hg38_m6A_motif.bed'

    parser = argparse.ArgumentParser(description=title)
    parser.add_argument('--species', type=str, default='human', help='Species')
    parser.add_argument('--savefn', type=str, default=default_savefn,
                        help='Savefn')

    args = parser.parse_args()
    util.print_args(title, args)

    search(species=args.species, savefn=args.savefn)
示例#25
0
def main():
    """Command-line entry point: set random NULL positions as other signal.

    Parses the single option ``--txt`` (validation data set), echoes it via
    ``util.print_args`` and passes it to ``convert``.
    """
    title = 'Set random NULL position as other signal'
    default_txt = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo_rRNA/3.shape/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding100.fulllength18S.validation_randomNULL0.3.txt'

    parser = argparse.ArgumentParser(description=title)
    parser.add_argument('--txt', type=str, default=default_txt,
                        help='validation data set')

    args = parser.parse_args()
    util.print_args(title, args)

    convert(txt=args.txt)
示例#26
0
def main():
    """Command-line entry point: null-pct scatter of transcripts in two icSHAPE files.

    Parses two icSHAPE out files, a label for each, the output plot path and
    the species; echoes them via ``util.print_args`` and calls
    ``plot_shape_tx_null_pct`` (the files are passed as ``out1``/``out2``).
    """
    title = 'Plot null pct scatter of common tx between two icshape.out'
    parser = argparse.ArgumentParser(description=title)

    option_table = (
        ('--icshape1',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
         'icSHAPE out file1'),
        ('--icshape2',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.allfragment.0.5+exceed0.5.txt2.predict.out',
         'icSHAPE out file2'),
        ('--out1_label', 'True', 'icSHAPE out file1 label'),
        ('--out2_label', 'Predict', 'icSHAPE out file2 label'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.allfragment.0.5+exceed0.5.txt2.predict.out.scatter.pdf',
         'Save plot file'),
        ('--species', 'human', 'Species'),
    )
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    args = parser.parse_args()
    util.print_args(title, args)

    plot_shape_tx_null_pct(
        out1=args.icshape1,
        out2=args.icshape2,
        out1_label=args.out1_label,
        out2_label=args.out2_label,
        savefn=args.savefn,
        species=args.species,
    )
def main():
    """Command-line entry point: parse ``--txt`` and hand it to ``null_pattern_to_bed``.

    The single ``--txt`` option is echoed through ``util.print_args`` and
    then passed on as the ``txt`` argument of ``null_pattern_to_bed``.
    """
    ####################################################################
    ### define parser of arguments
    description = 'Generate a shuffle fragment based on a null pattern'
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        '--txt',
        type=str,
        default=
        '/data/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/low_depth_null/sampling/windowLen100.sliding100.validation.low_60_1234.null_pattern.txt',
        # The option takes one .txt file (see the default), not a directory,
        # so the help text says so.
        help='Path to null pattern txt file')

    # get args
    args = parser.parse_args()
    util.print_args(description, args)

    null_pattern_to_bed(txt=args.txt)
def main():
    """Command-line entry point for NULL/nonNULL distribution statistics.

    Parses ``--data``, ``--col``, ``--savefn`` and ``--dist_type``, echoes
    them through ``util.print_args`` and then runs every analysis whose name
    occurs in ``--dist_type``:

    * ``null_pattern`` -> ``null_count_dist``; its output path is ``--data``
      with ``.txt`` replaced by ``.pdf`` (``--savefn`` is not used here).
    * ``shape_value``  -> ``reactivity_dist``, written to ``--savefn``.
    """
    title = 'Calc NULL/nonNULL value distribution in train/validation set'

    # (flag, default, help) for each string-valued option, in registration order.
    option_table = (
        ('--data',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/c200T2/w100s100.train_null0.1.txt',
         'Path to fragment file'),
        ('--col', '7:8', 'Columns to calc'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/c200T2/w100s100.train_null0.1.stat.pdf',
         'Path to save stat file'),
        ('--dist_type', 'null_pattern', 'Type of dist'),
    )

    parser = argparse.ArgumentParser(description=title)
    for flag, default_value, help_text in option_table:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    parsed = parser.parse_args()
    util.print_args(title, parsed)

    # Substring tests: both analyses run when --dist_type names both.
    requested = parsed.dist_type
    if 'null_pattern' in requested:
        pattern_pdf = parsed.data.replace('.txt', '.pdf')
        null_count_dist(data=parsed.data, col=parsed.col, savefn=pattern_pdf)
    if 'shape_value' in requested:
        reactivity_dist(data=parsed.data, col=parsed.col, savefn=parsed.savefn)
示例#29
0
def run_model() -> None:
    """Execute the model according to the global ``ARGS`` configuration.

    Prints the parameters, obtains a model builder and reader from
    ``common``, loads the train/validation/test datasets, trains through
    ``train_model``, then runs ``common.evaluate``, ``make_prediction`` and
    ``common.error_analysis`` before passing the prediction result and the
    save path to ``test_load``.
    """
    hashes = '#' * 5
    print(hashes, 'PARAMETERS', hashes)
    print_args(ARGS)
    print('#' * 10, '\n\n')

    # Pick the model builder and the reader type that goes with it.
    build_fn, reader_type = common.get_modelfn_reader()
    reader = common.create_reader(reader_type)

    def make_optimiser(model: Model) -> torch.optim.Optimizer:
        return AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)

    def load_split(data_path, pre_processed_path):
        # Every split is read with the same reader.
        return load_data(data_path=data_path,
                         reader=reader,
                         pre_processed_path=pre_processed_path)

    # Make sure the output folder exists before anything is written to it.
    ARGS.SAVE_PATH.mkdir(exist_ok=True, parents=True)

    train_dataset = load_split(ARGS.TRAIN_DATA_PATH,
                               ARGS.TRAIN_PREPROCESSED_PATH)
    val_dataset = load_split(ARGS.VAL_DATA_PATH, ARGS.VAL_PREPROCESSED_PATH)
    test_dataset = load_split(ARGS.TEST_DATA_PATH,
                              ARGS.TEST_PREPROCESSED_PATH)

    training_options = {
        'train_data': train_dataset,
        'val_data': val_dataset,
        'test_data': test_dataset,
        'save_path': ARGS.SAVE_PATH,
        'num_epochs': ARGS.NUM_EPOCHS,
        'batch_size': ARGS.BATCH_SIZE,
        'optimiser_fn': make_optimiser,
        'cuda_device': ARGS.CUDA_DEVICE,
        'sorting_keys': reader.keys,
    }
    model = train_model(build_fn, **training_options)

    common.evaluate(model, reader, test_dataset)
    result = make_prediction(model, reader, verbose=False)
    common.error_analysis(model, test_dataset)

    print('Save path', ARGS.SAVE_PATH)

    # Device index handed to test_load: 0 when is_cuda(model) is truthy, else -1.
    if is_cuda(model):
        cuda_device = 0
    else:
        cuda_device = -1
    test_load(build_fn, reader, ARGS.SAVE_PATH, result, cuda_device)
示例#30
0
def main():
    """Command-line entry point for the single-known-structure AUC plot.

    Registers the options below, echoes the parsed values through
    ``util.print_args`` and forwards every one of them, by keyword, to
    ``known_structure_compare``.
    """
    title = 'Plot AUC for single known structure'

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--dot', {'type': str, 'help': 'Dot file for known structure'}),
        ('--validate', {'type': str, 'help': 'Validate fragment file'}),
        ('--predict', {'type': str, 'help': 'Predicted fragment file'}),
        ('--tx', {'type': str, 'help': 'Transcript to plot'}),
        ('--start', {
            'type': int,
            'default': 0,
            'metavar': 'N',
            'help': 'Dot start index',
        }),
        ('--savefn', {'type': str, 'help': 'Pdf file to save plot'}),
        ('--title', {
            'type': str,
            'default': '',
            'help': 'Title of the plot',
        }),
        ('--predict_bases', {
            'type': str,
            'default': 'ATCG',
            'help': 'Bases considered while calc AUC for predict sample',
        }),
        ('--validate_bases', {
            'type': str,
            'default': 'ATCG',
            'help': 'Bases considered while calc AUC for validate sample',
        }),
    )

    parser = argparse.ArgumentParser(description=title)
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    # Parse the command line and echo the effective settings.
    parsed = parser.parse_args()
    util.print_args(title, parsed)

    known_structure_compare(
        dot=parsed.dot,
        validate=parsed.validate,
        predict=parsed.predict,
        tx=parsed.tx,
        start=parsed.start,
        savefn=parsed.savefn,
        title=parsed.title,
        predict_bases=parsed.predict_bases,
        validate_bases=parsed.validate_bases,
    )