#!/usr/bin/env python
# Reads a CFF fusion file and, for every fusion record, extracts the
# re-annotated gene names (with "_" replaced by "."), the library and the
# fusion id.
#
# Positional arguments:
#   cff_file  CFF file, can be .cff or cff.reann
#   ensbed    gene annotation file, loaded with pygeneann.GeneAnnotation
#   ref_fa    reference genome FASTA, opened with pysam.FastaFile
import sys
import pygeneann
import sequtils
import pysam
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('cff_file', action='store', help='CFF file, can be .cff or cff.reann')
parser.add_argument('ensbed', action='store', help='Ensemble gene file')
parser.add_argument('ref_fa', action='store', help='Reference genome file')
args = parser.parse_args()

gene_ann = pygeneann.GeneAnnotation(args.ensbed)
ref = pysam.FastaFile(args.ref_fa)


def remove_underscores(gene):
    """Return *gene* with every "_" replaced by "."."""
    return gene.replace("_", ".")


# NOTE(review): seq_dict is not filled by any statement visible in this
# excerpt; presumably the loop body continues beyond it -- confirm.
seq_dict = {}

# Use a context manager so the CFF file handle is closed deterministically
# instead of being left to the garbage collector.
with open(args.cff_file, "r") as cff_handle:
    for line in cff_handle:
        fusion = pygeneann.CffFusion(line)
        # in a downstream script, "_" is used as a field separator.
        # Need to remove "_" from gene names
        gene1 = remove_underscores(fusion.reann_gene1)
        gene2 = remove_underscores(fusion.reann_gene2)
        lib = fusion.library
        fusion_id = fusion.fusion_id
# NOTE(review): this excerpt starts mid-script. `parser`, `load_bam_dict`,
# the `cff_file` / `dna_bam_list` arguments and the imports are defined
# before the visible text, and the loop body continues after it.
parser.add_argument('gene_bed', action='store', help='Gene annotation bed file')
parser.add_argument('tmp_dir', action='store', help='Temp file directory')
args = parser.parse_args()

cff_file = args.cff_file
dna_bam_list = args.dna_bam_list
out_dir = args.tmp_dir
gene_ann_file = args.gene_bed

#dna_bam = pysam.AlignmentFile(dna_bam_file, "rb")
# Looked up by fusion.sample_name below to get that sample's DNA BAM path.
bam_dict = load_bam_dict(dna_bam_list)
gene_ann = pygeneann.GeneAnnotation(gene_ann_file)

win_size = 100000
rlen = 100
isize = 500
# Negative values are status codes:
#   No dna file: -1; gene not in annotation: -2;
#   chr not in bam's reference: -3; conflicting window start and end: -4
supp_cluster_num = -99

# Use a context manager so the CFF file handle is closed deterministically
# instead of being left to the garbage collector.
with open(cff_file, "r") as cff_handle:
    for line in cff_handle:
        fusion = pygeneann.CffFusion(line)
        if fusion.sample_name in bam_dict:
            dna_bam_file = bam_dict[fusion.sample_name]
            # NOTE(review): a new AlignmentFile is opened for every fusion
            # line; no close() is visible in this excerpt -- confirm it is
            # released further down the loop body.
            dna_bam = pysam.AlignmentFile(dna_bam_file, "rb")
        else:
            #print >> sys.stderr, fusion.sample_name, "has no dna file."
            supp_cluster_num = -1
            #print "Fusion:", line.strip(), supp_cluster_num
#!/usr/bin/env python
# Re-annotates every fusion in a CFF file and prints the result to stdout.
#
# For each input line the script annotates gene order, assigns a sequential
# fusion id ("F" followed by the 1-based line number zero-padded to eight
# digits), fetches the fusion sequence, runs the codon check and prints
# fusion.tostring().
#
# Positional arguments:
#   argv[1]  CFF file
#   argv[2]  gene annotation file, loaded with pygeneann.GeneAnnotation
#   argv[3]  reference, handed to get_fusion_seq() and check_codon()
import sys
# The pygeneann package is loaded from this hard-coded location.
sys.path.append( "/hpf/largeprojects/ccmbio/jiangyue/DIPG_analysis_by_samples/Scripts/pygeneann/pygenefusionann" )
import pygeneann
import sequtils
import pysam

# Fail with a usage message rather than a bare IndexError traceback.
if len(sys.argv) < 4:
    sys.exit("Usage: %s <cff_file> <ensbed> <ref>" % sys.argv[0])

cff_file = sys.argv[1]
#ref_fa = sys.argv[2]
ensbed = sys.argv[2]
# gene order annotation test
# NOTE(review): ref is passed on as the raw command-line string; confirm
# that get_fusion_seq()/check_codon() accept a path rather than an open
# pysam.FastaFile.
ref = sys.argv[3]

gene_ann = pygeneann.GeneAnnotation(ensbed)

# Use a context manager so the CFF file handle is closed deterministically.
with open(cff_file, "r") as cff_handle:
    for n, line in enumerate(cff_handle, 1):
        fusion = pygeneann.CffFusion(line)
        fusion.ann_gene_order(gene_ann)
        #annotate fusion id and seq
        fusion.fusion_id = "F" + str(n).zfill(8)
        pygeneann.get_fusion_seq(fusion, ref, 100)
        fusion.check_codon(gene_ann, ref)
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(fusion.tostring())