#!/usr/bin/env python
import sys
import pygeneann
import sequtils
import pysam
import argparse

# Command-line interface: three required positional arguments.
parser = argparse.ArgumentParser()
parser.add_argument("cff_file", help="CFF file, can be .cff or cff.reann")
parser.add_argument("ensbed", help="Ensemble gene file")
parser.add_argument("ref_fa", help="Reference genome file")
args = parser.parse_args()

# Build the gene annotation object from the file passed as `ensbed`.
gene_ann = pygeneann.GeneAnnotation(args.ensbed)

# Open the reference genome FASTA through pysam.
ref = pysam.FastaFile(args.ref_fa)

def remove_underscores(gene):
    """Return *gene* with every "_" replaced by ".".

    Despite the name, underscores are substituted rather than dropped, so the
    result has the same length as the input.
    """
    return ".".join(gene.split("_"))

# NOTE(review): seq_dict is created here but is not filled in by the visible
# code; confirm where it is used.
seq_dict = {}

# Read the CFF file one fusion record per line. The `with` block closes the
# file once the loop is done, and the body is indented with spaces only (the
# previous mix of a tab and spaces is rejected by Python 3).
with open(args.cff_file, "r") as cff_handle:
    for line in cff_handle:
        fusion = pygeneann.CffFusion(line)
        # A downstream script uses "_" as a field separator, so "_" must not
        # appear in gene names.
        gene1 = remove_underscores(fusion.reann_gene1)
        gene2 = remove_underscores(fusion.reann_gene2)
        lib = fusion.library
        fusion_id = fusion.fusion_id
# Example #2
# 0
# Last two positional arguments; `parser` itself is created before this excerpt.
parser.add_argument("gene_bed", help="Gene annotation bed file")
parser.add_argument("tmp_dir", help="Temp file directory")
args = parser.parse_args()

# Copy the parsed options into module-level names.
cff_file, dna_bam_list = args.cff_file, args.dna_bam_list
out_dir, gene_ann_file = args.tmp_dir, args.gene_bed

# load_bam_dict is defined outside this excerpt; its result is later looked up
# by fusion sample name to obtain a BAM file path.
bam_dict = load_bam_dict(dna_bam_list)

gene_ann = pygeneann.GeneAnnotation(gene_ann_file)


# Tuning constants. None of the three is referenced in the visible lines.
# NOTE(review): presumably a search window in bp, a read length and an insert
# size -- confirm against the part of the script that uses them.
win_size = 100000
rlen = 100
isize = 500
# Status value reported per fusion; -99 is the initial placeholder.
# NOTE(review): it is assigned only once, before the loop -- confirm that the
# part of the loop not shown here resets it for every fusion.
supp_cluster_num = -99 # No dna file: -1; gene not in annotation: -2; chr not in bam's reference: -3; conflicting window start and end: -4
# Process the CFF file one fusion record per line.
for line in open(cff_file, "r"):
	fusion = pygeneann.CffFusion(line)
	if fusion.sample_name in bam_dict:
		# The sample has a DNA BAM registered: open it for this fusion.
		# NOTE(review): a new AlignmentFile is opened on every iteration and
		# is not closed in the visible code.
		dna_bam_file = bam_dict[fusion.sample_name]
		dna_bam =  pysam.AlignmentFile(dna_bam_file, "rb")
	else:
		# No DNA BAM for this sample: record status code -1.
		#print >> sys.stderr, fusion.sample_name, "has no dna file."
		supp_cluster_num = -1 
		#print "Fusion:", line.strip(), supp_cluster_num
# Example #3
# 0
#!/usr/bin/env python
import sys

# Extend the module search path before the project imports that follow.
# NOTE(review): hard-coded absolute path -- presumably the location of the
# pygeneann checkout on the original author's cluster; confirm, or make it
# configurable before running elsewhere.
sys.path.append(
    "/hpf/largeprojects/ccmbio/jiangyue/DIPG_analysis_by_samples/Scripts/pygeneann/pygenefusionann"
)
import pygeneann
import sequtils
import pysam

# Positional command-line arguments: CFF file, gene annotation file, reference.
cff_file = sys.argv[1]
ensbed = sys.argv[2]
# NOTE(review): `ref` is forwarded as the raw argument string; confirm that
# pygeneann.get_fusion_seq and CffFusion.check_codon accept that rather than
# an already opened FASTA object.
ref = sys.argv[3]
gene_ann = pygeneann.GeneAnnotation(ensbed)

# Running counter used to build sequential fusion ids (F00000001, ...).
n = 1
# The `with` block closes the input file once every record has been handled.
with open(cff_file, "r") as cff_handle:
    for line in cff_handle:
        fusion = pygeneann.CffFusion(line)
        fusion.ann_gene_order(gene_ann)
        # Annotate fusion id and sequence.
        fusion.fusion_id = "F" + str(n).zfill(8)
        pygeneann.get_fusion_seq(fusion, ref, 100)

        fusion.check_codon(gene_ann, ref)
        # Call form of print: same output as the Python 2 statement for a
        # single argument, and valid syntax on Python 3 as well.
        print(fusion.tostring())
        n += 1