def main():
    """Run every reference classifier and persist the results.

    Each classifier is called with the same two keyword arguments
    (``transcript_dict`` and ``seq_dict``) and is expected to return a
    two-item result, which is unpacked into a "classify" value and a
    "details" value.  Both are stored under the classifier's ``__name__``.
    The two collected dicts are then handed to ``sql_lib.write_dict``
    together with ``classify.db`` / ``details.db`` and ``args.refGenome``.
    """
    args = parse_args()
    classifiers = (
        AbutsUnknownBases, StartOutOfFrame, BadFrame, BeginStart, EndStop,
        CdsGap, UtrGap, UnknownGap, CdsNonCanonSplice, CdsUnknownSplice,
        UtrNonCanonSplice, UtrUnknownSplice, InFrameStop, ShortCds,
        UnknownBases,
    )
    classify_dicts = {}
    details_dicts = {}

    # The inputs are loaded once and shared by all classifiers.
    transcript_dict = get_transcript_dict(args.refGp)
    seq_dict = seq_lib.get_sequence_dict(args.refFasta)
    fn_args = {"transcript_dict": transcript_dict, "seq_dict": seq_dict}

    for classifier in classifiers:
        classify_result, details_result = classifier(**fn_args)
        name = classifier.__name__
        classify_dicts[name] = classify_result
        details_dicts[name] = details_result

    # Classify results are written first, then details.
    outputs = (
        (classify_dicts, "classify.db"),
        (details_dicts, "details.db"),
    )
    for data_dict, database_path in outputs:
        sql_lib.write_dict(data_dict, database_path, args.refGenome)
Exemplo n.º 2
0
import itertools

# Hard-coded input paths for one specific pipeline run (see the directory
# names embedded in each path).
gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/transMap/gorilla/transMapGencodeBasicV23.gp"
ref_gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/data/wgEncodeGencodeBasicV23.gp"
aug_gp = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/augustus/tmr/gorilla.output.gp"
aln_psl = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/transMap/gorilla/transMapGencodeBasicV23.psl"
ref_psl = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/transMap/2015-10-06/data/wgEncodeGencodeBasicV23.psl"
ref_fasta = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/assemblies/susie_3_2/human.fa"
target_fasta = "/hive/groups/recon/projs/gorilla_eichler/pipeline_data/assemblies/susie_3_2/gorilla.fa"

# Load every input up front through the project's seq_lib / psl_lib loaders.
tx_dict = seq_lib.get_transcript_dict(gp)
ref_dict = seq_lib.get_transcript_dict(ref_gp)
aug_dict = seq_lib.get_transcript_dict(aug_gp)
aln_dict = psl_lib.get_alignment_dict(aln_psl)
ref_aln_dict = psl_lib.get_alignment_dict(ref_psl)
seq_dict = seq_lib.get_sequence_dict(target_fasta)
ref_seq_dict = seq_lib.get_sequence_dict(ref_fasta)

# NOTE(review): presumably a (connection, cursor) pair, judging by the names
# it is unpacked into -- confirm against sql_lib.attach_databases.
con, cur = sql_lib.attach_databases("/hive/groups/recon/projs/gorilla_eichler/pipeline_data/comparative/susie_3_2/comparativeAnnotation/2015-10-12/GencodeBasicV23", mode="augustus")

# Run configuration.  The queries below read these variables instead of
# repeating the literals, so changing the genome or biotype is a one-line edit.
genome = 'gorilla'
ref_genome = 'human'
biotype = 'protein_coding'
filter_chroms = ["Y", "chrY"]

stats = merge_stats(cur, genome)
highest_cov_dict = sql_lib.highest_cov_aln(cur, genome)
# Collect the first element of every value in highest_cov_dict.  Unlike
# zip(*values)[0], this yields an empty set (rather than IndexError) when the
# dict is empty.  Assumes each value is an indexable sequence -- TODO confirm
# against sql_lib.highest_cov_aln.
highest_cov_ids = {entry[0] for entry in highest_cov_dict.itervalues()}
biotype_ids = sql_lib.get_biotype_aln_ids(cur, genome, biotype)
# Keep only the IDs that are also returned for the chosen biotype.
highest_cov_ids &= biotype_ids
# Stats keys are passed through remove_augustus_alignment_number before the
# membership test against the ID set.
best_stats = {x: y for x, y in stats.iteritems() if psl_lib.remove_augustus_alignment_number(x) in highest_cov_ids}
Exemplo n.º 3
0
 def get_fasta(self):
     """Populate ``self.ref_seq_dict`` from the file at ``self.ref_fasta``.

     The value stored is whatever ``seq_lib.get_sequence_dict`` returns for
     that path.  Returns ``None``; the result is kept on the instance.
     """
     load_sequences = seq_lib.get_sequence_dict
     self.ref_seq_dict = load_sequences(self.ref_fasta)
 def getSeqDict(self):
     """Populate ``self.seqDict`` from the file at ``self.fasta``.

     The value stored is whatever ``seq_lib.get_sequence_dict`` returns for
     that path.  Returns ``None``; the result is kept on the instance.
     """
     load_sequences = seq_lib.get_sequence_dict
     self.seqDict = load_sequences(self.fasta)
 def getRefDict(self):
     """Populate ``self.refDict`` from the file at ``self.refFasta``.

     The value stored is whatever ``seq_lib.get_sequence_dict`` returns for
     that path.  Returns ``None``; the result is kept on the instance.
     """
     load_sequences = seq_lib.get_sequence_dict
     self.refDict = load_sequences(self.refFasta)
 def getSeqDict(self):
     """Read ``self.fasta`` via ``seq_lib.get_sequence_dict`` into ``self.seqDict``.

     Nothing is returned; the loaded value is only stored on the instance.
     """
     # NOTE(review): a method with this same name and body appears earlier in
     # this excerpt.  If both belong to one class, this later definition is
     # the one that takes effect -- confirm whether the duplicate is intended.
     fetch = seq_lib.get_sequence_dict
     self.seqDict = fetch(self.fasta)
 def getRefDict(self):
     """Read ``self.refFasta`` via ``seq_lib.get_sequence_dict`` into ``self.refDict``.

     Nothing is returned; the loaded value is only stored on the instance.
     """
     # NOTE(review): a method with this same name and body appears earlier in
     # this excerpt.  If both belong to one class, this later definition is
     # the one that takes effect -- confirm whether the duplicate is intended.
     fetch = seq_lib.get_sequence_dict
     self.refDict = fetch(self.refFasta)