Example #1
def _execute_tRNA_Scan(options):
    global errorcode
    args = []

    if options.trna_executable:
        args.append(options.trna_executable)

    if options.trna_i:
        args += ["-i", options.trna_i]

    if options.trna_o:
        args += ["-o", options.trna_o]

    if options.trna_D:
        args += ["-D", options.trna_D]

    if options.trna_T:
        args += ["-T", options.trna_T]

    if options.trna_F:
        args += ["-F", options.trna_F]
    result = getstatusoutput(' '.join(args))

    if result[0] != 0:
        insert_error(errorcode)
    return result
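The wrapper above simply concatenates whichever tRNAscan options were supplied into a single command string, runs it with getstatusoutput, and records the global errorcode on a non-zero exit status. A minimal, self-contained sketch of the same pattern (the executable path and file names are placeholders, and SimpleNamespace stands in for the real options object):

from subprocess import getstatusoutput  # Python 2 used commands.getstatusoutput
from types import SimpleNamespace

# hypothetical options object mirroring the attributes _execute_tRNA_Scan reads
options = SimpleNamespace(trna_executable="trnascan-SE",   # placeholder path
                          trna_i="sample.fna", trna_o="sample.trna.out",
                          trna_D=None, trna_T=None, trna_F=None)

args = [options.trna_executable]
for flag, value in [("-i", options.trna_i), ("-o", options.trna_o),
                    ("-D", options.trna_D), ("-T", options.trna_T),
                    ("-F", options.trna_F)]:
    if value:  # only forward the flags that were actually set
        args += [flag, value]

status, output = getstatusoutput(" ".join(args))
if status != 0:
    print("tRNA scan failed: " + output)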
def write_annotation_for_orf(outputgff_file, candidatedbname, dbname_weight,
                             results_dictionary, orf_dictionary, contig,
                             candidate_orf_pos, orfid, compact_output):
    global errorcode
    try:
        fields = [
            'source', 'feature', 'start', 'end', 'score', 'strand', 'frame'
        ]

        output_line = orf_dictionary[contig][candidate_orf_pos]['seqname']

        #if compact_output:
        #output_line = ShortenContigId(output_line)

        for field in fields:
            output_line += "\t" + str(
                orf_dictionary[contig][candidate_orf_pos][field])

        #if compact_output:
        try:
            attributes = "ID=" + ShortenORFId(
                orf_dictionary[contig][candidate_orf_pos]['id'])
            attributes += ";" + "locus_tag=" + ShortenORFId(
                orf_dictionary[contig][candidate_orf_pos]['locus_tag'])
        except:
            attributes = "ID=" + orf_dictionary[contig][candidate_orf_pos]['id']
            attributes += ";" + "locus_tag=" + orf_dictionary[contig][
                candidate_orf_pos]['locus_tag']

        attributes += ";" + "contig_length=" + orf_dictionary[contig][
            candidate_orf_pos]['contig_length']
        attributes += ";" + "orf_length=" + orf_dictionary[contig][
            candidate_orf_pos]['orf_length']
        attributes += ";" + "partial=" + orf_dictionary[contig][
            candidate_orf_pos]['partial']
        attributes += ";" + "sourcedb=" + candidatedbname

        if candidatedbname in results_dictionary:
            attributes += ";" + "annotvalue=" + str(
                results_dictionary[candidatedbname][orfid]['value'])
            attributes += ";" + "ec=" + str(
                results_dictionary[candidatedbname][orfid]['ec'])
            attributes += ";" + "product=" + results_dictionary[
                candidatedbname][orfid]['product']
        else:
            attributes += ";" + "annotvalue=" + str('0')
            attributes += ";" + "ec=" + str('')
            attributes += ";" + "product=" + 'hypothetical protein'

        output_line += '\t' + attributes

        if candidatedbname in results_dictionary:
            fprintf(outputgff_file, "%s\n", output_line)
    except:
        eprintf("ERROR : Failure to annotate in contig %s\n", contig)
        #print orf_dictionary[contig]
        traceback.print_exc(10)
        insert_error(errorcode)
        exit_process()
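When the database produced a hit, the attribute column assembled above looks like the following (values are purely illustrative):

ID=O_12;locus_tag=O_12;contig_length=4500;orf_length=912;partial=00;sourcedb=refseq;annotvalue=152.3;ec=2.7.1.1;product=hexokinase

ORFs with no hit in the current database get annotvalue=0, an empty ec, and product=hypothetical protein, and the GFF line is only written when the database actually appears in results_dictionary.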
Example #4
def MetaPathways_parse_blast(argv, errorlogger=None, runstatslogger=None):
    createParser()
    try:
        main(argv, errorlogger=errorlogger, runstatslogger=runstatslogger)
    except:
        insert_error(5)
        return (0, '')

    return (0, '')
Example #5
def Multiseq_preprocess(argv, errorlogger=None, runstatslogger=None):
    createParser()
    try:
        main(argv, errorlogger=errorlogger, runstatslogger=runstatslogger)
    except:
        insert_error(1)
        return (1, '')

    return (0, '')
Example #8
def MetaPathways_filter_input(argv, errorlogger=None, runstatslogger=None):
    createParser()
    global errorcode
    try:
        main(argv, errorlogger=errorlogger, runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)
        return (0, '')

    return (0, '')
def MetaPathways_run_pathologic(argv, extra_command=None, errorlogger=None, runstatslogger=None):
    if errorlogger != None:
        errorlogger.write("#STEP\tBUILD_PGDB\n")
    createParser()
    try:
        main(argv, errorlogger=errorlogger, runcommand=extra_command, runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)
        return (1, 'Error running pathologic')
    return (0, '')
Example #10
def MetaPathways_annotate_fast(argv, errorlogger=None, runstatslogger=None):
    createParser()
    errorlogger.write("#STEP\tANNOTATE_ORFS\n")
    try:
        main(argv, errorlogger=errorlogger, runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)
        return (0, '')

    return (0, '')
Example #13
def MetaPathways_orf_prediction(argv, extra_command=None, errorlogger=None, runstatslogger=None):
    global errorcode
    if errorlogger != None:
        errorlogger.write("#STEP\tORF_PREDICTION\n")
    createParser()
    try:
        main(argv, errorlogger=errorlogger, runcommand=extra_command, runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)

    return (0, '')
Example #16
def MetaPathways_refscore(argv, errorlogger=None, runstatslogger=None):
    createParser()
    if errorlogger:
        errorlogger.write("#STEP\tCOMPUTE_REFSCORE\n")
    try:
        main(argv, errorlogger=errorlogger, runstatslogger=runstatslogger)
    except:
        insert_error(15)
        return (0, traceback.format_exc(10))

    return (0, '')
def main(argv, errorlogger=None, runstatslogger=None):
    global parser
    (opts, args) = parser.parse_args(argv)

    if not check_arguments(opts, args):
        print(usage)
        sys.exit(0)

    results_dictionary = {}
    dbname_weight = {}

    contig_lengths = {}
    read_contig_lengths(opts.contig_map_file, contig_lengths)

    if opts.blastdir != None and opts.sample_name != None:
        try:
            database_names, input_blastouts, weight_dbs = getBlastFileNames(opts)
        except:
            traceback.print_exc(10)
            insert_error(errorcode)
    else:
        database_names = opts.database_name
        input_blastouts = opts.input_blastout
        weight_dbs = opts.weight_db

    priority = 6000
    count_annotations = {}

    print('')
    for dbname, blastoutput, weight in zip(database_names, input_blastouts, weight_dbs):
        results_dictionary[dbname] = {}
        dbname_weight[dbname] = weight
        count = process_parsed_blastoutput(dbname, weight, blastoutput, opts, results_dictionary[dbname])
        if runstatslogger != None:
            runstatslogger.write("%s\tProtein Annotations from %s\t%s\n" % (str(priority), dbname, str(count)))
        priority += 1

    for dbname in results_dictionary:
        print(dbname, len(results_dictionary[dbname].keys()))
        for seqname in results_dictionary[dbname]:
            count_annotations[seqname] = True
    count = len(count_annotations)
    if runstatslogger != None:
        runstatslogger.write("%s\tTotal Protein Annotations\t%s\n" % (str(priority), str(count)))

    # create the annotations from the results
    create_annotation(dbname_weight, results_dictionary, opts.input_gff, opts.rRNA_16S, opts.tRNA, opts.output_gff, opts.output_comparative_annotation, contig_lengths, compact_output=opts.compact_output)
def write_new_file(lines, output_file):

    print("Fixing file " + output_file)
    try:
        outputfile = open(output_file, 'w')
    except IOError:
        print("ERROR : Cannot open output file " + output_file)
        insert_error(9)
        return

    for line in lines:
        fprintf(outputfile, "%s\n", line)

    outputfile.close()
def MetaPathways_create_amino_sequences(argv,
                                        errorlogger=None,
                                        runstatslogger=None):
    global errorcode
    createParser()
    try:
        res = main(argv,
                   errorlogger=errorlogger,
                   runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)
        return (1, '')

    return (res[0], res[1])
Example #26
def read_contig_lengths(contig_map_file, contig_lengths):
    try:
        mapfile = open(contig_map_file, 'r')
    except IOError:
        insert_error(errorcode)
        return

    mapfile_lines = mapfile.readlines()
    mapfile.close()

    for line in mapfile_lines:
        line = line.strip()
        fields = [x.strip() for x in line.split('\t')]
        if len(fields) != 3:
            contig_lengths.clear()  # reset the caller's dict on a malformed line
            return
        contig_lengths[fields[0]] = int(fields[2])
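From this parsing logic, the contig map file is expected to hold three tab-separated columns per line, with the contig name in the first column and its integer length in the third; any other layout empties the dictionary and aborts the read. An illustrative, tab-separated row (the middle column is not used by this function):

contig_1    c_1    45213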
Example #27
def MetaPathways_func_search(argv,
                             extra_command=None,
                             errorlogger=None,
                             runstatslogger=None):
    if errorlogger != None:
        errorlogger.write("#STEP\tFUNC_SEARCH\n")
    createParser()
    try:
        code = main(argv,
                    errorlogger=errorlogger,
                    runcommand=extra_command,
                    runstatslogger=runstatslogger)
    except:
        insert_error(4)
        return (0, '')

    return (0, '')
def read_contig_lengths(contig_map_file, contig_lengths):
    try:
        mapfile = open(contig_map_file, 'r')
    except IOError:
        print("Cannot read file " + contig_map_file + " !")
        insert_error(errorcode)
        return

    mapfile_lines = mapfile.readlines()
    mapfile.close()

    for line in mapfile_lines:
        line = line.strip()
        fields = [x.strip() for x in line.split('\t')]
        if len(fields) != 3:
            contig_lengths.clear()  # reset the caller's dict on a malformed line
            return
        contig_lengths[fields[0]] = int(fields[2])
def MetaPathways_rRNA_stats_calculator(argv,
                                       extra_command=None,
                                       errorlogger=None,
                                       runstatslogger=None):
    if errorlogger != None:
        errorlogger.write("#STEP\tSTATS_rRNA\n")
    createParser()

    try:
        main(argv,
             errorlogger=errorlogger,
             runcommand=extra_command,
             runstatslogger=runstatslogger)
    except:
        insert_error(6)
        return (0, '')

    return (0, '')
Example #30
def MetaPathways_tRNA_scan(argv,
                           extra_command=None,
                           errorlogger=None,
                           runstatslogger=None):
    global errorcode
    if errorlogger != None:
        errorlogger.write("#STEP\ttRNA_SCAN\n")
    createParser()
    result = [0, '']
    try:
        result = main(argv,
                      errorlogger=errorlogger,
                      runcommand=extra_command,
                      runstatslogger=runstatslogger)
    except:
        insert_error(errorcode)
        return (result[0], result[1])

    return (result[0], '')
Example #31
def halt_process(secs=4, verbose=False):
    time.sleep(secs)

    errors = get_error_list()
    if len(errors) > 1:
        insert_error(200)

    if verbose:
        for errorcode in errors.keys():
            eprintf("ERROR:\t%d\t%s\n", errorcode, errors[errorcode])

    if len(errors.keys()) > 1:
        errorcode = 200
        _exit(errorcode)
    elif len(errors.keys()) == 1:
        errorcode = errors.keys()[0]
        _exit(errorcode)

    _exit(0)
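halt_process sleeps briefly so pending output can flush, then exits with the single recorded error code, or with 200 when several different errors were recorded. The insert_error/get_error_list pair used throughout these examples is not shown on this page; a minimal sketch of how such a registry could be implemented (an assumption, not the project's actual code):

errors = {}  # module-level registry: error code -> optional message

def insert_error(errorcode, message=""):
    # remember that a step failed; halt_process() inspects this later
    errors[errorcode] = message

def get_error_list():
    return errors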
def main(argv, errorlogger = None, runcommand = None, runstatslogger = None):
    global parser

    options, args = parser.parse_args(argv)
    if options.inputfolder ==None:
       parser.error('ERROR\tInput folder for Pathologic not found')
    else:
      # required files to be able to build ePGDB
      files = [ 
                #options.inputfolder + PATHDELIM + '0.pf',
                # options.inputfolder + PATHDELIM + '0.fasta',
                options.inputfolder + PATHDELIM + 'genetic-elements.dat',  
                options.inputfolder + PATHDELIM + 'organism-params.dat'
              ]

      if files_exist( files , errorlogger = errorlogger):
        exit_process("ERROR\tCannot find all inputs for Pathologic in folder %s : "  %(options.inputfolder) )

    # is there a pathwaytools executable installed
    if not path.exists(options.ptoolsExec):
       eprintf("ERROR\tPathwayTools executable %s not found!\n", options.ptoolsExec)
       if errorlogger:
          errorlogger.printf("ERROR\tPathwayTools executable %s not found!\n",  options.ptoolsExec)
       exit_process("ERROR\tPathwayTools executable %s not found!\n" %(options.ptoolsExec))


    # command to build the ePGDB
    command = "%s -patho %s"  %(options.ptoolsExec, options.inputfolder)
    if options.no_taxonomic_pruning:
       command += " -no-taxonomic-pruning "

    if options.no_web_cel_overview:
       command += " -no-web-cel-overview"

    command += " -tip"
    command += " -api"

    status =0
    fix_pgdb_input_files(options.pgdbdir, pgdbs = [])


    if not path.exists(options.pgdbdir):
      status  = runPathologicCommand(runcommand = command) 
      fix_pgdb_input_files(options.pgdbdir, pgdbs = [])
    if status!=0:
       eprintf("ERROR\tFailed to run Pathologic on input %s : \n" %(options.inputfolder))
       eprintf("INFO\tKill any other PathwayTools instance running on the machine and try again\n")
       if errorlogger:
          errorlogger.write("ERROR\tFailed to run Pathologic on input %s : " %(options.inputfolder))
          errorlogger.write("INFO\tKill any other PathwayTools instance running on the machine and try again")
          errorlogger.write("     : " + command)
          insert_error(9)
       sys.exit(0)
       #exit_process("ERROR\tFailed to run Pathologic on input %s : "  %(options.inputfolder) )


    if not path.exists(options.reactions_list):
       try:
           pythonCyc = startPathwayTools(options.sample_name.lower(), options.ptoolsExec, True)
           pythonCyc.setDebug() # disable pathway debug statements
           printf("INFO\tExtracting the reaction list from ePGDB " + options.sample_name + "\n")
           resultLines = pythonCyc.getReactionListLines()
           #pythonCyc.stopPathwayTools()
           reaction_list_file = open(options.reactions_list + ".tmp", 'w')
           for line in resultLines:
              fprintf(reaction_list_file,"%s\n",line.strip())
           reaction_list_file.close()
           rename(options.reactions_list + ".tmp", options.reactions_list)
           StopPathwayTools()

       except:
           traceback.print_exc(10)
           eprintf("ERROR\tFailed to run extract pathways for %s : \n" %(options.sample_name))
           eprintf("INFO\tKill any other PathwayTools instance running on the machine and try again")
           if errorlogger:
               errorlogger.write("ERROR\tFailed to run extract pathways for %s : " %(options.sample_name))
               errorlogger.write("INFO\tKill any other PathwayTools instance running on the machine and try again\n")
           insert_error(9)
           StopPathwayTools()

    if not path.exists(options.table_out):
        ExtractPathway_WTD(options)
def ExtractPathway_WTD(options):
    # Extract pathways and WTD
    # place to store list of expected taxonomic range(s)
    printf('\n')
    printf('INFO\tEntering the WTD calculations!\n')
    serialized_metacyc_taxa_ranges = "/tmp/metacyc_pwy_taxa_range.pk"

    try:
        #print  options.wtd,  not path.isfile(serialized_metacyc_taxa_ranges),  serialized_metacyc_taxa_ranges
        if options.wtd and not path.isfile(serialized_metacyc_taxa_ranges):
            # get MetaCyc's expected taxonomic range(s) and serialize for later use in /tmp
            # try:
            printf('INFO\tGetting MetaCyc Expected Taxonomic Range(s)\n')
            pythonCyc = startPathwayTools('meta', options.ptoolsExec, True)

            pwys = pythonCyc.getAllPathways()

            pwy_taxa_range = {} # hash from pwy to expected taxonomic range(s)
            pwy_taxa_range_pk = open(serialized_metacyc_taxa_ranges ,"w")

            # get expected taxonomic ranges for each pathway
            for pwy in pwys:
                printf(" " + pwy) 
                my_expected_taxonomic_range = pythonCyc.getExpectedTaxonomicRange(pwy)
                pwy_taxa_range[pwy] = my_expected_taxonomic_range
            # printf(" " + pwy)

            # write the pathway
            pickle.dump(pwy_taxa_range, pwy_taxa_range_pk)
            pwy_taxa_range_pk.close()
            StopPathwayTools()

        # read expected taxonomic range from serialized file
        expected_taxa_in = open(serialized_metacyc_taxa_ranges, "r")
        pwy_taxa_range = pickle.load(expected_taxa_in)

        # create mapping of preferred NCBI to MEGAN taxonomy
        megan_map = {}
        if options.ncbi_megan_map:
            with open(options.ncbi_megan_map) as megan_map_file:
                for line in megan_map_file:
                    fields = line.split("\t")
                    fields = map(str.strip, fields)
                    megan_map[ fields[0] ] = fields[1]

        # get ORF to taxa map from annotation_table
        printf("INFO\tGetting ORF to Taxa Map from AnnotationTable\n")
        orf_lca = {}
        with open(options.annotation_table) as f:
            for line in f:
                fields = line.split("\t")
                orf_lca[fields[0].strip()] = fields[8].strip()

        # get pathway ORFs and Rxns
        pwy_to_orfs = {}
        pwy_to_long = {}
        pwy_to_rxns = {}
        try:
            pythonCyc = startPathwayTools(options.sample_name.lower(), options.ptoolsExec, True)
            pwys = pythonCyc.getAllPathways()

            for pwy in pwys:
                printf(" " + pwy)
                genes = pythonCyc.getPathwayORFs(pwy)
                rxns = pythonCyc.getPathwayReactionInfo(pwy)
                pwy_to_orfs[pwy] = genes
                pwy_to_long[pwy] = cleanup(pythonCyc.get_slot_value(pwy, "common-name"))
                pwy_to_rxns[pwy] = rxns
            # printf("\n")
            StopPathwayTools()

        except:
            insert_error(9)
            print("""
            Problem connecting to Pathway Tools. Check the /tmp/ptools-socket file.
            """)
    except:
        print("""
        Problem calculating WTD via Pathway Tools. Check the /tmp/ptools-socket file.
        """)
        insert_error(9)

    # get LCA per pathway
    pwy_lca = {}
    # load NCBI taxonomy map
    printf("\nINFO\tLoading NCBI Taxonomy Map\n")
    lca = LCAComputation([ options.ncbi_tree ], )

    for pwy in pwy_to_orfs:
        orfs = pwy_to_orfs[pwy]
        taxa_ids = []
        for orf in orfs:
            if orf in orf_lca:
                # could strip out id here
                res = re.search("(.+?)\(([0-9]+?)\)",  orf_lca[orf] )
                if res:
                    taxa_annotation = res.group(1)
                    id = res.group(2)
                else:
                    id = lca.get_a_Valid_ID([ orf_lca[orf] ])
                taxa_ids.append(id)
        pwy_lca_id = lca.get_lca(taxa_ids, True)
        # print "In run_pathologic"
        # print pwy_lca_id
        # print pwy_lca_id
        lca.clear_cells(taxa_ids)

        pwy_lca[pwy] = [pwy_lca_id, lca.translateIdToName(pwy_lca_id)]

    # calculate weighted taxonomic distance
    pwy_to_wtd = {}
    printf("INFO\tCalculating WTD\n")

    for pwy in pwy_lca:

        C = [] # list of distances
        C_taxa = [] # list of parallel observed-expected taxa pairs
        C_pos = [] # list of non-negative distances
        C_pos_taxa = [] # list of parallel observed-expected taxa pairs
        C_neg = [] # list of negative distances
        C_neg_taxa = [] # list of parallel observed-expected taxa pairs

        if pwy in pwy_taxa_range and  len(pwy_taxa_range[pwy]) :
            for expected in pwy_taxa_range[pwy]:
                dist = lca.wtd(expected[0], pwy_lca[pwy][0])
                if dist or dist == 0:
                    # valid distance
                    # add distance respective lists
                    C.append(dist) # add distance
                    C_taxa.append([ expected[0], pwy_lca[pwy][0] ])
                    if dist >= 0:
                        C_pos.append(dist)  # add to non-negative list
                        C_pos_taxa.append([ expected[0], pwy_lca[pwy][0] ])
                    else:
                        C_neg.append(dist)  # add to negative list
                        C_neg_taxa.append([ expected[0], pwy_lca[pwy][0] ])
                else:
                    print("Not a valid distance")
                    continue
        else:
            # no expected taxonomy, set to root
            min_taxa = "1"
            dist = lca.wtd(min_taxa, pwy_lca[pwy][0])
            # add distance respective lists
            C.append(dist) # add distance
            C_taxa.append([ min_taxa, pwy_lca[pwy][0] ])
            if dist >= 0:
                C_pos.append(dist)  # add to non-negative list
                C_pos_taxa.append([ min_taxa, pwy_lca[pwy][0] ])
            else:
                C_neg.append(dist)  # add to negative list
                C_neg_taxa.append([ min_taxa, pwy_lca[pwy][0] ])

        # find index with max distance (closest to expected taxonomy)
        max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
        max_taxa = C_taxa[max_index]

        # remap to preferred names
        observed = get_preferred_taxa_name(max_taxa[1], megan_map, lca.id_to_name)
        expected = get_preferred_taxa_name(max_taxa[0], megan_map, lca.id_to_name)

        pwy_to_wtd[pwy] = [ max_dist, observed, expected ]

    # write out pathway table
    table_out_tmp  = options.table_out + ".tmp"
    try:
        out = open(table_out_tmp, "w")
    except:
        print("Had problems opening file: " + options.table_out)
        insert_error(9)

    # write appropriate header
    if options.wtd:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tWTD\tOBSERVED\tEXPECTED\tORFS\n"
    else:
        header = "SAMPLE\tPWY_NAME\tPWY_COMMON_NAME\tNUM_REACTIONS\tNUM_COVERED_REACTIONS\tORF_COUNT\tORFS\n"
    out.write(header)

    sample = options.sample_name # sample name
    for pwy in pwy_to_orfs:
        # generate output line
        line = []
        line.append(sample) # sample name
        line.append(pwy) # pathway name
        line.append(pwy_to_long[pwy]) # pathway longname
        line.append(pwy_to_rxns[pwy][0]) # pathway num reactions
        line.append(pwy_to_rxns[pwy][1]) # pathway covered reactions
        line.append(len(pwy_to_orfs[pwy])) # num orfs
        if options.wtd:
            line.append(pwy_to_wtd[pwy][0]) # wtd
            line.append(pwy_to_wtd[pwy][1]) # wtd observed taxa
            line.append(pwy_to_wtd[pwy][2]) # wtd expected taxa
        line.append("[" + ",".join(pwy_to_orfs[pwy]) + "]") # list of ORFs

        line = map(str, line) # cast all to string

        out.write("\t".join(line) + "\n") # write out line
    try:
        out.close() # close file
        rename(table_out_tmp, options.table_out)
    except:
        print("Had problems closing file: " + options.table_out)
        insert_error(9)
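The value stored in pwy_to_wtd is simply the largest distance in C, i.e. the expected taxon that sits closest to the observed pathway LCA. A tiny worked example of that selection step, with made-up distances and taxon ids:

import operator

C = [-4.0, -1.5, 0.0]  # hypothetical distances to three expected taxa
C_taxa = [["2", "815"], ["1224", "815"], ["1236", "815"]]  # (expected, observed) id pairs

max_index, max_dist = max(enumerate(C), key=operator.itemgetter(1))
print(max_dist, C_taxa[max_index])  # -> 0.0 ['1236', '815']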
Example #35
    def __init__(self,
                 dbname,
                 blastoutput,
                 database_mapfile,
                 refscore_file,
                 opts,
                 errorlogger=None):
        self.Size = 10000
        self.dbname = dbname
        self.ln2 = 0.69314718055994530941
        self.lnk = math.log(opts.k)
        self.Lambda = opts.Lambda
        self.blastoutput = blastoutput
        self.database_mapfile = database_mapfile
        self.refscore_file = refscore_file
        self.annot_map = {}
        self.i = 0
        self.opts = opts
        self.hits_counts = {}
        self.data = {}
        self.refscores = {}
        self.refBitScores = {}
        self.needToPermute = False

        self.MAX_READ_ERRORS_ALLOWED = 10
        self.ERROR_COUNT = 0
        self.STEP_NAME = 'PARSE_BLAST'
        self.error_and_warning_logger = errorlogger

        #print "trying to open blastoutput file " + blastoutput
        query_dictionary = {}

        try:
            create_query_dictionary(self.blastoutput,
                                    query_dictionary,
                                    self.opts.algorithm,
                                    errorlogger=errorlogger)
        except:
            insert_error(5)

        try:
            self.blastoutputfile = open(self.blastoutput, 'r')
        except:
            eprintf("\nERROR : cannot open B/LAST output file " + blastoutput + " to parse "+\
                      "      : make sure \"B/LAST\"ing was done for the particular database" )

            if self.error_and_warning_logger:
                self.error_and_warning_logger.write(
                    ("ERROR : cannot open B/LAST output file %s to parse \n"
                     "      : make sure \"B/LAST\"ing was done for "
                     "the particular database") % (blastoutput))
            insert_error(5)
            exit_process("Cannot open B/LAST output file " + blastoutput)

        try:
            self.create_refBitScores()
        except:
            traceback.print_exc(10)
            exit_process("Error while reading from  B/LAST refscore file " +
                         self.refscore_file)
        try:
            create_dictionary(database_mapfile, self.annot_map,
                              query_dictionary)
            query_dictionary = {}
        except AttributeError:
            eprintf("Cannot read the map file for database : %s\n" % (dbname))
            if errorlogger != None:
                errorlogger.write(
                    "PARSE_BLAST\tERROR\tCannot read the map file %s for database : %s\tDelete the formatted files for the database in the \"formatted\" folder\n"
                    % (database_mapfile, dbname))

            exit_process("Cannot read the map file for database  " + dbname)
Example #36
def main(argv, errorlogger=None, runstatslogger=None):
    global parser
    (opts, args) = parser.parse_args(argv)

    if not check_arguments(opts, args):
        print(usage)
        sys.exit(0)

    results_dictionary = {}
    dbname_weight = {}

    contig_lengths = {}
    read_contig_lengths(opts.contig_map_file, contig_lengths)

    if opts.blastdir != None and opts.sample_name != None:
        try:
            database_names, input_blastouts, weight_dbs = getBlastFileNames(
                opts)
        except:
            insert_error(errorcode)
            pass
    else:
        database_names = opts.database_name
        input_blastouts = opts.input_blastout
        weight_dbs = opts.weight_db

    priority = 6000
    count_annotations = {}

    for dbname, blastoutput, weight in zip(database_names, input_blastouts,
                                           weight_dbs):
        results_dictionary[dbname] = {}
        dbname_weight[dbname] = weight
        count = process_parsed_blastoutput(dbname, weight, blastoutput, opts,
                                           results_dictionary[dbname])
        if runstatslogger != None:
            runstatslogger.write("%s\tProtein Annotations from %s\t%s\n" %
                                 (str(priority), dbname, str(count)))
        count_annotations
        priority += 1

    for dbname in results_dictionary:
        print(dbname, len(results_dictionary[dbname].keys()))
        for seqname in results_dictionary[dbname]:
            count_annotations[seqname] = True
    count = len(count_annotations)
    if runstatslogger != None:
        runstatslogger.write("%s\tTotal Protein Annotations\t%s\n" %
                             (str(priority), str(count)))

    # create the annotations from the results
    create_annotation(dbname_weight,
                      results_dictionary,
                      opts.input_gff,
                      opts.rRNA_16S,
                      opts.tRNA,
                      opts.output_gff,
                      opts.output_comparative_annotation,
                      contig_lengths,
                      sample_name=opts.sample_name,
                      compact_output=opts.compact_output)
def main(argv, errorlogger=None, runstatslogger=None):
    # filtering options
    global parser
    options, args = parser.parse_args(argv)

    if not (options.gff_file or options.nucleotide_sequences or
            options.output_amino or options.output_nuc or options.output_gff):
        insert_error(errorcode)
        return (1, '')

    if not options.gff_file:
        parser.error('No gff files are specified')
        insert_error(errorcode)
        return (1, '')

    if not options.nucleotide_sequences:
        parser.error('Nucleotide sequences file must be specified')
        insert_error(errorcode)
        return (1, '')

    if not options.output_amino:
        parser.error('Output amino acid file must be specified')
        insert_error(errorcode)
        return (1, '')

    if not options.output_nuc:
        parser.error('Output nucleotide sequences file must be specified')
        insert_error(errorcode)
        return (1, '')

    if not options.output_gff:
        parser.error('Output gff file must be specified')
        insert_error(errorcode)
        return (1, '')
    #print options

    if not path.exists(options.gff_file):
        print "gff file does not exist"
        insert_error(errorcode)
        return (1, '')

    if not path.exists(options.nucleotide_sequences):
        print "nucloetide sequences file does not exist"
        insert_error(errorcode)
        return (1, '')

    nucleotide_seq_dict = {}
    process_sequence_file(options.nucleotide_sequences, nucleotide_seq_dict)
    process_gff_file(options.gff_file, options.output_amino,
                     options.output_nuc, options.output_gff,
                     nucleotide_seq_dict)