Пример #1
0
def Main():

    #Initializes the protein class which contains all main functions for manipulation and storage of protein data
    P = Protein()
    # Gets user input for the name of the desired output file. Output graphic displays in the subfolder of completed trees
    name_of_run = nameOfRun()
    P.run_name = name_of_run
    # timer that determines how much pause is placed between calls to NCBI servers, reccomend 0.5 however 0 works when running NOT during peak hours
    time = timer()
    P.timer = time

    #Set input/output paths
    input_file_path = os.path.join('Input', 'ProteinInput')
    output_file_path = os.path.join('Output')

    #Clears previous CDS and genomic output from last run
    clearPreviousOutput(output_file_path)

    #Functions from the Protein class that use the input protein accession numbers to determine the corresponding protein ID, CDS, and genomic data
    P.Entrez_Protein_ID_Fetch(input_file_path)

    P.Entrez_Genome_Fetch()
    # Writes the fasta seq for the genome corresponding the protein. Establishing a parallel list. Ouput file path: PhyloRewrite/Output/Genome
    for i in range(len(P.gene)):
        data_type = 'gene'
        writeOutput(data_type, P.gene[i])
    print 'Genomic Sequences written to Output File... '

    P.Entrez_CDS_Fetch()
    #Writes the fasta seq for the CDS corresponding the protein. Establishing a parallel list. Ouput file path: PhyloRewrite/Output/CDS
    for i in range(len(P.retrieved_full_cds)):
        data_type = 'CDS'
        writeOutput(data_type, P.retrieved_full_cds[i])
    print 'CDS Sequences written to Output File... '

    #Checks as to whether the lists are in parallel, if they are not error will rise
    print '\n Checking the output for errors... \n'

    if len(P.gene) != len(P.retrieved_full_cds):
        repair_script()
        sys.exit()

    print 'No errors found, continuing... \n'

    #Function of the protein class that determines the intron phase and location of intron/exon boundry
    P.intronCalculator()

    #Contained within the lists is each
    print 'Intron Phases: ' + str(P.intron_phase)
    print 'Length of the Exons: ' + str(P.exon_lengths)

    #Uses clustal X linux executible to format a multiple sequencing alignment for the input protein sequences. Files found in execs/tmp
    P.multiple_sequencing_alignment()
    #Takes the multiple sequencing alignment output from clustal X and inputs into fasttree to generate an unrooted tree, then piped into ete2 to root. Files found in execs/tmp
    P.rootedTreeConstruction()
    #Builds the tree graphic
    P.renderingTreeImage()
Пример #2
0
        #Code produces the Phylogenetic tree and intron mapping
        P = Protein()
        P.multiple_sequencing_alignment()
        P.intronCalculator()
        P.rootedTreeConstruction()
        P.renderingTreeImage()

    elif response == '3':

        repair_script()

    elif response == '4':

        duplicate_management()

    elif response == '5':
        #Enter experimental code here:

        P = Protein()
        id_list = P.Entrez_Protein_ID_Fetch(
            os.path.join('Input', 'ProteinInput'))
        P.Genomic_Context(id_list)

    elif response == '6':
        file_merger()

    elif response != '1' or '2' or '3' or '4' or '5' or '6':

        print 'Sorry your response was not recognized! '
        sys.exit()