Пример #1
0
def main(argv):
    '''
    Entry point: calculate GO annotation statistics for the selected application.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # load the GO ontology data into a dictionary
    ontology = xlib.build_go_ontology_dict(options.ontology_file)

    # application code -> function that calculates its statistics
    stats_calculators = {
        'BLAST2GO': calculate_blast2go_go_stats,
        'ENTAP': calculate_entap_go_stats,
        'TOA': calculate_toa_go_stats,
        'TRAPID': calculate_trapid_go_stats,
        'TRINOTATE': calculate_trinotate_go_stats,
    }

    # run the calculator of the requested application; any other code does nothing
    calculate_stats = stats_calculators.get(options.app)
    if calculate_stats is not None:
        calculate_stats(options.annotation_file, ontology, options.output_dir)
Пример #2
0
def main(argv):
    '''
    Entry point: restore transcript sequence identifications in a FASTA or XML file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # TOA-transcriptome identification relationships
    transcriptome_ids = xlib.get_id_relationship_dict(
        options.toa_transcriptome_relationship_file)

    # TOA-TransDecoder identification relationships ('NONE' gives an empty dictionary)
    transdecoder_file = options.toa_transdecoder_relationship_file
    transdecoder_ids = (
        {} if transdecoder_file == 'NONE'
        else xlib.get_id_relationship_dict(transdecoder_file)
    )

    # restore the identifications with the function that matches the file format
    file_format = options.file_format
    if file_format == 'FASTA':
        restore_ids_fasta(options.input_file, transcriptome_ids,
                          transdecoder_ids, options.output_file)
    elif file_format == 'XML':
        restore_ids_xml(options.input_file, transcriptome_ids,
                        transdecoder_ids, options.output_file)
Пример #3
0
def main(argv):
    '''
    Entry point: remove the existing TOA database file, if any, and create a new one.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    database_path = options.toa_database
    notify = xlib.Message.print

    # remove the previous database file when there is one
    notify('verbose', 'Deleting old TOA database ...\n')
    if os.path.exists(database_path):
        os.remove(database_path)
    notify('verbose', 'Database is deleted.\n')

    # connect to the database (per the original note, connecting creates it if missing)
    notify('verbose', 'Creating new TOA database ...\n')
    connection = xsqlite.connect_database(database_path)
    notify('verbose', 'Database is created.\n')

    # nothing else to do with the new database: release the connection
    connection.close()
Пример #4
0
def main(argv):
    '''
    Main line of the program: load a BLAST file of a dataset into the TOA database.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when the
    dataset check or the load raises.

    Raises xlib.ProgramException ('L001') when the dataset identification is
    not found in the database.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # check the dataset identification
        if not xsqlite.is_dataset_id_found(conn, args.dataset_id):
            raise xlib.ProgramException('L001', args.dataset_id)

        # load table "blast" where the BLAST file format is 5 (BLAST XML);
        # any other format value loads nothing
        if args.blast_file_format == '5':
            load_table_blast_5(conn, args.dataset_id, args.blast_file)
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #5
0
def main(argv):
    '''
    Main line of the program: load the GO ontology and the GO cross references
    into the TOA database.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when one of
    the loads raises.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # load table "go_ontology"
        load_table_go_ontology(conn, args.ontology_file)

        # load table "go_cross_references"
        load_table_go_cross_references(conn, args.ec2go_file, args.kegg2go_file,
                                       args.metacyc2go_file, args.interpro2go_file)
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #6
0
def main(argv):
    '''
    Entry point: merge two FASTA files with the requested merger operation.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # sequence identifications of the second FASTA file
    second_file_ids = get_file_2_id_dict(options.fasta_file_2)

    # TOA-transcriptome identification relationships ('NONE' gives an empty dictionary)
    relationship_file = options.toa_transcriptome_relationship_file
    if relationship_file != 'NONE':
        transcriptome_ids = xlib.get_id_relationship_dict(relationship_file)
    else:
        transcriptome_ids = {}

    operation = options.merger_operation
    if operation == '1AND2':
        # sequences included in both files
        merge_files_operation_1and2(options.fasta_file_1, second_file_ids,
                                    options.merged_file, transcriptome_ids)
    elif operation == '1LESS2':
        # sequences in file 1 and not in file 2
        merge_files_operation_1less2(options.fasta_file_1, second_file_ids,
                                     options.merged_file, transcriptome_ids)
Пример #7
0
def main(argv):
    '''
    Entry point: merge the XML files of a list into a single merged file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # TOA-transcriptome identification relationships ('NONE' gives an empty dictionary)
    transcriptome_file = options.toa_transcriptome_relationship_file
    transcriptome_ids = (
        {} if transcriptome_file == 'NONE'
        else xlib.get_id_relationship_dict(transcriptome_file)
    )

    # TOA-TransDecoder identification relationships ('NONE' gives an empty dictionary)
    transdecoder_file = options.toa_transdecoder_relationship_file
    transdecoder_ids = (
        {} if transdecoder_file == 'NONE'
        else xlib.get_id_relationship_dict(transdecoder_file)
    )

    # merge the XML files
    merge_files(options.xml_file_list, transcriptome_ids, transdecoder_ids,
                options.merged_file)
Пример #8
0
def main(argv):
    '''
    Main line of the program: rebuild the TOA database file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    A failed rebuild is reported with a message and does not raise here. The
    database connection is closed on every exit path, including when the
    rebuild itself raises.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # rebuild the TOA database file and report the outcome
        xlib.Message.print('verbose', 'Rebuilding TOA database ...\n')
        is_rebuilt = xsqlite.rebuild_database(conn)
        if is_rebuilt:
            xlib.Message.print('verbose', 'The database is rebuilt.\n')
        else:
            xlib.Message.print(
                'error', '*** WARNING: The database file can not be rebuilt.')
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #9
0
def main(argv):
    '''
    Main line of the program: load the tables "datasets", "species", "ec_ids"
    and "kegg_ids" of the TOA database, in that order.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when one of
    the loads raises.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # load table "datasets"
        load_table_datasets(conn, args.dataset_file)

        # load table "species"
        load_table_species(conn, args.species_file)

        # load table "ec_ids"
        load_table_ec_ids(conn, args.ec_id_file)

        # load table "kegg_ids"
        load_table_kegg_ids(conn, args.kegg_id_file)
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #10
0
def main(argv):
    '''
    Main line of the program: query the NGShelper database and list data of
    variants, alleles and variant identifications.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when the
    query raises.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the NGShelper database
    conn = xsqlite.connect_database(args.ngshelper_database)

    try:
        # get the NGShelper database name: the file name without directory and extension
        database_name = os.path.splitext(
            os.path.basename(args.ngshelper_database))[0]

        # list data of variants and alleles and variant identifications to the scenario X
        query_data(conn, database_name, args.sp1_id, args.sp2_id, args.hybrid_id,
                   args.imputed_md_id, args.max_separation, args.output_dir,
                   args.tsi_list)
    finally:
        # close connection to NGShelper database, also when an exception propagates
        conn.close()
Пример #11
0
def main(argv):
    '''
    Main line of the program: load the genomic features of a species from a GFF
    file into the TOA database.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when the
    taxonomy check or the load raises.

    Raises xlib.ProgramException ('L006') when the taxonomy lookup of the
    species name returns an empty dictionary.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # get the taxonomy dictionary of the species name from taxonomy server
        taxonomy_dict = xlib.get_taxonomy_dict('name', args.species_name)
        if taxonomy_dict == {}:
            # report the value that was looked up: the lookup is by species name and
            # this function reads no dataset identification anywhere else
            raise xlib.ProgramException('L006', args.species_name)

        # load genomic features depending of format of the genomic feature file
        load_genomic_features(conn, args.species_name, args.gff_file,
                              args.gff_format)
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #12
0
def main(argv):
    '''
    Main line of the program: load the PLAZA tables "plaza_gene_description",
    "plaza_interpro", "plaza_go" and "plaza_mapman" of the TOA database.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.

    The database connection is closed on every exit path, including when a
    check or a load raises.

    Raises xlib.ProgramException:
        'L001' when the dataset identification is not in the PLAZA dataset list;
        'L003' when the species identification is neither 'all' nor in the
        PLAZA species list.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    try:
        # get the PLAZA dataset identification list
        plaza_dataset_id_list = xsqlite.get_plaza_dataset_id_list(conn)

        # check the dataset identification
        if args.dataset_id not in plaza_dataset_id_list:
            raise xlib.ProgramException('L001', 'dataset', args.dataset_id)

        # get the PLAZA species identification list
        plaza_species_id_list = xsqlite.get_plaza_species_id_list(conn)

        # check the PLAZA species identification ('all' is always accepted)
        if args.species_id != 'all' and args.species_id not in plaza_species_id_list:
            raise xlib.ProgramException('L003', args.species_id)

        # load table "plaza_gene_description"
        load_table_plaza_gene_description(conn, args.dataset_id, args.species_id,
                                          args.gene_desc_dir,
                                          plaza_species_id_list)

        # load table "plaza_interpro"
        load_table_plaza_interpro(conn, args.dataset_id, args.species_id,
                                  args.interpro_file, plaza_species_id_list)

        # load table "plaza_go"
        load_table_plaza_go(conn, args.dataset_id, args.species_id, args.go_file,
                            plaza_species_id_list)

        # load table "plaza_mapman"
        load_table_plaza_mapman(conn, args.dataset_id, args.species_id,
                                args.mapman_file, plaza_species_id_list)
    finally:
        # close connection to TOA database, also when an exception propagates
        conn.close()
Пример #13
0
def main(argv):
    '''
    Entry point: convert PHASE files to a Structure file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # convert the PHASE files of the directory to the Structure file
    convert_phase_to_structure(
        options.phase_dir,
        options.phase_extension,
        options.sample_file,
        options.sp1_id,
        options.sp2_id,
        options.hybrid_id,
        options.structure_file,
    )
Пример #14
0
def main(argv):
    '''
    Entry point: extract sequences, passing the VCF file, the identification
    file and the extract file to extract_sequences.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # extract the sequences
    extract_sequences(options.vcf_file, options.id_file, options.extract_file)
Пример #15
0
def main(argv):
    '''
    Entry point: extract the genotype data of every variant from a VCF file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # extract the genotype data of the variants of the input VCF file
    extract_vcf_genotypes(
        options.input_vcf_file,
        options.imputed_md_id,
        options.output_genotype_file,
        options.tvi_list,
    )
Пример #16
0
def main(argv):
    '''
    Entry point: get exon data, passing the alignment file and the output
    directory to get_exon_data.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # get the exon data of the alignment file
    get_exon_data(options.alignment_file, options.output_dir)
Пример #17
0
def main(argv):
    '''
    Entry point: extract genomic features from a GFF file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # extract the genomic features of the input GFF file
    extract_ff_features(
        options.input_gff_file,
        options.gff_format,
        options.vcf_file,
        options.output_gff_file,
    )
Пример #18
0
def main(argv):
    '''
    Entry point: fix sequence identifiers, passing the file number and the
    read file to fix_seq_ids.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # fix the sequence identifiers
    fix_seq_ids(options.filenum, options.readfile)
Пример #19
0
def main(argv):
    '''
    Entry point: get the Gene Ontology terms per sequence.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # get the Gene Ontology terms of every sequence
    get_go_terms(
        options.annotation_file,
        options.type,
        options.score_file,
        options.go_file,
    )
Пример #20
0
def main(argv):
    '''
    Entry point: calculate haplotype statistics, passing the loci file path and
    the statistics file path to calculate_haplotype_statistics.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # calculate the haplotype statistics
    calculate_haplotype_statistics(options.loci_file_path,
                                   options.stats_file_path)
Пример #21
0
def main():
    '''
    Entry point: filter SSRs, passing the COS file, the SSR file and the output
    file to filter_ssrs.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # filter the SSRs
    filter_ssrs(options.cos_file, options.ssr_file, options.output_file)
Пример #22
0
def main(argv):
    '''
    Entry point: build the functional annotation data corresponding to a TRAPID run.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # build the annotation file from the GO, GF and KO transcript files
    build_trapid_annotation(
        options.transcripts_with_go_file,
        options.transcripts_with_gf_file,
        options.transcripts_with_ko_file,
        options.annotation_file,
    )
Пример #23
0
def main(argv):
    '''
    Entry point: extract RNA sequences from a GFF file and its corresponding
    genome FASTA file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # extract the RNA sequences
    extract_gff_rnas(
        options.gff_file,
        options.gff_format,
        options.genome_file,
        options.rna_file,
        options.tvi_list,
    )
Пример #24
0
def main():
    '''
    Entry point: filter transcripts by length, passing the FASTA file, the
    output file and the minimum and maximum lengths to filter_transcripts_bylen.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # filter the transcripts
    filter_transcripts_bylen(
        options.fasta_file,
        options.output_file,
        options.minlen,
        options.maxlen,
    )
Пример #25
0
def main(argv):
    '''
    Entry point: get flanking regions, passing the VCF file, the genome file,
    the flanking region file and the nucleotide number to get_flanking_regions.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # get the flanking regions
    get_flanking_regions(
        options.vcf_file,
        options.genome_file,
        options.flanking_region_file,
        options.nucleotide_number,
    )
Пример #26
0
def main(argv):
    '''
    Entry point: convert a SimHyb file to a Structure file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # convert the SimHyb file
    convert_simhyb_to_structure(
        options.simhyb_file,
        options.header_row_number,
        options.structure_file,
    )
Пример #27
0
def main(argv):
    '''
    Entry point: split an annotation file, passing its type, the record number
    per file and the header option to split_files.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # split the annotation file
    split_files(
        options.annotation_file,
        options.type,
        options.record_number_per_file,
        options.header,
    )
Пример #28
0
def main(argv):
    '''
    Entry point: extract annotations, passing the annotation file, its type, the
    identification file, the extract file and the statistics file to
    extract_annotations.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # extract the annotations
    extract_annotations(
        options.annotation_file,
        options.type,
        options.id_file,
        options.extract_file,
        options.stats_file,
    )
Пример #29
0
def main():
    '''
    Entry point: filter the transcripts of a transcriptome file, passing the
    assembly software code, the score file, the output file and the length,
    FPKM and TPM thresholds to filter_transcripts.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # filter the transcripts
    filter_transcripts(
        options.assembly_software_code,
        options.transcriptome_file,
        options.score_file,
        options.output_file,
        options.minlen,
        options.maxlen,
        options.minFPKM,
        options.minTPM,
    )
Пример #30
0
def main(argv):
    '''
    Entry point: collapse the variant records corresponding to an indel in a
    VCF file.

    argv belongs to the signature but is not read here; parse_args() is called
    without arguments.
    '''

    # run the operating system check
    xlib.check_os()

    # parse and validate the command-line options
    options = build_parser().parse_args()
    check_args(options)

    # collapse the indel variant records of the input VCF file
    collapse_indels(
        options.input_vcf_file,
        options.sample_file,
        options.imputed_md_id,
        options.sp1_id,
        options.sp2_id,
        options.hybrid_id,
        options.output_vcf_file,
        options.stats_file,
        options.tvi_list,
    )