def main(argv):
    '''
    Entry point: calculate GO annotation statistics for the selected application.

    "argv" is accepted for the caller's convenience but is not read here; the
    parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # load the GO ontology data into a dictionary
    ontology_dict = xlib.build_go_ontology_dict(options.ontology_file)

    # pick the statistics routine that matches the annotation application
    app = options.app
    if app == 'BLAST2GO':
        stats_function = calculate_blast2go_go_stats
    elif app == 'ENTAP':
        stats_function = calculate_entap_go_stats
    elif app == 'TOA':
        stats_function = calculate_toa_go_stats
    elif app == 'TRAPID':
        stats_function = calculate_trapid_go_stats
    elif app == 'TRINOTATE':
        stats_function = calculate_trinotate_go_stats
    else:
        # any other application code is silently ignored
        stats_function = None

    # run the selected routine
    if stats_function is not None:
        stats_function(options.annotation_file, ontology_dict, options.output_dir)
def main(argv):
    '''
    Entry point: restore the original sequence identifications in a FASTA or
    XML file using the TOA identification relationship files.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # TOA-transcriptome identification relationships (always loaded)
    transcriptome_ids = xlib.get_id_relationship_dict(options.toa_transcriptome_relationship_file)

    # TOA-TransDecoder identification relationships ('NONE' means no file)
    transdecoder_file = options.toa_transdecoder_relationship_file
    transdecoder_ids = {} if transdecoder_file == 'NONE' else xlib.get_id_relationship_dict(transdecoder_file)

    # restore the identifications according to the input file format
    file_format = options.file_format
    if file_format == 'FASTA':
        restore_ids_fasta(options.input_file, transcriptome_ids, transdecoder_ids, options.output_file)
    elif file_format == 'XML':
        restore_ids_xml(options.input_file, transcriptome_ids, transdecoder_ids, options.output_file)
def main(argv):
    '''
    Entry point: remove the TOA database file if it exists and create a new one.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    database_path = options.toa_database

    # remove the previous database file, when there is one
    xlib.Message.print('verbose', 'Deleting old TOA database ...\n')
    if os.path.exists(database_path):
        os.remove(database_path)
    xlib.Message.print('verbose', 'Database is deleted.\n')

    # connecting creates the database file when it does not exist
    xlib.Message.print('verbose', 'Creating new TOA database ...\n')
    connection = xsqlite.connect_database(database_path)
    xlib.Message.print('verbose', 'Database is created.\n')

    # release the connection
    connection.close()
def main(argv):
    '''
    Main line of the program: load the table "blast" of the TOA database from
    a BLAST file.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.

    Raises:
        xlib.ProgramException: code 'L001' when the dataset identification is
        not found in the database.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # the dataset check raises or the loader fails
    try:

        # check the dataset identification
        if not xsqlite.is_dataset_id_found(conn, args.dataset_id):
            raise xlib.ProgramException('L001', args.dataset_id)

        # load table "blast" where the BLAST file format is 5 (BLAST XML)
        if args.blast_file_format == '5':
            load_table_blast_5(conn, args.dataset_id, args.blast_file)

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Main line of the program: load the tables "go_ontology" and
    "go_cross_references" of the TOA database.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # one of the loaders fails
    try:

        # load table "go_ontology"
        load_table_go_ontology(conn, args.ontology_file)

        # load table "go_cross_references"
        load_table_go_cross_references(conn, args.ec2go_file, args.kegg2go_file, args.metacyc2go_file, args.interpro2go_file)

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Entry point: merge two FASTA files with the operation "1AND2" or "1LESS2".

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # sequence identifications found in the second FASTA file
    second_file_ids = get_file_2_id_dict(options.fasta_file_2)

    # TOA-transcriptome identification relationships ('NONE' means no file)
    relationship_file = options.toa_transcriptome_relationship_file
    transcriptome_ids = {} if relationship_file == 'NONE' else xlib.get_id_relationship_dict(relationship_file)

    # choose the merger routine
    operation = options.merger_operation
    if operation == '1AND2':
        # sequences included in both files
        merge_function = merge_files_operation_1and2
    elif operation == '1LESS2':
        # sequences in file 1 and not in file 2
        merge_function = merge_files_operation_1less2
    else:
        # any other operation code is silently ignored
        merge_function = None

    # merge the files
    if merge_function is not None:
        merge_function(options.fasta_file_1, second_file_ids, options.merged_file, transcriptome_ids)
def main(argv):
    '''
    Entry point: merge the XML files of a list into a single file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # TOA-transcriptome identification relationships ('NONE' means no file)
    transcriptome_file = options.toa_transcriptome_relationship_file
    transcriptome_ids = {} if transcriptome_file == 'NONE' else xlib.get_id_relationship_dict(transcriptome_file)

    # TOA-TransDecoder identification relationships ('NONE' means no file)
    transdecoder_file = options.toa_transdecoder_relationship_file
    transdecoder_ids = {} if transdecoder_file == 'NONE' else xlib.get_id_relationship_dict(transdecoder_file)

    # merge the XML files
    merge_files(options.xml_file_list, transcriptome_ids, transdecoder_ids, options.merged_file)
def main(argv):
    '''
    Main line of the program: rebuild the TOA database file.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # the rebuild raises an exception
    try:

        # rebuild the TOA database file
        xlib.Message.print('verbose', 'Rebuilding TOA database ...\n')
        is_rebuilt = xsqlite.rebuild_database(conn)

        # report the outcome
        if is_rebuilt:
            xlib.Message.print('verbose', 'The database is rebuilt.\n')
        else:
            xlib.Message.print('error', '*** WARNING: The database file can not be rebuilt.')

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Main line of the program: load the tables "datasets", "species", "ec_ids"
    and "kegg_ids" of the TOA database.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # one of the loaders fails
    try:

        # load table "datasets"
        load_table_datasets(conn, args.dataset_file)

        # load table "species"
        load_table_species(conn, args.species_file)

        # load table "ec_ids"
        load_table_ec_ids(conn, args.ec_id_file)

        # load table "kegg_ids"
        load_table_kegg_ids(conn, args.kegg_id_file)

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Main line of the program: query the NGShelper database and list data of
    variants, alleles and variant identifications.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the NGShelper database
    conn = xsqlite.connect_database(args.ngshelper_database)

    # the connection is closed in "finally" so that it is released also when
    # the query fails
    try:

        # get the NGShelper database name (base name without its extension)
        file_name = os.path.splitext(os.path.basename(args.ngshelper_database))[0]

        # list data of variants and alleles and variant identifications to the scenario X
        query_data(conn, file_name, args.sp1_id, args.sp2_id, args.hybrid_id, args.imputed_md_id, args.max_separation, args.output_dir, args.tsi_list)

    finally:

        # close connection to NGShelper database
        conn.close()
def main(argv):
    '''
    Main line of the program: load the genomic features of a species from a
    GFF file into the TOA database.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.

    Raises:
        xlib.ProgramException: code 'L006' when the taxonomy dictionary got
        for the species name is empty.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # the species check raises or the loader fails
    try:

        # get the taxonomy dictionary of the species name from taxonomy server
        taxonomy_dict = xlib.get_taxonomy_dict('name', args.species_name)

        # the lookup was done by species name, so the species name (not a
        # dataset identification, which this program does not use) is reported
        # NOTE(review): confirm the parameter expected by the 'L006' message
        if taxonomy_dict == {}:
            raise xlib.ProgramException('L006', args.species_name)

        # load genomic features depending of format of the genomic feature file
        load_genomic_features(conn, args.species_name, args.gff_file, args.gff_format)

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Main line of the program: load the tables "plaza_gene_description",
    "plaza_interpro", "plaza_go" and "plaza_mapman" of the TOA database.

    Parameters:
        argv: not read here; the parser is invoked without explicit arguments.

    Raises:
        xlib.ProgramException: code 'L001' when the dataset identification is
        not in the PLAZA dataset list; code 'L003' when the species
        identification is neither 'all' nor in the PLAZA species list.
    '''

    # check the operating system
    xlib.check_os()

    # get and check the arguments
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)

    # connect to the TOA database
    conn = xsqlite.connect_database(args.toa_database)

    # the connection is closed in "finally" so that it is released also when
    # a check raises or one of the loaders fails
    try:

        # get the PLAZA dataset identification list
        plaza_dataset_id_list = xsqlite.get_plaza_dataset_id_list(conn)

        # check the dataset identification
        if args.dataset_id not in plaza_dataset_id_list:
            raise xlib.ProgramException('L001', 'dataset', args.dataset_id)

        # get the PLAZA species identification list
        plaza_species_id_list = xsqlite.get_plaza_species_id_list(conn)

        # check the PLAZA species identification
        if args.species_id != 'all' and args.species_id not in plaza_species_id_list:
            raise xlib.ProgramException('L003', args.species_id)

        # load table "plaza_gene_description"
        load_table_plaza_gene_description(conn, args.dataset_id, args.species_id, args.gene_desc_dir, plaza_species_id_list)

        # load table "plaza_interpro"
        load_table_plaza_interpro(conn, args.dataset_id, args.species_id, args.interpro_file, plaza_species_id_list)

        # load table "plaza_go"
        load_table_plaza_go(conn, args.dataset_id, args.species_id, args.go_file, plaza_species_id_list)

        # load table "plaza_mapman"
        load_table_plaza_mapman(conn, args.dataset_id, args.species_id, args.mapman_file, plaza_species_id_list)

    finally:

        # close connection to TOA database
        conn.close()
def main(argv):
    '''
    Entry point: convert PHASE files to a Structure file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the conversion
    convert_phase_to_structure(
        options.phase_dir,
        options.phase_extension,
        options.sample_file,
        options.sp1_id,
        options.sp2_id,
        options.hybrid_id,
        options.structure_file,
    )
def main(argv):
    '''
    Entry point: run the sequence extraction on a VCF file and an
    identification file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the extraction
    extract_sequences(options.vcf_file, options.id_file, options.extract_file)
def main(argv):
    '''
    Entry point: extract the genotype data of every variant from a VCF file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the genotype extraction
    extract_vcf_genotypes(
        options.input_vcf_file,
        options.imputed_md_id,
        options.output_genotype_file,
        options.tvi_list,
    )
def main(argv):
    '''
    Entry point: get exon data from an alignment file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # get the exon data; results go to the output directory
    get_exon_data(options.alignment_file, options.output_dir)
def main(argv):
    '''
    Entry point: extract genomic features from a GFF file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the feature extraction
    extract_ff_features(
        options.input_gff_file,
        options.gff_format,
        options.vcf_file,
        options.output_gff_file,
    )
def main(argv):
    '''
    Entry point: fix the sequence identifiers of a read file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # fix the identifiers
    fix_seq_ids(options.filenum, options.readfile)
def main(argv):
    '''
    Entry point: get the Gene Ontology terms per sequence from an annotation
    file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # get the GO terms
    get_go_terms(options.annotation_file, options.type, options.score_file, options.go_file)
def main(argv):
    '''
    Entry point: calculate haplotype statistics from a loci file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # calculate the statistics
    calculate_haplotype_statistics(options.loci_file_path, options.stats_file_path)
def main():
    '''
    Entry point: filter SSRs using a COS file and a SSR file.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the filter
    filter_ssrs(options.cos_file, options.ssr_file, options.output_file)
def main(argv):
    '''
    Entry point: build the functional annotation data corresponding to a
    TRAPID run.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # build the annotation file from the GO, GF and KO transcript files
    build_trapid_annotation(
        options.transcripts_with_go_file,
        options.transcripts_with_gf_file,
        options.transcripts_with_ko_file,
        options.annotation_file,
    )
def main(argv):
    '''
    Entry point: extract RNA sequences from a GFF file and its corresponding
    genome FASTA file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the RNA extraction
    extract_gff_rnas(
        options.gff_file,
        options.gff_format,
        options.genome_file,
        options.rna_file,
        options.tvi_list,
    )
def main():
    '''
    Entry point: filter the transcripts of a FASTA file by length.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the filter with the minimum and maximum length options
    filter_transcripts_bylen(options.fasta_file, options.output_file, options.minlen, options.maxlen)
def main(argv):
    '''
    Entry point: get the flanking regions using a VCF file and a genome file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # get the flanking regions
    get_flanking_regions(
        options.vcf_file,
        options.genome_file,
        options.flanking_region_file,
        options.nucleotide_number,
    )
def main(argv):
    '''
    Entry point: convert a SimHyb file to a Structure file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the conversion
    convert_simhyb_to_structure(options.simhyb_file, options.header_row_number, options.structure_file)
def main(argv):
    '''
    Entry point: split an annotation file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # split the annotation file
    split_files(options.annotation_file, options.type, options.record_number_per_file, options.header)
def main(argv):
    '''
    Entry point: extract annotations from an annotation file using an
    identification file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the extraction
    extract_annotations(
        options.annotation_file,
        options.type,
        options.id_file,
        options.extract_file,
        options.stats_file,
    )
def main():
    '''
    Entry point: filter the transcripts of a transcriptome file.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # run the filter with the length, FPKM and TPM options
    filter_transcripts(
        options.assembly_software_code,
        options.transcriptome_file,
        options.score_file,
        options.output_file,
        options.minlen,
        options.maxlen,
        options.minFPKM,
        options.minTPM,
    )
def main(argv):
    '''
    Entry point: collapse the variant records corresponding to an indel in a
    VCF file.

    "argv" is not read here; the parser is invoked without explicit arguments.
    '''

    # make sure the operating system is supported
    xlib.check_os()

    # parse the command line and validate the options
    options = build_parser().parse_args()
    check_args(options)

    # collapse the indel records
    collapse_indels(
        options.input_vcf_file,
        options.sample_file,
        options.imputed_md_id,
        options.sp1_id,
        options.sp2_id,
        options.hybrid_id,
        options.output_vcf_file,
        options.stats_file,
        options.tvi_list,
    )