# Build a lookup of gene coordinates keyed by protein ID.
#
# Every file found in <data_folders>/coords/ is read as tab-separated text
# with exactly four columns per line: contig ID, protein ID, start, stop.
# Start and stop are stored as the strings read from the file (no numeric
# conversion happens here). Because gene_coords is a defaultdict(tuple),
# looking up a protein ID that was never seen yields an empty tuple.
gene_coords = defaultdict(tuple)
coord_folder = args.data_folders + "/coords/"

for coord_file in os.listdir(coord_folder):
    # Use a context manager so each handle is closed as soon as its file has
    # been read, instead of leaving it to the garbage collector.
    with open(coord_folder + coord_file, 'r') as coord_handle:
        for line in coord_handle:
            line = line.rstrip()
            if not line:
                # Skip blank lines (e.g. a trailing empty line); they would
                # otherwise fail the four-column unpacking below.
                continue
            contig_id, protein_id, start, stop = line.split("\t")
            gene_coords[protein_id] = (contig_id, start, stop)


# Annotation files live in the "annotation" sub-folder of the data folder.
annotation_folder = "/".join((args.data_folders, "annotation"))

# Parse the annotations for the genome keys of genome_gene_info; the returned
# pair is unpacked into the two names below.
(protein_annotation, function_definitions) = AnnotationTools.parse_annotation_folder(
    genome_gene_info.keys(),
    annotation_folder,
)

# Open the output table for writing (an existing file is truncated). The
# handle is not closed anywhere in this section.
output_file = open(args.output_file, mode='w')

# Fetch the three COG lookup values returned by COG.cog_definitions().
(cog_one_letter, desc_cog_letter, desc_cog_number) = COG.cog_definitions()

# Visit every protein listed under every genome in genome_gene_info and look
# up its annotated product.
# NOTE(review): only the start of the loop body is visible in this section;
# `product` is presumably consumed by code that follows - confirm.
for genome in genome_gene_info:
    for protein in genome_gene_info[genome]:

        # If either lookup (the protein entry or its "Product" field) raises
        # KeyError, the product is recorded as None.
        try:
            product = protein_annotation[protein]["Product"]
        except KeyError:
            product = None
    # NOTE(review): the enclosing function header, the creation of `parser`
    # and any earlier argument definitions are not visible in this section.
    # Required option: the cluster file read further below.
    parser.add_argument("-c", "--cluster_file", type=str,
                        help="Cluster file", required=True)
    # Required option: the folder that receives the log file written below.
    parser.add_argument("-o", "--output_directory", type=str,
                        help="Output folder", required=True)

    args = parser.parse_args()

    # Create the output directory if it does not exist yet
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Read the genome list
    # NOTE(review): args.genome_list_index is presumably defined by an
    # argument added above this section - confirm.
    genome_id_dictionary, genome_count = ClusterTools.read_genome_list(args.genome_list_index)

    # Read the annotation information for the genome IDs loaded above
    # NOTE(review): args.annotation_folder is likewise not defined in the
    # visible lines - confirm.
    protein_annotation, function_definitions = AnnotationTools.parse_annotation_folder(genome_id_dictionary.keys(), args.annotation_folder)

    # Read the cluster information from the cluster file
    total_clusters = ClusterTools.get_cluster_information(args.cluster_file)

    # Open the log file inside the output directory; it is not closed within
    # the visible lines
    logfile = open(args.output_directory + "/logfile.txt", 'w')

    # Log the total number of clusters
    logfile.write("Total number of analyzed clusters: %d" % len(total_clusters) + "\n")

    # Names of the annotation features handled by the code that follows
    # (their use is not visible in this section)
    features_to_annotate = ["COG", "KO", "PFAM", "Product"]

    # Get the COG definitions (three values returned by COG.cog_definitions())
    cog_one_letter, desc_cog_letter, desc_cog_number = COG.cog_definitions()
# Example no. 3
# 0
# Build a lookup of gene coordinates keyed by protein ID.
#
# Every file found in <data_folders>/coords/ is read as tab-separated text
# with exactly four columns per line: contig ID, protein ID, start, stop.
# Start and stop are stored as the strings read from the file (no numeric
# conversion happens here). Because gene_coords is a defaultdict(tuple),
# looking up a protein ID that was never seen yields an empty tuple.
gene_coords = defaultdict(tuple)
coord_folder = args.data_folders + "/coords/"

for coord_file in os.listdir(coord_folder):
    # Use a context manager so each handle is closed as soon as its file has
    # been read, instead of leaving it to the garbage collector.
    with open(coord_folder + coord_file, "r") as coord_handle:
        for line in coord_handle:
            line = line.rstrip()
            if not line:
                # Skip blank lines (e.g. a trailing empty line); they would
                # otherwise fail the four-column unpacking below.
                continue
            contig_id, protein_id, start, stop = line.split("\t")
            gene_coords[protein_id] = (contig_id, start, stop)


# Annotation files live in the "annotation" sub-folder of the data folder.
annotation_folder = "/".join((args.data_folders, "annotation"))

# Parse the annotations for the genome keys of genome_gene_info; the returned
# pair is unpacked into the two names below.
(protein_annotation, function_definitions) = (
    AnnotationTools.parse_annotation_folder(
        genome_gene_info.keys(),
        annotation_folder,
    )
)

# Open the output table for writing (an existing file is truncated). The
# handle is not closed anywhere in this section.
output_file = open(args.output_file, mode="w")

# Fetch the three COG lookup values returned by COG.cog_definitions().
(cog_one_letter, desc_cog_letter, desc_cog_number) = COG.cog_definitions()

# Visit every protein listed under every genome in genome_gene_info and look
# up its annotated product.
# NOTE(review): only the start of the loop body is visible in this section;
# `product` is presumably consumed by code that follows - confirm.
for genome in genome_gene_info:
    for protein in genome_gene_info[genome]:

        # If either lookup (the protein entry or its "Product" field) raises
        # KeyError, the product is recorded as None.
        try:
            product = protein_annotation[protein]["Product"]
        except KeyError:
            product = None