# Load the names of the files to process from the CSV of files that failed
# in a previous run.
list_files = get_files_from_error_csv('error_files_list.csv')

# Uncomment this line for the first insertion (it overrides the list above).
#list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')

# Process the files in reverse order. The reversed iterator is materialised
# into a list so that the debug print below shows the file names instead of
# the repr of a one-shot iterator object.
list_files = list(reversed(list_files))
list_files_error = []
list_files_done = []
print(list_files)
for file_name in list_files:
    print(file_name)
    # Load taxonomy from the GenBank file that shares the base name of the
    # current file: the last three characters of file_name are replaced by
    # 'gbk' (presumably a three-letter extension such as 'xls' -- confirm).
    path_file_genBank = cwd + '/RAST/GEN_BANK/' + file_name[:-3] + 'gbk'
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)
    # Continue only when the taxonomy has exactly 7 entries, or when the
    # file name mentions staphylococcus_aureus (case-insensitive).
    if len(taxo) == 7 or 'staphylococcus_aureus' in file_name.lower():

        # Test proteins from file
        path_file_xls = cwd + '/RAST/SPREADSHEET/' + file_name
        path_file_contig = cwd + '/RAST/CONTIGS/' + file_name[:-3] + 'contigs.fa'

        # NOTE(review): the spreadsheet object is built before the existence
        # check on path_file_xls below -- confirm this order is intended.
        xls_obj = Xls_gen_bank(path_file_xls)

        value = check_file_exits(path_file_xls)

        contig_file_exist = check_file_exits(path_file_contig)
# Load the names of the files to process from the CSV of files that failed
# in a previous run.
list_files = get_files_from_error_csv('error_files_list.csv')

# Uncomment this line for the first insertion (it overrides the list above).
#list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')

# Process the files in reverse order. The reversed iterator is materialised
# into a list so that the debug print below shows the file names instead of
# the repr of a one-shot iterator object.
list_files = list(reversed(list_files))
list_files_error = []
list_files_done = []
print(list_files)
for file_name in list_files:
    print(file_name)
    # Load taxonomy from the GenBank file that shares the base name of the
    # current file: the last three characters of file_name are replaced by
    # 'gbk' (presumably a three-letter extension such as 'xls' -- confirm).
    path_file_genBank = cwd + '/RAST/GEN_BANK/' + file_name[:-3] + 'gbk'
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)
    # Continue only when the taxonomy has exactly 7 entries, or when the
    # file name mentions streptococcus_oralis (case-insensitive).
    if len(taxo) == 7 or 'streptococcus_oralis' in file_name.lower():

        # Taxonomy objects stay None unless the condition below holds.
        family_obj = None
        genus_obj = None
        specie_obj = None
        strain_obj = None
        # NOTE(review): because the two name checks are joined with `or`,
        # the parenthesised test is False only when the name contains BOTH
        # '_phi' and 'phage'. If the intent is to skip every phage file,
        # `and` would be needed. Logic left unchanged -- confirm intent.
        if ('_phi' not in file_name.lower()
                or 'phage' not in file_name.lower()) and len(taxo) == 7:
            family_obj = Family(designation=gen_bank_obj.get_family())
            genus_obj = Genus(designation=gen_bank_obj.get_genus())
            specie_obj = Specie(designation=gen_bank_obj.get_specie())
            strain_obj = Strain(designation=gen_bank_obj.get_strain())
# Example #3
# 0
# xls_obj is not created in this part of the script; it must already exist
# when this line runs.
print(xls_obj.get_number_of_proteins())

print("end")

############# TEST CONTIGS FASTA FILES #############

# Raw string literal: the previous '\RAST\\...' spelling relied on the
# invalid escape sequence '\R', which Python reports with a warning. The
# resulting path text is unchanged.
fasta_contigs_file = Fasta_contigs_RAST(cwd + r'\RAST\CONTIGS\525717-Escherichia_coli_CFT079.contigs.fa')



# listas is defined outside this part of the script; its first element is
# used as the contig id to look up.
sequence_contig_nucleic = fasta_contigs_file.get_contig_seq_by_id(listas[0])


############# TEST CONTIGS genbank FILES #############

# Raw string literal for the same reason as the contigs path above.
genbank_file = Genbank_proteic_RAST(cwd + r'\RAST\GEN_BANK\525717-Escherichia_coli_CFT079.gbk')


# Print the organism definition and the taxonomy array read from the file.
aaaa = genbank_file.get_definition_of_the_organism()
print(aaaa)
www = genbank_file.get_taxonomy_array()
print(www)

# Show the container type of the parsed GenBank data and its first key.
print(type(genbank_file.data_gen_bank))

print(list(genbank_file.data_gen_bank.keys())[0])

qty_contig = genbank_file.get_number_of_contigs()

print(genbank_file.get_family())