from Bio import SeqIO from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/h_sapiens_random4500.fa", "../data/training_files/h_sapiens_random4500.cpat.txt", "../data/training_files/h_sapiens_random4500.fa.tab") print("imported human info") atha_info, atha_dict, atha_names = transcript_info_dict( "../data/training_files/a_thaliana_random4500.fa", "../data/training_files/a_thaliana_random4500.cpat.txt", "../data/training_files/a_thaliana_random4500.fa.tab") lncRNA_info, lncRNA_dict, lncRNA_names = trans_info_dict_cc( "../data/training_files/all_lncRNA_nodup.fa", "../data/training_files/all_lncRNA_nodup.humantrained.cpat.txt", "../data/training_files/all_lncRNA_nodup.fa.tab", 'ips1_arabidopsisthaliana_1') print("imported lncRNA info") wanted_keys = [
from Bio import SeqIO from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/h_sapiens_random3000.fa", "../data/training_files/h_sapiens_random3000.cpat.txt", "../data/training_files/h_sapiens_random3000.fa.tab") print("imported human info") mmus_info, mmus_dict, mmus_names = transcript_info_dict( "../data/training_files/m_musculus_random1000.fa", "../data/training_files/m_musculus_random1000.cpat.txt", "../data/training_files/m_musculus_random1000.fa.tab") print("imported mouse info") osat_info, osat_dict, osat_names = transcript_info_dict( "../data/training_files/o_sativa_random3000.fa", "../data/training_files/o_sativa_random3000.cpat.txt", "../data/training_files/o_sativa_random3000.fa.tab") print("imported rice info") #lncRNA_info, lncRNA_dict, lncRNA_names = transcript_info_dict("fasta_files/subset_lncrna_V2.fa","fasta_files/subset_lncrna_v2.cpat.txt", "fasta_files/subset_lncrna_V2.fa.tab") #lncRNA_info, lncRNA_dict, lncRNA_names = trans_info_dict_cc("fasta_files/all_lncRNA_nodup.fa","fasta_files/all_lncRNA_nodup.humantrained.cpat.txt", "fasta_files/all_lncRNA_nodup.fa.tab") lncRNA_info, lncRNA_dict, lncRNA_names = trans_info_dict(
clf2 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model2.pkl')) clf3 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model3.pkl')) clf4 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model4.pkl')) clf5 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model5.pkl')) clf6 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model6.pkl')) clf7 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model7.pkl')) clf8 = joblib.load( os.path.join(CURRENT_DIR + '/../updated_gb_models/model8.pkl')) trans_info, trans_dict, trans_names = transcript_info_dict( args.trans_fasta, args.cpat_out, args.diam_out) key_order = [ "align_perc_len", "align_perc_ORF", "align_length", "ORF", "length", "GC", "fickett", "hexamer", "identity" ] # to keep this order trans_dict_order = { gene: {feature: trans_dict[gene][feature] for feature in key_order} for gene in trans_names } # changed this from trans_dict to trans_dict_order trans_array = np.array([[ trans_dict_order[gene][feature]
from Bio import SeqIO from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/h_sapiens_random2000.fa", "../data/training_files/h_sapiens_random2000.cpat.txt", "../data/training_files/h_sapiens_random2000.fa.tab") print("imported human info") mmus_info, mmus_dict, mmus_names = transcript_info_dict( "../data/training_files/m_musculus_random1000.fa", "../data/training_files/m_musculus_random1000.cpat.txt", "../data/training_files/m_musculus_random1000.fa.tab") print("imported mouse info") #print("imported rice info") atha_info, atha_dict, atha_names = transcript_info_dict( "../data/training_files/a_thaliana_random3000.fa", "../data/training_files/a_thaliana_random3000.cpat.txt", "../data/training_files/a_thaliana_random3000.fa.tab") print("imported arabidopsis info") lncRNA_info, lncRNA_dict, lncRNA_names = transcript_info_dict(
from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti import featuresetup_module hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/h_sapiens_random3000.fa", "../data/training_files/h_sapiens_random3000.cpat.txt", "../data/training_files/h_sapiens_random3000.fa.tab") print("imported human info") mmus_info, mmus_dict, mmus_names = transcript_info_dict( "../data/training_files/m_musculus_random1000.fa", "../data/training_files/m_musculus_random1000.cpat.txt", "../data/training_files/m_musculus_random1000.fa.tab") print("imported mouse info") osat_info, osat_dict, osat_names = transcript_info_dict( "../data/training_files/o_sativa_random3000.fa", "../data/training_files/o_sativa_random3000.cpat.txt", "../data/training_files/o_sativa_random3000.fa.tab") print("imported rice info") #lncRNA_info, lncRNA_dict, lncRNA_names = transcript_info_dict("fasta_files/subset_lncrna_V2.fa","fasta_files/subset_lncrna_v2.cpat.txt", "fasta_files/subset_lncrna_V2.fa.tab") #lncRNA_info, lncRNA_dict, lncRNA_names = trans_info_dict_cc("fasta_files/all_lncRNA_nodup.fa","fasta_files/all_lncRNA_nodup.humantrained.cpat.txt", "fasta_files/all_lncRNA_nodup.fa.tab") lncRNA_info, lncRNA_dict, lncRNA_names = transcript_info_dict(
from Bio import SeqIO from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/old_files/hsapiens_random_3000.fasta", "../data/training_files/old_files/hsapiens_random_3000.fasta.cpat.txt", "../data/training_files/old_files/hsapiens_random_3000.fasta.tab") print("imported human info") atha_info, atha_dict, atha_names = transcript_info_dict( "../data/training_files/old_files/arabidopsis_random_3000.fasta", "../data/training_files/old_files/arabidopsis_random_3000.cpat.txt", "../data/training_files/old_files/arabidopsis_random_3000.fasta.tab") lncRNA_info, lncRNA_dict, lncRNA_names = trans_info_dict_cc( "../data/training_files/all_lncRNA_nodup.fa", "../data/training_files/all_lncRNA_nodup.humantrained.cpat.txt", "../data/training_files/all_lncRNA_nodup.fa.tab", 'ips1_arabidopsisthaliana_1') print("imported lncRNA info") wanted_keys = [
from Bio import SeqIO from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, cross_val_score, KFold import numpy as np import csv from featuresetup_module import transcript_info, transcript_info_dict from sklearn.externals import joblib from sklearn import preprocessing import datetime from sklearn.ensemble import GradientBoostingClassifier from sklearn.feature_selection import RFECV #from collections import Counter from treeinterpreter import treeinterpreter as ti hsapiens_info, hsapiens_dict, hsapiens_names = transcript_info_dict( "../data/training_files/hsapiens_random_3000.fasta", "../data/training_files/hsapiens_random_3000.fasta.cpat.txt", "../data/training_files/hsapiens_random_3000.fasta.tab") print("imported human info") atha_info, atha_dict, atha_names = transcript_info_dict( "../data/training_files/arabidopsis_random_3000.fasta", "../data/training_files/arabidopsis_random_3000.cpat.txt", "../data/training_files/arabidopsis_random_3000.fasta.tab") print("imported arabidopsis info") lncRNA_info, lncRNA_dict, lncRNA_names = transcript_info_dict( "../data/training_files/all_lncRNA_nodup.fa", "../data/training_files/all_lncRNA_nodup.humantrained.cpat.txt", "../data/training_files/all_lncRNA_nodup.fa.tab") print("imported lncRNA info") wanted_keys = [