def pseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete PseKNC process and return the resulting vectors.

    :param input_data: the sequence input, handed unchanged to get_data.
    :param k: int, the value of k-tuple.
    :param w: forwarded unchanged to make_pseknc_vector
        (presumably the weight factor -- confirm in make_pseknc_vector).
    :param lamada: forwarded unchanged to make_pseknc_vector
        (presumably the lambda parameter -- confirm in make_pseknc_vector).
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of index_list.DNA, index_list.RNA or index_list.PROTEIN.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: forwarded unchanged to make_pseknc_vector.
    :return: whatever make_pseknc_vector returns for the parsed sequences.
    :raises ValueError: if alphabet is none of the three supported alphabets.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            from util import normalize_index
            phyche_vals = get_phyche_value(k, phyche_list, alphabet,
                                           normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals would be unbound below and the
        # caller would only see an opaque UnboundLocalError.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type)
def ipseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False):
    """Run the complete iPseKNC process.

    k is the kmer size used for the final vector, but the physicochemical
    index is always looked up for dinucleotides (k=2).

    :param input_data: the sequence input, handed unchanged to get_data.
    :param k: int, the value of k-tuple.
    :param w: forwarded unchanged to make_pseknc_vector.
    :param lamada: forwarded unchanged to make_pseknc_vector.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: the alphabet handed to the index and data helpers.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :return: whatever make_pseknc_vector returns when called with theta_type=3.
    """
    dinucleotide_props = get_phyche_list(k=2, phyche_list=phyche_list,
                                         extra_index_file=extra_index_file,
                                         alphabet=alphabet, all_prop=all_prop)

    # Collect the keyword arguments once; the user-defined index is only
    # passed along when a file was actually supplied.
    value_kwargs = {'k': 2, 'phyche_list': dinucleotide_props, 'alphabet': alphabet}
    if extra_index_file is not None:
        raw_extra_index = get_extra_index(extra_index_file)
        from util import normalize_index
        value_kwargs['extra_phyche_index'] = normalize_index(
            raw_extra_index, alphabet, is_convert_dict=True)
    property_values = get_phyche_value(**value_kwargs)

    sequences = get_data(input_data, alphabet)

    return make_pseknc_vector(sequences, property_values, k, w, lamada, alphabet, theta_type=3)
def acc(input_data, k, lag, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete auto/cross covariance process and return the vectors.

    :param input_data: the sequence input, handed unchanged to get_data.
    :param k: int, the value of k-tuple.
    :param lag: forwarded unchanged to make_ac_vec / make_cc_vec / make_acc_vec.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of index_list.DNA, index_list.RNA or index_list.PROTEIN.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: the value 1, 2 and 3 for ac, cc or acc.
    :return: the result of make_ac_vec, make_cc_vec or make_acc_vec.
    :raises ValueError: if alphabet or theta_type is not a supported value.
    :raises KeyError: for proteins, if a property has no value for a residue
        that the first property defines.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            from util import normalize_index
            phyche_vals = get_phyche_value(
                k, phyche_list, alphabet,
                normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals would be unbound below and the
        # caller would only see an opaque UnboundLocalError.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seqs = get_data(input_data, alphabet)

    if alphabet == index_list.PROTEIN:
        # Transform the data format to dict {acid: [phyche_vals]}.
        # Values are looked up by key rather than zipped positionally, so
        # properties whose dicts are ordered differently cannot be misaligned.
        aaindex_entries = phyche_vals
        phyche_keys = list(aaindex_entries[0].index_dict.keys())
        phyche_vals = {
            key: [entry.index_dict[key] for entry in aaindex_entries]
            for key in phyche_keys
        }

    if theta_type == 1:
        return make_ac_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 2:
        return make_cc_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 3:
        return make_acc_vec(seqs, lag, phyche_vals, k)

    # Previously an unknown theta_type fell through and returned None.
    raise ValueError("theta_type must be 1 (ac), 2 (cc) or 3 (acc), got %r" % (theta_type,))
def acc(input_data, k, lag, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete auto/cross covariance process and return the vectors.

    :param input_data: the sequence input, handed unchanged to get_data.
    :param k: int, the value of k-tuple.
    :param lag: forwarded unchanged to make_ac_vec / make_cc_vec / make_acc_vec.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of index_list.DNA, index_list.RNA or index_list.PROTEIN.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: the value 1, 2 and 3 for ac, cc or acc.
    :return: the result of make_ac_vec, make_cc_vec or make_acc_vec.
    :raises ValueError: if alphabet or theta_type is not a supported value.
    :raises KeyError: for proteins, if a property has no value for a residue
        that the first property defines.
    """
    phyche_list = get_phyche_list(
        k, phyche_list, extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop
    )

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            from util import normalize_index
            phyche_vals = get_phyche_value(
                k, phyche_list, alphabet, normalize_index(extra_phyche_index, alphabet, is_convert_dict=True)
            )
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals would be unbound below and the
        # caller would only see an opaque UnboundLocalError.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seqs = get_data(input_data, alphabet)

    if alphabet == index_list.PROTEIN:
        # Transform the data format to dict {acid: [phyche_vals]}.
        # Values are looked up by key rather than zipped positionally, so
        # properties whose dicts are ordered differently cannot be misaligned.
        aaindex_entries = phyche_vals
        phyche_keys = list(aaindex_entries[0].index_dict.keys())
        phyche_vals = {key: [entry.index_dict[key] for entry in aaindex_entries] for key in phyche_keys}

    if theta_type == 1:
        return make_ac_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 2:
        return make_cc_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 3:
        return make_acc_vec(seqs, lag, phyche_vals, k)

    # Previously an unknown theta_type fell through and returned None.
    raise ValueError("theta_type must be 1 (ac), 2 (cc) or 3 (acc), got %r" % (theta_type,))
]] print('Begin PseDNC') psednc = PseDNC() vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) psednc = PseDNC(lamada=2, w=0.1) vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'], extra_phyche_index=normalize_index( phyche_index, is_convert_dict=True)) print(vec) print(len(vec[0])) print('Begin PseKNC') pseknc = PseKNC() vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) pseknc = PseKNC(k=2, lamada=1, w=0.05) vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0]))
if seq.no != 1: print("Error, the basic function get_data5") error = True # Normalization of physicochemical index. from util import normalize_index phyche_index = [[ 0.026, 0.036, 0.031, 0.033, 0.016, 0.026, 0.014, 0.031, 0.025, 0.025, 0.026, 0.036, 0.017, 0.025, 0.016, 0.026 ], [ 0.038, 0.038, 0.037, 0.036, 0.025, 0.042, 0.026, 0.037, 0.038, 0.036, 0.042, 0.038, 0.018, 0.038, 0.025, 0.038 ]] if normalize_index(phyche_index) \ != [[0.06, 1.5, 0.78, 1.07, -1.38, 0.06, -1.66, 0.78, -0.08, -0.08, 0.06, 1.5, -1.23, -0.08, -1.38, 0.06], [0.5, 0.5, 0.36, 0.22, -1.36, 1.08, -1.22, 0.36, 0.5, 0.22, 1.08, 0.5, -2.37, 0.5, -1.36, 0.5]]: print("Error, the basic function normalize_index") error = True print("Basic function test end!") # ###################################################################################### # Nucleic acid Composition # Basic kmer from nac import Kmer kmer = Kmer(k=2) if kmer.make_kmer_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) \
if seq.seq != 'GACTGAACTGCACTTTGGTTTCATATTATTTGCTC': print("Error, the basic function get_data4") error = True if seq.no != 1: print("Error, the basic function get_data5") error = True # Normalization of physicochemical index. from util import normalize_index phyche_index = [ [0.026, 0.036, 0.031, 0.033, 0.016, 0.026, 0.014, 0.031, 0.025, 0.025, 0.026, 0.036, 0.017, 0.025, 0.016, 0.026], [0.038, 0.038, 0.037, 0.036, 0.025, 0.042, 0.026, 0.037, 0.038, 0.036, 0.042, 0.038, 0.018, 0.038, 0.025, 0.038]] if normalize_index(phyche_index) \ != [[0.06, 1.5, 0.78, 1.07, -1.38, 0.06, -1.66, 0.78, -0.08, -0.08, 0.06, 1.5, -1.23, -0.08, -1.38, 0.06], [0.5, 0.5, 0.36, 0.22, -1.36, 1.08, -1.22, 0.36, 0.5, 0.22, 1.08, 0.5, -2.37, 0.5, -1.36, 0.5]]: print("Error, the basic function normalize_index") error = True print("Basic function test end!") # ###################################################################################### # Nucleic acid Composition # Basic kmer from nac import Kmer kmer = Kmer(k=2) if kmer.make_kmer_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) \
self.k = 2 check_psenac(self.lamada, self.w, self.k) def make_psednc_vec(self, input_data, phyche): sequence_list = get_data(input_data) #sequence_list, phyche_value = get_sequence_list_and_phyche_value_psednc(input_data, extra_phyche_index) phyche_value = phyche from psenacutil import make_pseknc_vector vector = make_pseknc_vector(sequence_list, self.lamada, self.w, self.k, phyche_value, theta_type=1) return vector phy = pd.read_csv('phy.csv', header=-1, index_col=None) phyche_index = np.array(phy) phyche_index_dict = normalize_index(phyche_index, is_convert_dict=True) psednc = PseDNC(lamada=23, w=0.05) vec = psednc.make_psednc_vec(open('S_data.txt'), phyche=phyche_index_dict) #print(len(vec[0])) feature = np.array(vec) data_new = np.matrix(feature) data_PseDNC = pd.DataFrame(data=data_new) data_PseDNC.to_csv('PseDNC_S.csv')
-0.654, 0.567, 1.019]] print('Begin PseDNC') psednc = PseDNC() vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) psednc = PseDNC(lamada=2, w=0.1) vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'], extra_phyche_index=normalize_index(phyche_index, is_convert_dict=True)) print(vec) print(len(vec[0])) print('Begin PseKNC') pseknc = PseKNC() vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0])) pseknc = PseKNC(k=2, lamada=1, w=0.05) vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) print(vec) print(len(vec[0]))