def generate(X, seqType, args):
    """Encode protein sequences with a five-value-per-residue table ('pcpP4').

    Reference-1: Z-Scale (iFeature)
    Reference-2: https://pubs.acs.org/doi/suppl/10.1021/jm9700575/suppl_file/jm2481.pdf

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 5 and the tag 'pcpP4'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'Physicochemical Properties-P4\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # One row of five descriptors per residue; 'p' is the all-zero padding row.
    zscale_table = {
        'A': [0.24, -2.32, 0.60, -0.14, 1.30],
        'C': [0.84, -1.67, 3.71, 0.18, -2.65],
        'D': [3.98, 0.93, 1.93, -2.46, 0.75],
        'E': [3.11, 0.26, -0.11, -0.34, -0.25],
        'F': [-4.22, 1.94, 1.06, 0.54, -0.62],
        'G': [2.05, -4.06, 0.36, -0.82, -0.38],
        'H': [2.47, 1.95, 0.26, 3.90, 0.09],
        'I': [-3.89, -1.73, -1.71, -0.84, 0.26],
        'K': [2.29, 0.89, -2.49, 1.49, 0.31],
        'L': [-4.28, -1.30, -1.49, -0.72, 0.84],
        'M': [-2.85, -0.22, 0.47, 1.94, -0.98],
        'N': [3.05, 1.62, 1.04, -1.15, 1.61],
        'P': [-1.66, 0.27, 1.84, 0.70, 2.00],
        'Q': [1.75, 0.50, -1.44, -1.34, 0.66],
        'R': [3.52, 2.50, -3.50, 1.99, -0.17],
        'S': [2.39, -1.07, 1.15, -1.39, 0.67],
        'T': [0.75, -2.18, -1.12, -1.46, -0.40],
        'V': [-2.59, -2.64, -1.54, -0.85, -0.02],
        'W': [-4.36, 3.94, 0.59, 3.44, -1.59],
        'Y': [-2.54, 2.44, 0.43, 0.04, -1.47],
        'p': [0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, zscale_table, args)
    save.datasetSave(encoded, 5, 'pcpP4')
def generate(X, seqType, args):
    """Encode protein sequences with a seven-value-per-residue table ('pcpP5').

    Reference: Spider-2

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 7 and the tag 'pcpP5'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'Physicochemical Properties-P5\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # One row of seven descriptors per residue; 'p' is the all-zero padding row.
    property_table = {
        'A': [-0.350, -0.680, -0.677, -0.171, -0.170, 0.900, -0.476],
        'C': [-0.140, -0.329, -0.359, 0.508, -0.114, -0.652, 0.476],
        'D': [-0.213, -0.417, -0.281, -0.767, -0.900, -0.155, -0.635],
        'E': [-0.230, -0.241, -0.058, -0.696, -0.868, 0.900, -0.582],
        'F': [0.363, 0.373, 0.412, 0.646, -0.272, 0.155, 0.318],
        'G': [-0.900, -0.900, -0.900, -0.342, -0.179, -0.900, -0.900],
        'H': [0.384, 0.110, 0.138, -0.271, 0.195, -0.031, -0.106],
        'I': [0.900, -0.066, -0.009, 0.652, -0.186, 0.155, 0.688],
        'K': [-0.088, 0.066, 0.163, -0.889, 0.727, 0.279, -0.265],
        'L': [0.213, -0.066, -0.009, 0.596, -0.186, 0.714, -0.053],
        'M': [0.110, 0.066, 0.087, 0.337, -0.262, 0.652, -0.001],
        'N': [-0.213, -0.329, -0.243, -0.674, -0.075, -0.403, -0.529],
        'P': [0.247, -0.900, -0.294, 0.055, -0.010, -0.900, 0.106],
        'Q': [-0.230, -0.110, -0.020, -0.464, -0.276, 0.528, -0.371],
        'R': [0.105, 0.373, 0.466, -0.900, 0.900, 0.528, -0.371],
        'S': [-0.337, -0.637, -0.544, -0.364, -0.265, -0.466, -0.212],
        'T': [0.402, -0.417, -0.321, -0.199, -0.288, -0.403, 0.212],
        'V': [0.677, -0.285, -0.232, 0.331, -0.191, -0.031, 0.900],
        'W': [0.479, 0.900, 0.900, 0.900, -0.209, 0.279, 0.529],
        'Y': [0.363, 0.417, 0.541, 0.188, -0.274, -0.155, 0.476],
        'p': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    }

    encoded = utils.processMono(X, property_table, args)
    save.datasetSave(encoded, 7, 'pcpP5')
def generate(X, seqType, args):
    """Encode nucleotide sequences with the transition-transversion table ('tt').

    Reference: It is a very common feature.

    Each base maps to four integers. Judging from the values, the columns
    appear to be ordered A, T/U, C, G with 0 = same base, 1 = transition and
    5 = transversion (NOTE(review): inferred from the table, confirm).

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'DNA' or 'RNA'. 'PROT' prints an error message; any other
        value is ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For DNA/RNA the encoded data is passed to
        ``save.datasetSave`` with a feature count of 4 and the tag 'tt'.
    """
    if seqType == 'PROT':
        print(CRED + 'Error: The \'Transition-Transversion\' feature is NOT applicable for PROT.' + CEND)
        return None
    if seqType != 'DNA' and seqType != 'RNA':
        # Previously this path fell through with no table bound and crashed
        # on the processMono call; bail out quietly like the sibling encoders.
        return None

    # The DNA and RNA tables are identical except for the T/U key.
    t_or_u = 'T' if seqType == 'DNA' else 'U'
    d = {
        'A': [0, 5, 5, 1],
        'C': [5, 1, 0, 5],
        'G': [1, 5, 5, 0],
        t_or_u: [5, 0, 1, 5],
        'p': [0, 0, 0, 0],  # padding
    }

    X = utils.processMono(X, d, args)
    save.datasetSave(X, 4, 'tt')
def generate(X, seqType, args):
    """Encode protein sequences with rows of the BLOSUM62 matrix ('blosum62').

    Reference-1: http://rosalind.info/glossary/blosum62/
    Reference-2: https://www.pnas.org/content/89/22/10915/

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 20 and the tag 'blosum62'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'BLOSUM62\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # One 20-value row per residue; 'p' is the all-zero padding row.
    blosum_rows = {
        'A': [4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2],
        'C': [0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2],
        'D': [-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3],
        'E': [-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2],
        'F': [-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3],
        'G': [0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3],
        'H': [-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2],
        'I': [-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1],
        'K': [-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2],
        'L': [-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1],
        'M': [-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1],
        'N': [-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2],
        'P': [-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3],
        'Q': [-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1],
        'R': [-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2],
        'S': [1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2],
        'T': [0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2],
        'V': [0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1],
        'W': [-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2],
        'Y': [-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, blosum_rows, args)
    save.datasetSave(encoded, 20, 'blosum62')
def generate(X, seqType, args):
    """Encode protein sequences as one-hot residue-class vectors ('pcpP3').

    Reference: https://www.biorxiv.org/content/10.1101/332171v2.full.pdf (Supp: Table-5)

    Each residue maps to eight 0/1 flags, one per class:

        Column1 --> Acidic               : D, E
        Column2 --> Basic                : R, H, K
        Column3 --> Aromatic side chain  : Y, F, W
        Column4 --> Aliphatic side chain : I, L, V, A, G
        Column5 --> Cyclic               : P
        Column6 --> Sulfur containing    : M, C
        Column7 --> Hydroxyl containing  : S, T
        Column8 --> Acidic amide         : Q, N

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 8 and the tag 'pcpP3'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'Physicochemical Properties-P3\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # Class membership, in column order (see the docstring).
    residue_classes = ('DE', 'RHK', 'YFW', 'ILVAG', 'P', 'MC', 'ST', 'QN')

    # Build the rows in alphabetical residue order, then add the padding row.
    class_table = {}
    for residue in 'ACDEFGHIKLMNPQRSTVWY':
        class_table[residue] = [
            1 if residue in members else 0 for members in residue_classes
        ]
    class_table['p'] = [0, 0, 0, 0, 0, 0, 0, 0]  # padding

    encoded = utils.processMono(X, class_table, args)
    save.datasetSave(encoded, 8, 'pcpP3')
def generate(X, seqType, args):
    """Encode sequences with a one-hot ("identity matrix") table ('bpf').

    Reference: https://doi.org/10.1016/j.omtn.2019.04.025
    (It is also called "identity matrix".)

    Every symbol of the alphabet maps to a vector holding a single 1 at the
    symbol's position in the alphabet; the padding symbol 'p' maps to all
    zeros. The alphabets are ACGT (DNA), ACGU (RNA) and the 20 amino acids
    in alphabetical one-letter order (PROT).

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value is ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For a supported type the encoded data is passed to
        ``save.datasetSave`` with the alphabet size (4 or 20) as feature
        count and the tag 'bpf'.
    """
    if seqType == 'DNA':
        alphabet = 'ACGT'
    elif seqType == 'RNA':
        alphabet = 'ACGU'
    elif seqType == 'PROT':
        alphabet = 'ACDEFGHIKLMNPQRSTVWY'
    else:
        # Previously this path fell through with no table bound and crashed
        # on the processMono call; bail out quietly like the sibling encoders.
        return None

    width = len(alphabet)
    # Row i of the identity matrix belongs to the i-th symbol of the alphabet.
    d = {
        symbol: [1 if col == row else 0 for col in range(width)]
        for row, symbol in enumerate(alphabet)
    }
    d['p'] = [0] * width  # padding

    X = utils.processMono(X, d, args)
    save.datasetSave(X, width, 'bpf')
def generate(X, seqType, args):
    """Encode protein sequences with ten binary property flags ('pcpP1').

    Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S2)

    Column meaning (1 = residue belongs to the set):

        Column1  --> Aromatic:    {F, Y, W, H}
        Column2  --> Negative:    {D, E}
        Column3  --> Positive:    {K, H, R}
        Column4  --> Polar:       {N, Q, S, D, E, C, T, K, R, H, Y, W}
        Column5  --> Hydrophobic: {A, G, C, T, I, V, L, K, H, F, Y, W, M}
        Column6  --> Aliphatic:   {I, V, L}
        Column7  --> Tiny:        {A, S, G, C}
        Column8  --> Charged:     {K, H, R, D, E}
        Column9  --> Small:       {P, N, D, T, C, A, G, S, V}
        Column10 --> Proline:     {P}

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 10 and the tag 'pcpP1'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(CRED+'Error: The \'Physicochemical Properties-P1\' feature is NOT applicable for DNA/RNA.'+CEND)
        return None
    if seqType != 'PROT':
        return None

    # One row of ten flags per residue; 'p' is the all-zero padding row.
    flag_table = {
        'A': [0, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        'R': [0, 0, 1, 1, 0, 0, 0, 1, 0, 0],
        'N': [0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        'D': [0, 1, 0, 1, 0, 0, 0, 1, 1, 0],
        'C': [0, 0, 0, 1, 1, 0, 1, 0, 1, 0],
        'Q': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        'E': [0, 1, 0, 1, 0, 0, 0, 1, 0, 0],
        'G': [0, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        'H': [1, 0, 1, 1, 1, 0, 0, 1, 0, 0],
        'I': [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        'L': [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        'K': [0, 0, 1, 1, 1, 0, 0, 1, 0, 0],
        'M': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        'F': [1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        'P': [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        'S': [0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
        'T': [0, 0, 0, 1, 1, 0, 0, 0, 1, 0],
        'W': [1, 0, 0, 1, 1, 0, 0, 0, 0, 0],
        'Y': [1, 0, 0, 1, 1, 0, 0, 0, 0, 0],
        'V': [0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, flag_table, args)
    save.datasetSave(encoded, 10, 'pcpP1')
def generate(X, seqType, args):
    """Encode protein sequences with rows of the PAM250 matrix ('pam250').

    Reference-1: http://rosalind.info/glossary/pam250/
    Reference-2: http://profs.scienze.univr.it/~liptak/ALBioinfo/2011_2012/files/pam1.pdf
                 (M.O. Dayhoff et al.)

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 20 and the tag 'pam250'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'PAM250\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # One 20-value row per residue; 'p' is the all-zero padding row.
    pam_rows = {
        'A': [2, -2, 0, 0, -3, 1, -1, -1, -1, -2, -1, 0, 1, 0, -2, 1, 1, 0, -6, -3],
        'C': [-2, 12, -5, -5, -4, -3, -3, -2, -5, -6, -5, -4, -3, -5, -4, 0, -2, -2, -8, 0],
        'D': [0, -5, 4, 3, -6, 1, 1, -2, 0, -4, -3, 2, -1, 2, -1, 0, 0, -2, -7, -4],
        'E': [0, -5, 3, 4, -5, 0, 1, -2, 0, -3, -2, 1, -1, 2, -1, 0, 0, -2, -7, -4],
        'F': [-3, -4, -6, -5, 9, -5, -2, 1, -5, 2, 0, -3, -5, -5, -4, -3, -3, -1, 0, 7],
        'G': [1, -3, 1, 0, -5, 5, -2, -3, -2, -4, -3, 0, 0, -1, -3, 1, 0, -1, -7, -5],
        'H': [-1, -3, 1, 1, -2, -2, 6, -2, 0, -2, -2, 2, 0, 3, 2, -1, -1, -2, -3, 0],
        'I': [-1, -2, -2, -2, 1, -3, -2, 5, -2, 2, 2, -2, -2, -2, -2, -1, 0, 4, -5, -1],
        'K': [-1, -5, 0, 0, -5, -2, 0, -2, 5, -3, 0, 1, -1, 1, 3, 0, 0, -2, -3, -4],
        'L': [-2, -6, -4, -3, 2, -4, -2, 2, -3, 6, 4, -3, -3, -2, -3, -3, -2, 2, -2, -1],
        'M': [-1, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, -2, -2, -1, 0, -2, -1, 2, -4, -2],
        'N': [0, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2, 0, 1, 0, 1, 0, -2, -4, -2],
        'P': [1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6, 0, 0, 1, 0, -1, -6, -5],
        'Q': [0, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, 1, -1, -1, -2, -5, -4],
        'R': [-2, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 0, -1, -2, 2, -4],
        'S': [1, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, -1, -2, -3],
        'T': [1, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -5, -3],
        'V': [0, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, -6, -2],
        'W': [-6, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, 0],
        'Y': [-3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, 10],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, pam_rows, args)
    save.datasetSave(encoded, 20, 'pam250')
def generate(X, seqType, args):
    """Encode protein sequences with 21 binary property-group flags ('pcpP2').

    Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S3)

    Seven properties, each splitting the residues into three groups:

        Group-1:
            Hydrophobicity:                  {A, C, F, G, H, I, L, M, N, P, Q, S, T, V, W, Y}
            Normalized Van der Waals volume: {C, F, I, L, M, V, W}
            Polarity:                        {A, C, D, G, P, S, T}
            Polarizability:                  {C, F, I, L, M, V, W, Y}
            Charge:                          {A, D, G, S, T}
            Secondary structures:            {D, G, N, P, S}
            Solvent accessibility:           {A, C, F, G, I, L, V, W}
        Group-2:
            Hydrophobicity:                  {D, E}
            Normalized Van der Waals volume: {A, G, H, P, S, T, Y}
            Polarity:                        {E, I, L, N, Q, V}
            Polarizability:                  {A, G, P, S, T}
            Charge:                          {C, E, I, L, N, P, Q, V}
            Secondary structures:            {A, E, H, K, L, M, Q, R}
            Solvent accessibility:           {H, M, P, S, T, Y}
        Group-3:
            Hydrophobicity:                  {K, R}
            Normalized Van der Waals volume: {D, E, K, N, Q, R}
            Polarity:                        {F, H, K, M, R, W, Y}
            Polarizability:                  {D, E, H, K, N, Q, R}
            Charge:                          {F, H, K, M, R, W, Y}
            Secondary structures:            {C, F, I, T, V, W, Y}
            Solvent accessibility:           {D, E, K, N, R, Q}

    NOTE(review): the rows below read as seven consecutive (group-1, group-2,
    group-3) triplets, one triplet per property in the order listed above,
    rather than as seven group-1 columns followed by the group-2 and group-3
    columns. Confirm the intended column order against the reference.

    :param X: sequence data; handed unchanged to ``utils.processMono``.
    :param seqType: 'PROT' is the only supported value. 'DNA'/'RNA' print an
        error message; any other value is silently ignored.
    :param args: forwarded unchanged to ``utils.processMono``.
    :return: always None. For 'PROT' the encoded data is passed to
        ``save.datasetSave`` with a feature count of 21 and the tag 'pcpP2'.
    """
    # Guard clauses: everything except protein input exits early.
    if seqType == 'DNA' or seqType == 'RNA':
        print(
            CRED
            + 'Error: The \'Physicochemical Properties-P2\' feature is NOT applicable for DNA/RNA.'
            + CEND)
        return None
    if seqType != 'PROT':
        return None

    # One row of 21 flags per residue; 'p' is the all-zero padding row.
    group_table = {
        'A': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
        'R': [0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
        'N': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1],
        'D': [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1],
        'C': [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'Q': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        'E': [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        'G': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0],
        'H': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0],
        'I': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'L': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0],
        'K': [0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
        'M': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
        'F': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0],
        'P': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0],
        'S': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0],
        'T': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0],
        'W': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0],
        'Y': [1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
        'V': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, group_table, args)
    save.datasetSave(encoded, 21, 'pcpP2')