def main(args):
    """Build k-mer feature vectors and write them in the requested format.

    Reads ``args.r``, ``args.alphabet``, ``args.k``, ``args.inputfile``,
    ``args.f``, ``args.l`` and ``args.outputfile``.  ``args.r`` is replaced
    in place by a boolean and ``args.alphabet`` by the matching
    ``index_list`` constant.

    Returns:
        False when ``-r`` is requested for a non-DNA alphabet, otherwise
        None.
    """
    # Set revcomp parameter.
    if args.r != 1:
        args.r = False
    elif args.alphabet != 'DNA':
        # Stop here: continuing would compute reverse complements on an
        # alphabet for which the option was just reported as invalid.
        print("Error, the -r parameter can only be used in DNA.")
        return False
    else:
        args.r = True

    # Set alphabet parameter.
    if args.alphabet == 'DNA':
        args.alphabet = index_list.DNA
    elif args.alphabet == 'RNA':
        args.alphabet = index_list.RNA
    elif args.alphabet == 'Protein':
        args.alphabet = index_list.PROTEIN

    res = make_kmer_vector(k=args.k, alphabet=args.alphabet,
                           filename=args.inputfile, revcomp=args.r)

    # Write the result file in the requested format.
    if args.f == 'svm':
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Compute an RNA structure feature matrix and write it to a file.

    ``args.method`` (case-insensitive) selects TRIPLET, PSESSC or PSEDPC.
    Missing ``args.k`` / ``args.n`` / ``args.r`` / ``args.w`` values are
    filled in with the defaults announced in the printed messages.  For
    ``args.f == 'svm'`` the label ``args.l`` is defaulted and validated
    according to ``args.multi`` before writing.

    Returns:
        False when the alphabet is not RNA, the method is unknown or the
        label is invalid; otherwise None.
    """
    # TODO: args.method will be finished
    # TODO: args.inputfile, name
    if args.alphabet != "RNA":
        print("sequence type error!")
        return False

    method = args.method.upper()
    if method == 'TRIPLET':
        res = get_triplet_matrix(args.inputfile)
    elif method == 'PSESSC':
        if args.k is None:
            print("parameters k is required. The default value of k is 2.")
            args.k = 2
        if args.r is None:
            print("parameters r is required. The default value of r is 2.")
            args.r = 2
        if args.w is None:
            print("parameters w is required. The default value of w is 0.1.")
            args.w = 0.1
        res = get_psessc_matrix(args.inputfile, args.k, args.r, args.w)
    elif method == 'PSEDPC':
        if args.n is None:
            print("parameters n is required. The default value of n is 0.")
            args.n = 0
        if args.r is None:
            print("parameters r is required. The default value of r is 2.")
            args.r = 2
        if args.w is None:
            print("parameters w is required. The default value of w is 0.1.")
            args.w = 0.1
        res = get_psedpc_matrix(args.inputfile, args.n, args.r, args.w)
    else:
        # Without this return the writer below would hit an unbound `res`.
        print("Method error!")
        return False

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        if args.multi == 0:
            if args.l is None:
                args.l = '+1'
            elif args.l not in ('+1', '-1'):
                print("For binary classification, the label should be either '+1' or '-1'.")
                return False
        elif args.multi == 1:
            if args.l is None:
                args.l = '0'
            else:
                try:
                    int(args.l)
                except ValueError:
                    print('The labels should be integer.')
                    return False
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Run the auto/cross covariance computation and write the result.

    ``k`` is obtained from ``read_k(args.alphabet, args.method, 0)``.  The
    built-in default index set for the alphabet is used only when no index
    file (``args.i``), no extra index file (``args.e``) and no all-properties
    flag (``args.a``) were supplied; otherwise the user index list is used.
    ``args.alphabet`` is replaced in place by the ``index_list`` constant.

    Returns:
        False when ``args.method`` is in none of the ``const.METHODS_*``
        groups; otherwise None.
    """
    with open(args.inputfile) as f:
        k = read_k(args.alphabet, args.method, 0)

        # Get index_list.
        if args.i is not None:
            from pse import read_index
            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.
        default_e = []
        if args.alphabet == 'DNA':
            args.alphabet = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == 'RNA':
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == 'Protein':
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        if args.method in const.METHODS_AC:
            theta_type = 1
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            # Do not silently fall back to theta_type 1 for an unknown method.
            print("Method error!")
            return False

        # ACC: pick the default indices only when nothing was user-supplied.
        use_default = args.e is None and len(ind_list) == 0 and args.a is False
        res = acc(f, k, args.lag, default_e if use_default else ind_list,
                  args.alphabet, extra_index_file=args.e, all_prop=args.a,
                  theta_type=theta_type)

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Compute an RNA structure feature matrix and write it to a file.

    ``args.method`` (case-insensitive) selects TRIPLET, PSESSC or PSEDPC.
    Missing ``args.k`` / ``args.n`` / ``args.r`` / ``args.w`` values are
    filled in with the defaults announced in the printed messages.

    Returns:
        False when the alphabet is not RNA or the method is unknown;
        otherwise None.
    """
    # TODO: args.method will be finished
    # TODO: args.inputfile, name
    if args.alphabet != "RNA":
        print("sequence type error!")
        return False

    method = args.method.upper()
    if method == 'TRIPLET':
        res = get_triplet_matrix(args.inputfile)
    elif method == 'PSESSC':
        if args.k is None:
            print("parameters k is required. The default value of k is 2.")
            args.k = 2
        if args.r is None:
            print("parameters r is required. The default value of r is 2.")
            args.r = 2
        if args.w is None:
            print("parameters w is required. The default value of w is 0.1.")
            args.w = 0.1
        res = get_psessc_matrix(args.inputfile, args.k, args.r, args.w)
    elif method == 'PSEDPC':
        if args.n is None:
            print("parameters n is required. The default value of n is 0.")
            args.n = 0
        if args.r is None:
            print("parameters r is required. The default value of r is 2.")
            args.r = 2
        if args.w is None:
            print("parameters w is required. The default value of w is 0.1.")
            args.w = 0.1
        res = get_psedpc_matrix(args.inputfile, args.n, args.r, args.w)
    else:
        # Without this return the writer below would hit an unbound `res`.
        print("Method error!")
        return False

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Run the pseudo-composition computation and write the result.

    ``args.method == 'PseKNC'`` is routed to ``ipseknc``; every other
    accepted method is routed to ``pseknc`` with the ``theta_type`` chosen
    from ``const.THETA_1_METHODS`` / ``const.THETA_2_METHODS``.  The built-in
    default indices are used only when ``args.i``, ``args.e`` and ``args.a``
    supply nothing.  ``args.alphabet`` is replaced in place by the
    ``index_list`` constant.

    Returns:
        False when ``args.method`` is not recognised; otherwise None.
    """
    with open(args.inputfile) as f:
        # Get index_list.
        if args.i is not None:
            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.
        default_e = []
        if args.alphabet == 'DNA':
            args.alphabet = index_list.DNA
            if args.k == 2:
                default_e = const.DI_INDS_6_DNA
            elif args.k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == 'RNA':
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == 'Protein':
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        if args.method in const.THETA_1_METHODS:
            theta_type = 1
        elif args.method in const.THETA_2_METHODS:
            theta_type = 2
        elif args.method == 'PseKNC':
            theta_type = 3  # not passed on: ipseknc takes no theta_type
        else:
            # Do not silently run pseknc with theta_type 1 for an unknown method.
            print("Method error!")
            return False

        use_default = args.e is None and len(ind_list) == 0 and args.a is False
        if args.method != 'PseKNC':
            # PseKNC.
            res = pseknc(f, args.k, args.w, args.lamada,
                         default_e if use_default else ind_list,
                         args.alphabet, extra_index_file=args.e,
                         all_prop=args.a, theta_type=theta_type)
        else:
            # iPseKNC always defaults to the six DNA dinucleotide indices.
            res = ipseknc(f, args.k, args.w, args.lamada,
                          const.DI_INDS_6_DNA if use_default else ind_list,
                          args.alphabet, extra_index_file=args.e,
                          all_prop=args.a)

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Dispatch a composition-based feature method and write the result.

    ``args.method`` (case-insensitive) selects KMER, IDKMER, MISMATCH,
    SUBSEQUENCE, DR or DP.  ``args.r`` is replaced in place by a boolean and
    ``args.alphabet`` by the matching ``index_list`` constant.  Missing
    ``k`` / ``m`` / ``delta`` values are filled in with the defaults
    announced in the printed messages.  For ``args.f == 'svm'`` the label
    ``args.l`` is defaulted and validated according to ``args.multi``.

    Returns:
        False when any parameter check fails or the method is unknown;
        otherwise None.
    """
    # Set revcomp parameter.
    if args.r != 1:
        args.r = False
    elif args.alphabet != 'DNA':
        print("Error, the -r parameter can only be used in DNA.")
        return False
    else:
        args.r = True

    # Set alphabet parameter.
    if args.alphabet == 'DNA':
        args.alphabet = index_list.DNA
    elif args.alphabet == 'RNA':
        args.alphabet = index_list.RNA
    elif args.alphabet == 'Protein':
        args.alphabet = index_list.PROTEIN

    method = args.method.upper()
    if method == 'KMER':
        if args.k is None:
            print("parameters k is required. The default value of k is 2.")
            args.k = 2
        # args.r was already normalised to a bool above, so it needs no default.
        res = make_kmer_vector(k=args.k, alphabet=args.alphabet,
                               filename=args.inputfile, revcomp=args.r)
    elif method == 'IDKMER':
        if args.k is None:
            print("parameters k is required. The default value of k is 6.")
            args.k = 6
        if args.ps is None or args.ns is None:
            print('The positive and the negative source files are required.')
            return False
        res = idkmer(k=args.k, filename=args.inputfile,
                     pos_src_name=args.ps, neg_src_name=args.ns)
    elif method == 'MISMATCH':
        if args.k is None:
            print("parameters k is required. The default value of k is 3.")
            args.k = 3
        if args.m is None:
            print("parameters m is required. The default value of m is 1.")
            args.m = 1
        if args.m >= args.k:
            # No result can be produced, so stop before the writer runs.
            print("parameters m should be less than parameter k.")
            return False
        res = getMismatchProfileMatrix(args.inputfile, args.alphabet,
                                       args.k, args.m)
    elif method == 'SUBSEQUENCE':
        if args.delta is None:
            print("parameters delta is required. The default value of delta is 1.")
            args.delta = 1
        elif args.delta > 1 or args.delta < 0:
            print("delta should be greater than or equal to 0 and less than or equal to 1.")
            return False
        if args.k is None:
            print("parameters k is required. The default value of k is 3.")
            args.k = 3
        res = getSubsequenceProfileByParallel(filename=args.inputfile,
                                              alphabet=args.alphabet,
                                              k=args.k, delta=args.delta)
    elif method == 'DR':
        if args.alphabet != index_list.PROTEIN:
            print('DR method is only available for Protein.')
            return False
        if args.max_dis < 0 or args.max_dis > 10:
            print('The max distance can not be negative integer and should be smaller than 11.')
            return False
        res = dr_method(inputfile=args.inputfile, max_dis=args.max_dis)
        print(res)
    elif method == 'DP':
        if args.alphabet != index_list.PROTEIN:
            print('Distance Pair method is only available for Protein.')
            return False
        if args.max_dis < 0 or args.max_dis > 10:
            print('The max distance can not be negative integer and should be smaller than 11.')
            return False
        if args.cp == 'cp_13':
            reduce_alphabet_scheme = const.cp_13
        elif args.cp == 'cp_14':
            reduce_alphabet_scheme = const.cp_14
        elif args.cp == 'cp_19':
            reduce_alphabet_scheme = const.cp_19
        elif args.cp == 'cp_20':
            reduce_alphabet_scheme = const.cp_20
        else:
            # Previously an unknown scheme surfaced as an unbound-variable crash.
            print("The cp parameter should be one of cp_13, cp_14, cp_19 or cp_20.")
            return False
        res = get_pseaacdis_matrix(filename=args.inputfile,
                                   reduce_alphabet_scheme=reduce_alphabet_scheme,
                                   max_distance=args.max_dis,
                                   alphabet=args.alphabet)
    else:
        print("Method error!")
        return False

    # Write the result file in the requested format.
    if args.f == 'svm':
        if args.multi == 0:
            if args.l is None:
                args.l = '+1'
            elif args.l not in ('+1', '-1'):
                print("For binary classification, the label should be either '+1' or '-1'.")
                return False
        elif args.multi == 1:
            if args.l is None:
                args.l = '0'
            else:
                try:
                    int(args.l)
                except ValueError:
                    print('The labels should be integer.')
                    return False
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Run the auto/cross covariance computation and write the result.

    ``k`` comes from ``read_k(args.alphabet, args.method, 0)``.  When no
    index file (``args.i``), extra index file (``args.e``) or all-properties
    flag (``args.a``) is given, the alphabet's built-in default indices are
    passed to ``acc``; otherwise the list read from ``args.i`` is passed.
    ``args.alphabet`` is overwritten with the ``index_list`` constant.

    Returns:
        False when ``args.method`` belongs to none of the
        ``const.METHODS_*`` groups; otherwise None.
    """
    with open(args.inputfile) as f:
        k = read_k(args.alphabet, args.method, 0)

        # Get index_list.
        if args.i is not None:
            from pse import read_index

            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.
        default_e = []
        if args.alphabet == "DNA":
            args.alphabet = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == "RNA":
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == "Protein":
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        if args.method in const.METHODS_AC:
            theta_type = 1
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            # An unknown method must not be computed as if it were AC.
            print("Method error!")
            return False

        # ACC: fall back to the defaults only when the user supplied nothing.
        nothing_supplied = (
            args.e is None and len(ind_list) == 0 and args.a is False
        )
        indices = default_e if nothing_supplied else ind_list
        res = acc(
            f,
            k,
            args.lag,
            indices,
            args.alphabet,
            extra_index_file=args.e,
            all_prop=args.a,
            theta_type=theta_type,
        )

    # Write the result file in the requested format.
    if args.f == "tab":
        from util import write_tab

        write_tab(res, args.outputfile)
    elif args.f == "svm":
        from util import write_libsvm

        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == "csv":
        from util import write_csv

        write_csv(res, args.outputfile)
def main(args):
    """Run an ACC-family or autocorrelation method and write the result.

    Methods MAC, GAC and NMBAC (case-insensitive) are handled by
    ``autocorrelation`` with a property set chosen from ``args.alphabet``,
    ``args.oli`` and ``args.a``.  Every other method goes through ``read_k``
    and ``acc`` on the opened input file.  ``args.alphabet`` is replaced in
    place by the ``index_list`` constant, and a missing ``args.a`` becomes
    False on the autocorrelation path.

    Returns:
        False when a parameter check fails, the alphabet/``oli`` value is
        not supported by the autocorrelation path, or the method is
        unknown; otherwise None.
    """
    if args.method.upper() in ('MAC', 'GAC', 'NMBAC'):
        if args.lamada < 0 or args.lamada > 10:
            print('The value of lamada should be larger than 0 and smaller than 10.')
            return False
        if args.a is None:
            # Was `args.a == False`, a comparison whose result was discarded.
            args.a = False

        # The alphabet test is independent of the args.a default above; it
        # used to be an `elif` and was skipped whenever args.a was None.
        if args.alphabet == 'DNA':
            args.alphabet = index_list.DNA
            if args.oli == 0:
                k = 2
                props = const.ALL_DI_DNA_IND if args.a else const.DEFAULT_DI_DNA_IND
            elif args.oli == 1:
                k = 3
                props = const.ALL_TRI_DNA_IND if args.a else const.DEFAULT_TRI_DNA_IND
            else:
                print("The oli parameter should be either 0 or 1.")
                return False
        elif args.alphabet == 'RNA':
            args.alphabet = index_list.RNA
            k = 2
            props = const.ALL_RNA_IND if args.a else const.DEFAULT_RNA_IND
        else:
            print("sequence type error!")
            return False

        res = autocorrelation(autoc=args.method, inputfile=args.inputfile,
                              props=props, k=k, l=args.lamada,
                              alphabet=args.alphabet)
    else:
        with open(args.inputfile) as f:
            k = read_k(args.alphabet, args.method, 0)

            # Get index_list.
            if args.i is not None:
                from .pse import read_index
                ind_list = read_index(args.i)
            else:
                ind_list = []

            # Set Pse default index_list.
            default_e = []
            if args.alphabet == 'DNA':
                args.alphabet = index_list.DNA
                if k == 2:
                    default_e = const.DI_INDS_6_DNA
                elif k == 3:
                    default_e = const.TRI_INDS_DNA
            elif args.alphabet == 'RNA':
                args.alphabet = index_list.RNA
                default_e = const.DI_INDS_RNA
            elif args.alphabet == 'Protein':
                args.alphabet = index_list.PROTEIN
                default_e = const.INDS_3_PROTEIN

            if args.method in const.METHODS_AC:
                theta_type = 1
            elif args.method in const.METHODS_CC:
                theta_type = 2
            elif args.method in const.METHODS_ACC:
                theta_type = 3
            else:
                print("Method error!")
                return False

            # ACC: use the defaults only when the user supplied nothing.
            use_default = args.e is None and len(ind_list) == 0 and args.a is False
            res = acc(f, k, args.lag, default_e if use_default else ind_list,
                      args.alphabet, extra_index_file=args.e,
                      all_prop=args.a, theta_type=theta_type)

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        if args.multi == 0:
            if args.l is None:
                args.l = '+1'
            elif args.l not in ('+1', '-1'):
                print("For binary classification, the label should be either '+1' or '-1'.")
                return False
        elif args.multi == 1:
            if args.l is None:
                args.l = '0'
            else:
                try:
                    int(args.l)
                except ValueError:
                    print('The labels should be integer.')
                    return False
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Run the pseudo-composition computation and write the result.

    ``args.method == 'PseKNC'`` is routed to ``ipseknc``; every other
    accepted method is routed to ``pseknc`` with the ``theta_type`` chosen
    from ``const.THETA_1_METHODS`` / ``const.THETA_2_METHODS``.  The built-in
    default indices are used only when ``args.i``, ``args.e`` and ``args.a``
    supply nothing.  For ``args.f == 'svm'`` the label ``args.l`` is
    defaulted and validated according to ``args.multi``.

    Returns:
        False when ``args.method`` is not recognised or the label is
        invalid; otherwise None.
    """
    with open(args.inputfile) as f:
        # Get index_list.
        if args.i is not None:
            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.
        default_e = []
        if args.alphabet == 'DNA':
            args.alphabet = index_list.DNA
            if args.k == 2:
                default_e = const.DI_INDS_6_DNA
            elif args.k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == 'RNA':
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == 'Protein':
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        if args.method in const.THETA_1_METHODS:
            theta_type = 1
        elif args.method in const.THETA_2_METHODS:
            theta_type = 2
        elif args.method == 'PseKNC':
            theta_type = 3  # not passed on: ipseknc takes no theta_type
        else:
            # Do not silently run pseknc with theta_type 1 for an unknown method.
            print("Method error!")
            return False

        use_default = args.e is None and len(ind_list) == 0 and args.a is False
        if args.method != 'PseKNC':
            # PseKNC.
            res = pseknc(f, args.k, args.w, args.lamada,
                         default_e if use_default else ind_list,
                         args.alphabet, extra_index_file=args.e,
                         all_prop=args.a, theta_type=theta_type)
        else:
            # iPseKNC always defaults to the six DNA dinucleotide indices.
            res = ipseknc(f, args.k, args.w, args.lamada,
                          const.DI_INDS_6_DNA if use_default else ind_list,
                          args.alphabet, extra_index_file=args.e,
                          all_prop=args.a)

    # Write the result file in the requested format.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        if args.multi == 0:
            if args.l is None:
                args.l = '+1'
            elif args.l not in ('+1', '-1'):
                print("For binary classification, the label should be either '+1' or '-1'.")
                return False
        elif args.multi == 1:
            if args.l is None:
                args.l = '0'
            else:
                try:
                    int(args.l)
                except ValueError:
                    print('The labels should be integer.')
                    return False
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
def main(args):
    """Dispatch KMER, MISMATCH or SUBSEQUENCE and write the result.

    ``args.method`` is matched case-insensitively.  ``args.r`` is replaced
    in place by a boolean and ``args.alphabet`` by the matching
    ``index_list`` constant.  Missing ``k`` / ``m`` / ``delta`` values are
    filled in with the defaults announced in the printed messages.

    Returns:
        False when a parameter check fails or the method is unknown;
        otherwise None.
    """
    # Set revcomp parameter.
    if args.r != 1:
        args.r = False
    elif args.alphabet != 'DNA':
        print("Error, the -r parameter can only be used in DNA.")
        return False
    else:
        args.r = True

    # Set alphabet parameter.
    if args.alphabet == 'DNA':
        args.alphabet = index_list.DNA
    elif args.alphabet == 'RNA':
        args.alphabet = index_list.RNA
    elif args.alphabet == 'Protein':
        args.alphabet = index_list.PROTEIN

    method = args.method.upper()
    if method == 'KMER':
        if args.k is None:
            print("parameters k is required. The default value of k is 2.")
            args.k = 2
        # args.r was already normalised to a bool above, so it needs no default.
        res = make_kmer_vector(k=args.k, alphabet=args.alphabet,
                               filename=args.inputfile, revcomp=args.r)
    elif method == 'MISMATCH':
        if args.k is None:
            print("parameters k is required. The default value of k is 3.")
            args.k = 3
        if args.m is None:
            print("parameters m is required. The default value of m is 1.")
            args.m = 1
        if args.m >= args.k:
            # No result can be produced, so stop before the writer runs.
            print("parameters m should be less than parameter k.")
            return False
        res = getMismatchProfileMatrix(args.inputfile, args.alphabet,
                                       args.k, args.m)
    elif method == 'SUBSEQUENCE':
        if args.delta is None:
            print("parameters delta is required. The default value of delta is 1.")
            args.delta = 1
        elif args.delta > 1 or args.delta < 0:
            print("delta should be greater than or equal to 0 and less than or equal to 1.")
            return False
        if args.k is None:
            print("parameters k is required. The default value of k is 3.")
            args.k = 3
        res = getSubsequenceProfileByParallel(filename=args.inputfile,
                                              alphabet=args.alphabet,
                                              k=args.k, delta=args.delta)
    else:
        print("Method error!")
        return False

    # Write the result file in the requested format.
    if args.f == 'svm':
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)