示例#1
0
def main(args):
    """Compute ``acc`` features for ``args.inputfile`` and write them out.

    Reads these attributes of ``args``: ``inputfile``, ``outputfile``,
    ``alphabet``, ``method``, ``lag``, ``i`` (optional index file), ``e``
    (optional user-defined index file), ``a`` (all-indices flag), ``f``
    (output format) and ``l`` (libSVM label).

    Side effect: when ``args.alphabet`` is 'DNA', 'RNA' or 'Protein' it is
    replaced in place by the matching ``index_list`` entry.
    """
    with open(args.inputfile) as f:
        k = read_k(args.alphabet, args.method, 0)

        # The index file is only parsed when one was given on the command line.
        if args.i is None:
            ind_list = []
        else:
            from pse import read_index
            ind_list = read_index(args.i)

        # Resolve the alphabet and the built-in indices that go with it.
        default_e = []
        alphabet_name = args.alphabet
        if alphabet_name == 'DNA':
            args.alphabet = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif alphabet_name == 'RNA':
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif alphabet_name == 'Protein':
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        # Map the method name to the theta type handed to ``acc``.
        if args.method in const.METHODS_AC:
            theta_type = 1
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            # An unknown method is reported but processing continues with 1.
            print("Method error!")
            theta_type = 1

        # The built-in indices are used only when the user selected nothing.
        nothing_selected = (args.e is None and len(ind_list) == 0
                            and args.a is False)
        chosen_indices = default_e if nothing_selected else ind_list
        res = acc(f, k, args.lag, chosen_indices, args.alphabet,
                  extra_index_file=args.e, all_prop=args.a,
                  theta_type=theta_type)

    # Write the result in the requested format; writers are imported lazily.
    out_format = args.f
    if out_format == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif out_format == 'svm':
        from util import write_libsvm
        labels = [args.l] * len(res)
        write_libsvm(res, labels, args.outputfile)
    elif out_format == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
示例#2
0
                            "Protein: Hydrophobicity, Hydrophilicity, Mass.")
    parse.add_argument('-e', help="The user-defined indices file.\n")
    parse.add_argument('-all_index', dest='a', action='store_true', help="Choose all physicochemical indices")
    parse.add_argument('-no_all_index', dest='a', action='store_false',
                       help="Do not choose all physicochemical indices, default.")
    parse.set_defaults(a=False)
    parse.add_argument('-f', default='tab', choices=['tab', 'svm', 'csv'],
                       help="The output format (default = tab).\n"
                            "tab -- Simple format, delimited by TAB.\n"
                            "svm -- The libSVM training data format.\n"
                            "csv -- The format that can be loaded into a spreadsheet program.")
    parse.add_argument('-l', default='+1', choices=['+1', '-1'],
                       help="The libSVM output file label.")

    args = parse.parse_args()
    args.k = read_k(args.alphabet, args.method, args.k)

    # print(args)
    if check_args(args, 'pse.py'):
        print("Calculating...")
    start_time = time.time()
    main(args)
    print("Done.")
    print("Used time: %ss" % (time.time() - start_time))

    # Test dna type1.
    # print("Test di_dna, type1.")
    # alphabet = index_list.DNA
    # res = pseknc(input_data=['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'], k=2, w=0.5, lamada=1,
    # phyche_list=['Tilt', 'Roll', 'Rise', 'Slide', 'Shift'],
    # extra_index_file="data/test_ext_dna.txt", alphabet=alphabet)
示例#3
0
def _write_autocorrelation(args, jobs, props, k, alphabet, output_format):
    """Run ``autocorrelation`` for every job and write each result to its file."""
    for input_file, output_file, label in jobs:
        res = autocorrelation(autoc=args.method,
                              inputfile=input_file,
                              props=props,
                              k=k,
                              l=args.lamada,
                              alphabet=alphabet)
        write_to_file(res, output_format, label, output_file)


def main(args):
    """The main process of autocorrelation methods.

    Every file in ``args.inputfiles`` is processed with the method named by
    ``args.method`` and written to its paired output file.  Output names come
    from ``args.out`` or, when that is None, are derived from the input name
    by inserting ``_<format>`` before the extension.

    Attributes read from ``args``: ``inputfiles``, ``labels``, ``f``, ``out``,
    ``method``, ``alphabet``, ``i``, ``e``, ``a``, ``lag``, ``lamada``, ``oli``.
    ``args`` itself is not modified.

    :param args: an object of the arguments.
    :return: False when validation fails; None otherwise.
    """
    file_list = args.inputfiles
    label_list = args.labels
    output_format = args.f

    # ``not x`` also covers None, which argparse yields for an omitted
    # ``nargs='*'`` option without a default.
    if not file_list:
        print('Input files not found.')
        return False
    if output_format == 'svm':
        if not label_list:
            print('The labels of the input files should be set.')
            return False
        if len(file_list) != len(label_list):
            print(
                'The number of labels should be the same as that of the input files.'
            )
            return False
    else:
        # Labels are only meaningful for libSVM output; use placeholders.
        label_list = [0] * len(file_list)

    if args.out is not None:
        outputfile_list = args.out
        if len(outputfile_list) != len(file_list):
            print(
                'The number of output files should be the same as that of input files.'
            )
            return False
    else:
        outputfile_list = []
        if output_format in ('svm', 'tab', 'csv'):
            for in_file_name in file_list:
                root, ext = os.path.splitext(in_file_name)
                outputfile_list.append(root + '_' + output_format + ext)

    # Materialised so the (input, output, label) triples can be reused.
    jobs = list(zip(file_list, outputfile_list, label_list))
    method = args.method.upper()

    if method not in ('MAC', 'GAC', 'NMBAC', 'PDT'):
        # None of these settings depend on the individual input file, so
        # they are resolved once instead of once per file.
        k = read_k(args.alphabet, args.method, 0)

        # Get index_list.
        if args.i is not None:
            from pse import read_index
            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.
        default_e = []
        if args.alphabet == 'DNA':
            alphabet_list = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == 'RNA':
            alphabet_list = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == 'Protein':
            alphabet_list = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN
        else:
            print('The alphabet should be DNA, RNA or Protein.')
            return False

        theta_type = 1
        if args.method in const.METHODS_AC:
            theta_type = 1
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            # Reported, but processing continues with theta_type 1.
            print("Method error!")

        # The built-in indices are used only when the user selected nothing.
        use_defaults = (args.e is None and len(ind_list) == 0
                        and args.a is False)
        indices = default_e if use_defaults else ind_list

        for input_file, output_file, label in jobs:
            with open(input_file) as f:
                res = acc(f,
                          k,
                          args.lag,
                          indices,
                          alphabet_list,
                          extra_index_file=args.e,
                          all_prop=args.a,
                          theta_type=theta_type)
            write_to_file(res, output_format, label, output_file)

    elif method in ('MAC', 'GAC', 'NMBAC'):
        # NOTE(review): the check accepts 0 and 10 although the message says
        # "larger than 0 and smaller than 10" -- confirm the intended bounds.
        if args.lamada < 0 or args.lamada > 10:
            print(
                'The value of lamada should be larger than 0 and smaller than 10.'
            )
            return False

        # A missing flag (None) means "do not use all indices".  The previous
        # code compared instead of assigning and then skipped the alphabet
        # handling entirely in that case.
        use_all = bool(args.a)

        props = None
        alphabet = None
        k = None
        if args.alphabet == 'DNA':
            alphabet = index_list.DNA
            if args.oli == 0:
                props = (const.ALL_DI_DNA_IND
                         if use_all else const.DEFAULT_DI_DNA_IND)
                k = 2
            elif args.oli == 1:
                props = (const.ALL_TRI_DNA_IND
                         if use_all else const.DEFAULT_TRI_DNA_IND)
                k = 3
        elif args.alphabet == 'RNA':
            alphabet = index_list.RNA
            props = const.ALL_RNA_IND if use_all else const.DEFAULT_RNA_IND
            k = 2

        # Any other alphabet / oli combination produces no output, as before.
        if props is not None:
            _write_autocorrelation(args, jobs, props, k, alphabet,
                                   output_format)

    else:  # PDT
        if args.alphabet != 'Protein':
            print('PDT method is only available for Protein sequences.')
            return False
        if args.lamada < 1 or args.lamada > 15:
            print(
                'The value of -lamada should be larger than 0 and smaller than 16.'
            )
            return False
        for input_file, output_file, label in jobs:
            res = pdt(input_file, args.lamada)
            write_to_file(res, output_format, label, output_file)

    # Report the output files that actually exist on disk.
    if len(outputfile_list) != 0:
        for index, output_file in enumerate(outputfile_list):
            out_with_full_path = os.path.abspath(output_file)
            if os.path.isfile(out_with_full_path):
                if index == 0:
                    print('The output file(s) can be found here:')
                print(out_with_full_path)
示例#4
0
def main(args):
    """Run ``acc`` on ``args.inputfile`` and save the result to ``args.outputfile``.

    Attributes read from ``args``: ``inputfile``, ``outputfile``, ``alphabet``,
    ``method``, ``lag``, ``i``, ``e``, ``a``, ``f`` and ``l``.

    Side effect: a recognised ``args.alphabet`` name ("DNA", "RNA",
    "Protein") is overwritten with the matching ``index_list`` entry.
    """
    with open(args.inputfile) as f:
        k = read_k(args.alphabet, args.method, 0)

        # Indices picked by the user, if an index file was passed.
        ind_list = []
        if args.i is not None:
            from pse import read_index

            ind_list = read_index(args.i)

        # Built-in indices for the alphabet (and, for DNA, for the value of k).
        default_e = []
        requested_alphabet = args.alphabet
        if requested_alphabet == "DNA":
            args.alphabet = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif requested_alphabet == "RNA":
            args.alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif requested_alphabet == "Protein":
            args.alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        # Theta type 1/2/3 for the AC/CC/ACC method groups; an unknown
        # method is reported and falls back to 1.
        theta_type = 1
        if args.method in const.METHODS_AC:
            pass
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            print("Method error!")

        # Fall back to the built-in indices only if nothing was selected.
        if args.e is None and len(ind_list) == 0 and args.a is False:
            selected = default_e
        else:
            selected = ind_list

        options = dict(
            extra_index_file=args.e,
            all_prop=args.a,
            theta_type=theta_type,
        )
        res = acc(f, k, args.lag, selected, args.alphabet, **options)

    # Pick the writer for the requested format; each is imported on demand.
    target = args.outputfile
    if args.f == "tab":
        from util import write_tab

        write_tab(res, target)
    elif args.f == "svm":
        from util import write_libsvm

        write_libsvm(res, [args.l] * len(res), target)
    elif args.f == "csv":
        from util import write_csv

        write_csv(res, target)
示例#5
0
        action='store_false',
        help="Do not choose all physicochemical indices, default.")
    parse.set_defaults(a=False)
    parse.add_argument(
        '-f',
        default='tab',
        choices=['tab', 'svm', 'csv'],
        help="The output format (default = tab).\n"
        "tab -- Simple format, delimited by TAB.\n"
        "svm -- The libSVM training data format.\n"
        "csv -- The format that can be loaded into a spreadsheet program.")

    parse.add_argument(
        '-labels',
        nargs='*',
        help="The labels of the input files.\n"
        "For binary classification problem, the labels can only be '+1' or '-1'.\n"
        "For multiclass classification problem, the labels can be set as a list of integers."
    )

    args = parse.parse_args()
    args.k = read_k(args.alphabet, args.method, args.k)

    # print(args)
    if check_args(args, 'pse.py'):
        print("Calculating...")
    start_time = time.time()
    main(args)
    print("Done.")
    print("Used time: %.2fs" % (time.time() - start_time))
示例#6
0
def main(args):
    """Compute features for ``args.inputfile`` and write them to ``args.outputfile``.

    Methods other than MAC/GAC/NMBAC (compared case-insensitively) go through
    ``acc``; those three go through ``autocorrelation``.

    Attributes read from ``args``: ``inputfile``, ``outputfile``, ``method``,
    ``alphabet``, ``i``, ``e``, ``a``, ``lag``, ``lamada``, ``oli``, ``f``,
    ``multi`` and ``l``.  ``args.l`` is filled with a default label when it is
    None and libSVM output is requested; ``args.alphabet`` is left untouched.

    :return: False when validation fails or no features could be produced;
        None otherwise.
    """
    method = args.method.upper()
    res = None

    if method not in ['MAC', 'GAC', 'NMBAC']:
        k = read_k(args.alphabet, args.method, 0)

        # Get index_list.
        if args.i is not None:
            from .pse import read_index
            ind_list = read_index(args.i)
        else:
            ind_list = []

        # Set Pse default index_list.  An unrecognised alphabet is passed
        # through to ``acc`` unchanged, as before.
        default_e = []
        alphabet = args.alphabet
        if args.alphabet == 'DNA':
            alphabet = index_list.DNA
            if k == 2:
                default_e = const.DI_INDS_6_DNA
            elif k == 3:
                default_e = const.TRI_INDS_DNA
        elif args.alphabet == 'RNA':
            alphabet = index_list.RNA
            default_e = const.DI_INDS_RNA
        elif args.alphabet == 'Protein':
            alphabet = index_list.PROTEIN
            default_e = const.INDS_3_PROTEIN

        theta_type = 1
        if args.method in const.METHODS_AC:
            theta_type = 1
        elif args.method in const.METHODS_CC:
            theta_type = 2
        elif args.method in const.METHODS_ACC:
            theta_type = 3
        else:
            # Reported, but processing continues with theta_type 1.
            print("Method error!")

        # The built-in indices are used only when the user selected nothing.
        use_defaults = (args.e is None and len(ind_list) == 0
                        and args.a is False)
        indices = default_e if use_defaults else ind_list

        # The file is opened only here; ``autocorrelation`` below is given
        # the path instead of a handle.
        with open(args.inputfile) as f:
            res = acc(f,
                      k,
                      args.lag,
                      indices,
                      alphabet,
                      extra_index_file=args.e,
                      all_prop=args.a,
                      theta_type=theta_type)
    else:
        # NOTE(review): the check accepts 0 and 10 although the message says
        # "larger than 0 and smaller than 10" -- confirm the intended bounds.
        if args.lamada < 0 or args.lamada > 10:
            print(
                'The value of lamada should be larger than 0 and smaller than 10.'
            )
            return False

        # A missing flag (None) means "do not use all indices".  The previous
        # code compared instead of assigning and then skipped the alphabet
        # handling entirely in that case.
        use_all = bool(args.a)

        props = None
        alphabet = None
        k = None
        if args.alphabet == 'DNA':
            alphabet = index_list.DNA
            if args.oli == 0:
                props = (const.ALL_DI_DNA_IND
                         if use_all else const.DEFAULT_DI_DNA_IND)
                k = 2
            elif args.oli == 1:
                props = (const.ALL_TRI_DNA_IND
                         if use_all else const.DEFAULT_TRI_DNA_IND)
                k = 3
        elif args.alphabet == 'RNA':
            alphabet = index_list.RNA
            props = const.ALL_RNA_IND if use_all else const.DEFAULT_RNA_IND
            k = 2

        if props is not None:
            res = autocorrelation(autoc=args.method,
                                  inputfile=args.inputfile,
                                  props=props,
                                  k=k,
                                  l=args.lamada,
                                  alphabet=alphabet)

    # Previously an unsupported alphabet/oli combination reached the writers
    # with ``res`` unbound and crashed with a NameError.
    if res is None:
        print('No features were generated; check the alphabet and method.')
        return False

    # Write correspond res file.
    if args.f == 'tab':
        from util import write_tab
        write_tab(res, args.outputfile)
    elif args.f == 'svm':
        if args.multi == 0:
            if args.l is None:
                args.l = '+1'
            elif args.l not in ('+1', '-1'):
                print(
                    "For binary classification, the label should be either '+1' or '-1'."
                )
                return False
        elif args.multi == 1:
            if args.l is None:
                args.l = '0'
            else:
                # Only validates the label; the original string is what
                # gets written.
                try:
                    int(args.l)
                except ValueError:
                    print('The labels should be integer.')
                    return False
        from util import write_libsvm
        write_libsvm(res, [args.l] * len(res), args.outputfile)
    elif args.f == 'csv':
        from util import write_csv
        write_csv(res, args.outputfile)
示例#7
0
def main(args):
    """Predict a label for every sequence in ``args.inputfile`` with svm-predict.

    Steps:
      1. Read the sequence names from the FASTA input.
      2. Build one feature row per sequence by concatenating the k-mer
         vector (k=2, ``revcomp=True``) with the ``acc`` vector.
      3. Write the rows in libSVM format to a temporary file and run the
         external ``svm-predict`` program with the model selected by
         ``args.s`` (0 -> TAIR model with lag 3, otherwise TIGR model with
         lag 8).
      4. Turn the prediction file into a table in ``args.outputfile``.

    The two temporary files created next to the input file are removed
    afterwards, also when a step fails.
    """
    # Imported here because the file's import header is outside this block.
    import subprocess

    with open(args.inputfile) as af:
        names = [entry.name for entry in read_fasta(af)]

    res_kmer = make_kmer_vector(k=2, alphabet=index_list.DNA,
                                filename=args.inputfile, revcomp=True)

    if args.s == 0:
        model_file = 'pDHSdata_TAIR_model.txt'
        lag = 3
    else:
        model_file = 'pDHSdata_TIGR_model.txt'
        lag = 8

    with open(args.inputfile) as f:
        k = read_k('DNA', 'DAC', 0)
        res_acc = acc(f, k, lag, [], index_list.DNA,
                      extra_index_file='user_indices.txt',
                      all_prop=False, theta_type=1)

    # Feature row = k-mer features followed by the acc features.
    res = [kmer_row + acc_row for kmer_row, acc_row in zip(res_kmer, res_acc)]

    featuresfile = args.inputfile + '_tmp_features.txt'
    tmp_predict_result_file = args.inputfile + '_tmp_result.txt'

    try:
        from util import write_libsvm
        write_libsvm(res, ['+1'] * len(res), featuresfile)

        # An argument list (no shell) keeps paths containing spaces or
        # shell metacharacters intact.
        predictor = 'svm-predict' if sys.platform == 'win32' else './svm-predict'
        subprocess.call([predictor, '-b', '1', '-q',
                         featuresfile, model_file, tmp_predict_result_file])

        with open(args.outputfile, 'w') as pf, \
                open(tmp_predict_result_file) as nf:
            for count, raw_line in enumerate(nf):
                line = raw_line.strip()
                # Stop at the first blank line or once every name is used.
                if not line or count > len(names):
                    break
                if count == 0:
                    # The first line of the prediction file is not a
                    # prediction; it is replaced by the table header.
                    pf.write('ID\t\tLabel\t\tProb\n')
                    continue
                fields = line.split()
                label = int(fields[0])
                # NOTE(review): assumes column 1 holds the probability of
                # the positive class and column 2 that of the negative
                # class -- confirm against the model's label order.
                true_prob = fields[1]
                false_prob = fields[2]
                if label == -1:
                    pf.write(names[count - 1] + '\t\t' + 'Non DHS' + '\t\t'
                             + str(false_prob) + '\n')
                else:
                    pf.write(names[count - 1] + '\t\t' + 'DHS' + '\t\t'
                             + str(true_prob) + '\n')
    finally:
        # Remove only the files created above.  The previous cleanup deleted
        # every file in the working directory whose name contained 'tmp'.
        for tmp_path in (featuresfile, tmp_predict_result_file):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)
示例#8
0
def _timed_save(label, out_path, compute):
    """Call ``compute()``, save its result with ``np.savetxt`` and print the elapsed time."""
    # ``time.clock`` was removed in Python 3.8; fall back to it only on
    # interpreters that lack ``perf_counter``.
    clock = time.perf_counter if hasattr(time, 'perf_counter') else time.clock
    tic = clock()
    X = compute()
    np.savetxt(out_path, X)
    toc = clock()
    print('Coding time for ' + label + ':%.3f minutes' % ((toc - tic) / 60))


def _merge_feature_files(out_path, in_paths):
    """Join the files in ``in_paths`` row by row into ``out_path``.

    Each output row is the corresponding row of every input file, stripped
    of newlines and followed by a single space.  Merging stops when the
    last input file is exhausted.
    """
    handles = []
    try:
        for in_path in in_paths:
            handles.append(open(in_path, 'r'))
        with open(out_path, 'w') as merged:
            while True:
                rows = [handle.readline() for handle in handles]
                if not rows[-1]:
                    break
                merged.write(
                    ''.join(row.strip('\n') + ' ' for row in rows) + '\n')
    finally:
        for handle in handles:
            handle.close()


def GetVariousClassFeatures(samples_file, path):
    """Compute several feature encodings of ``samples_file`` and save them under ``path``.

    One text file is written per encoding (via ``np.savetxt``), then the
    per-parameter files of each family are merged row by row into
    ``SpectrumProfile.txt``, ``MismatchProfile.txt``, ``RevcKmer.txt`` and
    ``Pse.txt``.

    :param samples_file: path of the sequence file to encode.
    :param path: output directory, created if missing.  File names are
        appended by plain string concatenation, so ``path`` should end with
        a path separator.

    NOTE(review): ``alphabet`` is not defined in this function; it is
    presumably a module-level name -- confirm.
    """
    if not os.path.exists(path):
        os.makedirs(path)

    # The handle is only needed while the sequences are being read.
    with open(samples_file, 'r') as fp:
        sample = GetSequences(fp, 'ACGT')
    instances = array(sample)
    print('The number of samples: %d' % (len(sample)))

    spectrum_ks = range(1, 6)
    mismatch_params = [(3, 1), (4, 1), (5, 1)]
    revc_ks = range(1, 6)

    # 1 Spectrum Profile for k=1,2,3,4,5
    for k in spectrum_ks:
        _timed_save(str(k) + '-Spectrum Profile',
                    path + str(k) + '-SpectrumProfile.txt',
                    lambda k=k: GetSpectrumProfile(k, samples_file))

    # 2 Mismatch Profile for (k,m)=(3,1),(4,1),(5,1)
    for (k, m) in mismatch_params:
        _timed_save(str((k, m)) + '-Mismatch Profile',
                    path + str((k, m)) + '-MismatchProfile.txt',
                    lambda k=k, m=m: GetMismatchProfile(instances, alphabet, k, m))

    # 3 Reverse Compliment Kmer for k=1,2,3,4,5
    for k in revc_ks:
        _timed_save(str(k) + '-RevcKmer',
                    path + str(k) + '-RevcKmer.txt',
                    lambda k=k: GetRevcKmer(k))

    # 4 Parallel Correlation Pseudo Dinucleotide Composition
    _timed_save('PCPseDNC', path + 'PCPseDNC.txt',
                lambda: GetPCPseDNC(3, 0.9))  # alternative noted: (2, 0.2)

    # 5 Parallel Correlation Pseudo Trinucleotide Composition
    _timed_save('PCPseTNC', path + 'PCPseTNC.txt',
                lambda: GetPCPseTNC(3, 0.5))  # alternative noted: (6, 0.1)

    # 6 Series Correlation Pseudo Dinucleotide Composition
    _timed_save('SCPseDNC', path + 'SCPseDNC.txt',
                lambda: GetSCPseDNC(5, 0.1))  # alternative noted: (1, 0.1)

    # 7 Series Correlation Pseudo Trinucleotide Composition
    _timed_save('SCPseTNC', path + 'SCPseTNC.txt',
                lambda: GetSCPseTNC(10, 0.1))  # alternative noted: (6, 0.1)

    # 8 Dinucleotide-based auto covariance (lag 8; lag 3 was the noted
    # alternative).  ``read_k`` is part of the timed section.
    _timed_save('DAC', path + 'DAC.txt',
                lambda: GetDAC(instances, read_k('DNA', 'DAC', 0), 8, alphabet))

    # Merge the per-parameter files of each feature family row by row.
    _merge_feature_files(
        path + 'SpectrumProfile.txt',
        [path + str(k) + '-SpectrumProfile.txt' for k in spectrum_ks])
    _merge_feature_files(
        path + 'MismatchProfile.txt',
        [path + str(km) + '-MismatchProfile.txt' for km in mismatch_params])
    _merge_feature_files(
        path + 'RevcKmer.txt',
        [path + str(k) + '-RevcKmer.txt' for k in revc_ks])
    _merge_feature_files(
        path + 'Pse.txt',
        [path + name + '.txt'
         for name in ('PCPseDNC', 'PCPseTNC', 'SCPseDNC', 'SCPseTNC')])