Exemplo n.º 1
0
def computePOIM(x, y, poim_degree, kernel_degree, savepath):
    """Train an SVM on DNA strings, compute its POIMs and pickle them.

    A ``WeightedDegreePositionStringKernel`` of degree ``kernel_degree`` is
    built on ``x`` against itself, a ``LibSVM`` with ``C = 1`` is trained on
    it, and the result of ``poim.compute_poims`` is written to ``savepath``.

    Args:
        x: Indexable collection of sequences handed to ``StringCharFeatures``
            with the ``DNA`` alphabet. ``len(x[0])`` is passed on as the
            sequence length (presumably all sequences share that length --
            TODO confirm against callers).
        y: Labels handed unchanged to ``BinaryLabels``.
        poim_degree: Degree argument forwarded to ``poim.compute_poims``.
        kernel_degree: Degree of the weighted-degree-position kernel.
        savepath: Path of the file the pickled result is written to
            (opened in binary write mode, so an existing file is replaced).

    Returns:
        Whatever ``poim.compute_poims`` returned (the same object that was
        pickled).
    """
    feats_train = StringCharFeatures(x, DNA)
    labels = BinaryLabels(y)
    kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, kernel_degree)
    C = 1
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # Length of the first sequence is used as the POIM sequence length.
    seq_len = len(x[0])
    ma = poim.compute_poims(svm, kernel, poim_degree, seq_len)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(savepath, 'wb') as fobj:
        pickle.dump(ma, fobj)
    return ma
Exemplo n.º 2
0
def computePOIM(x, y, poim_degree, kernel_degree, savepath):
    """Train an SVM on DNA strings, compute its POIMs and pickle them.

    Progress messages are printed to stdout before the kernel is built,
    before training and before the POIM computation.

    Args:
        x: Indexable collection of sequences handed to ``StringCharFeatures``
            with the ``DNA`` alphabet. ``len(x[0])`` is passed on as the
            sequence length (presumably all sequences share that length --
            TODO confirm against callers).
        y: Labels; converted with ``np.array`` before being wrapped in
            ``BinaryLabels``.
        poim_degree: Degree argument forwarded to ``poim.compute_poims``.
        kernel_degree: Degree of the weighted-degree-position kernel.
        savepath: Path of the file the pickled result is written to
            (opened in binary write mode, so an existing file is replaced).

    Returns:
        Whatever ``poim.compute_poims`` returned (the same object that was
        pickled).
    """
    feats_train = StringCharFeatures(x, DNA)
    labels = BinaryLabels(np.array(y))

    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2 and is also valid Python 3.
    print("compute kernel matrix")
    kernel = WeightedDegreePositionStringKernel(feats_train, feats_train, kernel_degree)
    C = 1
    svm = LibSVM(C, kernel, labels)

    print("train support vector machine")
    svm.train()

    # Length of the first sequence is used as the POIM sequence length.
    seq_len = len(x[0])
    print("compute poim")
    ma = poim.compute_poims(svm, kernel, poim_degree, seq_len)

    # Context manager guarantees the handle is closed even if pickling fails.
    with open(savepath, "wb") as fobj:
        pickle.dump(ma, fobj)
    return ma
Exemplo n.º 3
0
def svm_poim(argv):
    """A top level script to parse input parameters and plot poims.

    Expected layout of ``argv`` (taken from the usage message)::

        prog poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles
             poim.png [dna|protein] non(nucleotide|amino)converter

    ``argv[2]`` is parsed as the float ``C`` and ``argv[3]`` as the integer
    POIM degree. ``argv[4:]`` is consumed first by
    ``parse.parse_kernel_param`` and then by ``parse.parse_input_file_train``;
    exactly three arguments must remain afterwards: the output plot file
    name, the sequence type and the converter character.

    Args:
        argv: Command-line argument list, including the program name at
            index 0 and the literal ``'poim'`` at index 1.

    Raises:
        AssertionError: If ``argv[1]`` is not ``'poim'``.
        SystemExit: With status ``-1`` after writing a message to stderr
            when arguments are missing, superfluous or of an unexpected
            sequence type.
    """

    assert(argv[1]=='poim')
    if len(argv) < 7:
        sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles  poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])
        sys.exit(-1)

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    (kernelname, kparam, argv_rest) = parse.parse_kernel_param(argv[4:], False)
    (examples, labels, argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)

    # Whatever is left must be: plot file, sequence type, converter.
    if len(argv_rest) < 1:
        sys.stderr.write("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n")
        sys.exit(-1)
    if len(argv_rest) < 2:
        sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n")
        sys.exit(-1)
    if len(argv_rest) < 3:
        # Only the converter is missing; the last remaining argument is the
        # sequence type, which selects the hint shown to the user.
        if argv_rest[-1] == 'dna':
            sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
            sys.exit(-1)
        elif argv_rest[-1] == 'protein':
            sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
            sys.exit(-1)
        else:
            sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
            sys.exit(-1)
    if len(argv_rest) > 3:
        sys.stderr.write("Too many arguments\n")
        sys.exit(-1)
    poimfilename = argv_rest[0]
    seq_source = argv_rest[1]
    nuc_con = argv_rest[2]

    utils.check_params(kparam, C, len(examples[0]))

    # train svm and compute POIMs
    (svm, kernel, feats_train, preproc) = train(examples, labels, C, kernelname, kparam, seq_source, nuc_con)
    # Single-argument parenthesised print: same output as the Python 2 print
    # statement, and also valid Python 3 syntax.
    print("done with training ")
    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, len(examples[0]))

    # plot poims
    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, len(examples[0]))
Exemplo n.º 4
0
def svm_poim(argv):
    """Entry point of the ``poim`` sub-command: train an SVM and plot POIMs.

    ``argv`` follows the layout given in the usage message: program name,
    the literal ``'poim'``, ``C`` (float), the POIM degree (int), the kernel
    name with its parameters, the input file specification, and finally the
    plot file name, the sequence type and the converter character.

    Args:
        argv: Full command-line argument list (program name at index 0).

    Raises:
        AssertionError: If ``argv[1]`` is not ``'poim'``.
        SystemExit: With status ``-1`` after a message on stderr whenever
            the trailing arguments are missing, superfluous or name an
            unexpected sequence type.
    """

    def _abort(message):
        # Report the problem on stderr and terminate with status -1.
        sys.stderr.write(message)
        sys.exit(-1)

    assert(argv[1]=='poim')
    if len(argv) < 7:
        _abort("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles  poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])

    # Fixed-position parameters first, then let the parsers consume the rest.
    C = float(argv[2])
    poimdegree = int(argv[3])
    kernelname, kparam, argv_rest = parse.parse_kernel_param(argv[4:], False)
    examples, labels, argv_rest = parse.parse_input_file_train(kernelname, argv_rest)

    # Exactly three arguments must remain: plot file, sequence type, converter.
    n_rest = len(argv_rest)
    if n_rest < 1:
        _abort("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n")
    if n_rest < 2:
        _abort("[dna|protein] non(nucleotide|amino)converter are missing\n")
    if n_rest < 3:
        # The converter is missing; the last argument is the sequence type
        # and decides which hint is shown.
        seq_type = argv_rest[-1]
        converter_hints = {
            'dna': "non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n",
            'protein': "non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n",
        }
        hint = converter_hints.get(seq_type)
        if hint is None:
            hint = "Here expect FASTA sequence type as [dna|protein] instead of -"+ seq_type +"- Cannot continue.\n"
        _abort(hint)
    if n_rest > 3:
        _abort("Too many arguments\n")

    poimfilename, seq_source, nuc_con = argv_rest

    utils.check_params(kparam, C, len(examples[0]))

    # Train the SVM, then derive the POIMs from the trained model.
    svm, kernel, feats_train, preproc = train(
        examples, labels, C, kernelname, kparam, seq_source, nuc_con
    )
    poim, max_poim, diff_poim, poim_totalmass = compute_poims(
        svm, kernel, poimdegree, len(examples[0])
    )

    # Render the POIMs to the requested file.
    plots.plot_poims(
        poimfilename, poim, max_poim, diff_poim, poim_totalmass,
        poimdegree, len(examples[0])
    )