def svm_poim(argv):
    """Parse the ``poim`` command line, train an SVM and plot its POIMs.

    Expected layout of ``argv`` (taken from the usage message)::

        prog poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles
             poim.png [dna|protein] non(nucleotide|amino)converter

    ``argv[2]`` is converted with ``float`` (C) and ``argv[3]`` with ``int``
    (poimdegree).  The remaining arguments are handed to
    ``parse.parse_kernel_param`` and then ``parse.parse_input_file_train``;
    whatever those leave over must be exactly three items: the output file
    name, the sequence type and the converter symbol.

    Every command-line error is reported on stderr and ends the process with
    ``sys.exit(-1)``.  Returns ``None``; the result is produced by
    ``plots.plot_poims``.
    """
    # NOTE(review): a second ``svm_poim`` is defined later in this file and
    # rebinds the name -- confirm which definition is meant to survive.

    def fail(message):
        # Report a command-line problem and terminate with status -1.
        sys.stderr.write(message)
        sys.exit(-1)

    # Check the argument count before touching argv[1]; otherwise a bare
    # invocation dies with IndexError instead of showing the usage text.
    if len(argv) < 7:
        fail("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])
    # Dispatch invariant rather than user input validation.
    assert argv[1] == 'poim'

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    (kernelname, kparam, argv_rest) = parse.parse_kernel_param(argv[4:], False)
    (examples, labels, argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)

    if len(argv_rest) < 1:
        fail("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n")
    if len(argv_rest) < 2:
        fail("[dna|protein] non(nucleotide|amino)converter are missing\n")
    if len(argv_rest) < 3:
        # Exactly two leftovers: the last one is the sequence type, so the
        # message can say which kind of converter is missing.
        seq_type = argv_rest[-1]
        if seq_type == 'dna':
            fail("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
        elif seq_type == 'protein':
            fail("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
        else:
            fail("Here expect FASTA sequence type as [dna|protein] instead of -" + seq_type + "- Cannot continue.\n")
    if len(argv_rest) > 3:
        fail("Too many arguments\n")

    poimfilename = argv_rest[0]
    seq_source = argv_rest[1]
    nuc_con = argv_rest[2]

    # Length of the first example; presumably all examples share it -- TODO confirm.
    seq_len = len(examples[0])
    utils.check_params(kparam, C, seq_len)

    # train svm and compute POIMs
    (svm, kernel, feats_train, preproc) = train(examples, labels, C, kernelname, kparam, seq_source, nuc_con)
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print("done with training ")
    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, seq_len)

    # plot poims
    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, seq_len)
def svm_poim(argv):
    """Parse the ``poim`` command line, train an SVM and plot its POIMs.

    Expected layout of ``argv`` (taken from the usage message)::

        prog poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles
             poim.png [dna|protein] non(nucleotide|amino)converter

    ``argv[2]`` is converted with ``float`` (C) and ``argv[3]`` with ``int``
    (poimdegree).  The remaining arguments are handed to
    ``parse.parse_kernel_param`` and then ``parse.parse_input_file_train``;
    whatever those leave over must be exactly three items: the output file
    name, the sequence type and the converter symbol.

    Every command-line error is reported on stderr and ends the process with
    ``sys.exit(-1)``.  Returns ``None``; the result is produced by
    ``plots.plot_poims``.
    """
    # NOTE(review): an identically named ``svm_poim`` is defined earlier in
    # this file; this later definition is the one Python binds -- confirm the
    # duplicate is intended.

    def fail(message):
        # Report a command-line problem and terminate with status -1.
        sys.stderr.write(message)
        sys.exit(-1)

    # Check the argument count before touching argv[1]; otherwise a bare
    # invocation dies with IndexError instead of showing the usage text.
    if len(argv) < 7:
        fail("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0])
    # Dispatch invariant rather than user input validation.
    assert argv[1] == 'poim'

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    (kernelname, kparam, argv_rest) = parse.parse_kernel_param(argv[4:], False)
    (examples, labels, argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)

    if len(argv_rest) < 1:
        fail("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n")
    if len(argv_rest) < 2:
        fail("[dna|protein] non(nucleotide|amino)converter are missing\n")
    if len(argv_rest) < 3:
        # Exactly two leftovers: the last one is the sequence type, so the
        # message can say which kind of converter is missing.
        seq_type = argv_rest[-1]
        if seq_type == 'dna':
            fail("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
        elif seq_type == 'protein':
            fail("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
        else:
            fail("Here expect FASTA sequence type as [dna|protein] instead of -" + seq_type + "- Cannot continue.\n")
    if len(argv_rest) > 3:
        fail("Too many arguments\n")

    poimfilename = argv_rest[0]
    seq_source = argv_rest[1]
    nuc_con = argv_rest[2]

    # Length of the first example; presumably all examples share it -- TODO confirm.
    seq_len = len(examples[0])
    utils.check_params(kparam, C, seq_len)

    # train svm and compute POIMs
    (svm, kernel, feats_train, preproc) = train(examples, labels, C, kernelname, kparam, seq_source, nuc_con)
    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, seq_len)

    # plot poims
    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, seq_len)