def parser(def_model='l2svm', def_loss='f1score', def_verbose=True):
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-m", "--model", dest="model", default='l2 square',
                      type='string',  # choices=known_models,
                      help="model selection from {%s}" % ', '.join(s for s in known_models))
    parser.add_option("-i", "--indir", dest="indir",
                      help="input directory", metavar="INDIR")
    parser.add_option("-o", "--outdir", dest="outdir",
                      help="output directory", metavar="OUTDIR")
    parser.add_option("-l", "--lscore", dest="scorer", default='hamming',
                      type='choice', choices=['hamming', 'rankloss'],
                      help="score type, hamming or rankloss (def=hamming)")
    #parser.add_option("-s", "--shuffle", dest="shuffle", default=False,
    #                  action="store_true", help="shuffle labels (for random baseline)")
    parser.add_option("-t", "--toy", default="0", type="choice", choices=["0", "1"],
                      dest="istoy",
                      help='generate toy data instead of real data (-t 1) or not (-t 0)')
    parser.add_option("-q", "--quiet", default=False, action="store_true",
                      dest="quiet")
    (options, args) = parser.parse_args()

    # extract options
    model_all = options.model      # model type
    #shuffle = options.shuffle     # shuffle yes/no
    verbose = not options.quiet
    scorer = options.scorer
    istoy = int(options.istoy)     # yes/no: run on the smaller toy data

    if istoy:
        cv_dir = os.path.expanduser(os.path.join("~", "workdir", "multilabel",
                                                 "toy", "results", "doublecv"))
    else:
        cv_dir = os.path.expanduser(os.path.join("~", "workdir", "multilabel",
                                                 "results", "doublecv"))
    resultdir = os.path.join(cv_dir, "processed")
    #if shuffle:
    #    cv_dir = os.path.join(cv_dir, "shuffle")
    if options.indir is not None:
        cv_dir = options.indir
    if options.outdir is not None:
        resultdir = options.outdir

    # make result directory (ignore the error if it already exists)
    try:
        os.makedirs(resultdir)
    except OSError:
        pass

    # extract model name
    model = clean_name(model_all)

    saveinfo = {'resultdir': resultdir}
    runinfo = {'model': model, "cv_dir": cv_dir, "scorer": scorer}  # , "shuffle": shuffle
    return saveinfo, runinfo, verbose
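
# A minimal usage sketch (an assumption, not from the original source): the tuple
# returned by parser() is presumably consumed by a post-processing entry point
# along these lines, where process_results() is a hypothetical stand-in for
# whatever aggregation routine this module actually defines.
#
#   saveinfo, runinfo, verbose = parser()
#   process_results(runinfo, saveinfo, verbose=verbose)
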
def main():
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-m", "--model", dest="model", default='l2 svm',
                      type='string',  # choices=known_models,
                      help="model selection from {%s}" % ', '.join(s for s in known_models))
    parser.add_option("-i", "--indir", dest="indir",
                      help="input directory", metavar="INDIR")
    parser.add_option("-o", "--outdir", dest="outdir",
                      help="output directory", metavar="OUTDIR")
    parser.add_option("-c", "--cv", dest="CV", type="int", default=0,
                      help="which CV set (typically 0...4)")
    parser.add_option("-l", "--lscore", dest="scorer", default='hamming',
                      type='choice', choices=['hamming', 'rankloss'],
                      help="score type, hamming or rankloss (def=hamming)")
    parser.add_option("-p", dest="param", type=float, default=10,
                      help="number of model hyperparameter values to test (def=10)")
    parser.add_option("-n", "--n_jobs", dest="n_jobs", type=int, default=6,
                      help="number of processors used in parallel fit (def=6)")
    parser.add_option("-s", "--shuffle", dest="shuffle", default=False,
                      action="store_true", help="shuffle labels (for random baseline)")
    parser.add_option("-t", "--toy", default="1", type="choice", choices=["0", "1"],
                      dest="istoy",
                      help='generate toy data instead of real data (-t 1) or not (-t 0)')
    parser.add_option("-q", "--quiet", default=False, action="store_true",
                      dest="quiet")
    (options, args) = parser.parse_args()

    # extract options
    model_all = options.model      # model type
    cv = options.CV                # which cv set
    p = options.param              # number of hyperparameter values
    shuffle = options.shuffle      # shuffle yes/no
    verbose = 0 if options.quiet else 1
    scorer = options.scorer
    istoy = int(options.istoy)     # yes/no: run on the smaller toy data
    n_jobs = options.n_jobs        # number of parallel fit jobs

    datadir = os.path.expanduser(os.path.join("~", "workdir", "data",
                                              "openfMRI", "preprocessed"))
    if istoy:
        resultdir = os.path.expanduser(os.path.join("~", "workdir", "multilabel",
                                                    "toy", "results", "wardcv"))
    else:
        resultdir = os.path.expanduser(os.path.join("~", "workdir", "multilabel",
                                                    "results", "wardcv"))
    if shuffle:
        resultdir = os.path.join(resultdir, "shuffle")
    if options.indir is not None:
        datadir = options.indir
    if options.outdir is not None:
        resultdir = options.outdir

    # make result directory (ignore the error if it already exists)
    try:
        os.makedirs(resultdir)
    except OSError:
        pass

    # extract model name
    model = clean_name(model_all)

    # run it! ###################################################
    # extract_data
    if verbose:
        print "Extracting data..."
    T = time.time()
    Xt, Xe, Z, train, test, subtrain, subtest = extract_data(datadir, cv, verbose, istoy)
    if verbose:
        dims = Xt.shape + (Z.shape[1],)
        print "data dims: N=%d, D=%d, K=%d" % dims
    if verbose:
        print "extract time = %s" % (time.time() - T,)
    if shuffle:
        print "shuffling output labels..."

    if verbose:
        print "Training model...\nmodel=%s, cv=%d, scorer=%s" % (model, cv, scorer)
    T = time.time()

    # run model (or multimodel)
    saveinfo = {'resultdir': resultdir, 'cv': cv}
    runinfo = {'model': model, "scorer": scorer, "p": p,
               "shuffle": shuffle, "n_jobs": n_jobs}
    data = (Xt, Xe, Z, train, test, subtrain, subtest)
    train_multilabel(data, runinfo, saveinfo, verbose)
    if verbose:
        print "Combined Train time = %s" % (time.time() - T,)
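
# Example invocation (a sketch; the script name and the option values are
# assumptions, not taken from the original source):
#
#   python train_wardcv.py -m "l2 svm" -c 0 -l hamming -p 10 -n 6 -t 0
#
# Entry-point guard, assuming the original module does not already define one
# further down the file.
if __name__ == "__main__":
    main()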