Example #1
def smoke_build_model(build_model):
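    # send log output to a throwaway file for the duration of the test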
    set_log_file("testlog.log")
    prepared_data = prepare_fake_data()

    f_train, f_validate, train_idx, valid_idx, train_eval, valid_eval = (
        build_model(prepared_data, batch_size=1))

    best_validation_loss, best_epoch = (
        train_model(f_train, f_validate, train_idx, valid_idx, train_eval, valid_eval,
                    n_epochs=100))

    assert best_validation_loss > 0.5
    assert best_epoch > 0
Example #2
    parser.add_argument('-p',
                        dest='param_set',
                        type=str,
                        default='default',
                        choices=config.all_param_set_keys,
                        help='the name of the parameter set that we want to use')
    parser.add_argument('-f',
                        dest='file',
                        type=str,
                        default=None,
                        help='the data file to use')
    parser.add_argument('-o',
                        dest='outname',
                        type=str,
                        default=gen_log_name(),
                        help='name for the log file to be generated')
    parser.add_argument(
        '-t',
        dest='task_num',
        type=int,
        default=0,
        help='a way to separate different runs of the same parameter-set')
    args = parser.parse_args()

    params = config.get_config(args.param_set)
    set_log_file(args.outname)
    if args.file:
        params['dataset_name'] = args.file
    elif 'dataset_name' not in params:
        params['dataset_name'] = default_dataset
    run(0, **params)
    print "finished"
    if sys.platform.startswith('win'):
        from win_utils import winalert
        winalert()
Example #3
    dataset = smart_load_data(**kwargs)
    train_idx, valid_idx = cv_split_binarized(dataset, percent=0.2, fold_index=task_num)
    prepared_data = (dataset, train_idx, valid_idx)

    # load classifiers to build ensemble out of
    saved_classifiers = [fn for fn in os.listdir('.')
                         if os.path.splitext(fn)[1] == '.model']

    # build and train ensemble
    model = SelectedModel(prepared_data, saved_classifiers=saved_classifiers, **kwargs)
    _, params = model.train_full(**kwargs)

if __name__ == '__main__':
    args = docopt(__doc__)

    params = {}
    log_filename = args['--out'] or gen_log_name()
    if args['--quiet']:
        log_filename = os.devnull
        print("Not printing to log file.")
    set_log_file(log_filename)

    if args['--file']:
        params['dataset_name'] = args['--file']

    task_num = int(args['--task_number'])

    params['conds'] = COND_TYPES[task_num % len(COND_TYPES)]
    run(task_num=task_num, **params)
    
    print("Finished")
Example #4
    with open(fname, 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(headers)
        for i in idxs:
            student = data['subject'][i]
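            # a response coded as 2 in the 'correct' column counts as correct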
            outcome = 'correct' if data['correct'][i] == 2 else 'incorrect'
            KCs = 'word' if single_skill else data.orig['skill'][i]
            word_feats = skills[data['skill'][i]]
            row = [student, outcome, KCs] + list(word_feats)
            writer.writerow(row)


def to_fast(data, train_fname=None, valid_fname=None, fold=0, vector_length=150, single_skill=False):
    train_fname = train_fname or 'FAST+deepkt_train{}.txt'.format(fold)
    valid_fname = valid_fname or 'FAST+deepkt_test{}.txt'.format(fold)
    valid_idx, train_idx = cv_split(data, fold_index=fold, no_new_skill=True)
    skills = gen_word_matrix(data.get_data('skill'), data['skill'].enum_pairs, vector_length=vector_length)

    # create fast header
    feat_headers = ['*features_{}'.format(i) for i in xrange(vector_length)]
    headers = ['student', 'outcome', 'KCs'] + feat_headers

    _to_fast_with_idxs(train_fname, headers, skills, data, train_idx, single_skill=single_skill)
    _to_fast_with_idxs(valid_fname, headers, skills, data, valid_idx, single_skill=single_skill)

set_log_file('temp.log')
#for fold in xrange(14):
#    to_fast(fold=fold, single_skill=False)
data = prepare_data(dataset_name='data/data5.gz', top_n=14)
to_fast(data, fold=0, single_skill=False)
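
The commented-out loop above suggests exporting all folds in one pass, but it predates the data argument that to_fast now requires. A corrected sketch (the fold count of 14 is taken from that comment; whether it matches this dataset is an assumption):

data = prepare_data(dataset_name='data/data5.gz', top_n=14)
for fold in xrange(14):  # fold count taken from the commented-out loop above
    to_fast(data, fold=fold, single_skill=False)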
Example #5
    prepared_data = chinese.prepare_data(top_n=40, **kwargs)
    train_idx, valid_idx = cv_split(prepared_data, percent=.1, fold_index=task_num, **kwargs)

    model = SelectedModel((prepared_data, train_idx, valid_idx), **kwargs)
    model.train_full()


if __name__ == '__main__':
    default_dataset = 'raw_data/chinese_dictation.txt'

    parser = argparse.ArgumentParser(description="run an experiment on this computer")
    parser.add_argument('-p', dest='param_set', type=str, default='default',
                        choices=config.all_param_set_keys,
                        help='the name of the parameter set that we want to use')
    parser.add_argument('-f', dest='file', type=str, default=None,
                        help='the data file to use')
    parser.add_argument('-o', dest='outname', type=str, default=gen_log_name(),
                        help='name for the log file to be generated')
    parser.add_argument('-t', dest='task_num', type=int, default=0,
                        help='a way to separate different runs of the same parameter-set')
    args = parser.parse_args()

    params = config.get_config(args.param_set)
    set_log_file(args.outname)
    if args.file:
        params['dataset_name'] = args.file
    elif 'dataset_name' not in params:
        params['dataset_name'] = default_dataset
    run(0, **params)
    print "finished"
Example #6
def __enter__(self):
    set_log_file(self.log_name)
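
This fragment appears to come from a context manager that points logging at a given file for the duration of a with block. A minimal sketch of what the surrounding class might look like; the class name, constructor, and the __exit__ cleanup are assumptions, not taken from the source:

import os

class LogTo(object):
    # Hypothetical wrapper around set_log_file; only __enter__'s body
    # comes from the fragment above.
    def __init__(self, log_name):
        self.log_name = log_name

    def __enter__(self):
        set_log_file(self.log_name)
        return self  # returning self is conventional, not shown in the fragment

    def __exit__(self, exc_type, exc_value, traceback):
        set_log_file(os.devnull)  # assumed cleanup: stop writing to the named log
        return False  # never suppress exceptions

Usage would then look like: with LogTo('run.log'): run(**params).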
Example #7
    with open('{}.model'.format(output_name), "wb") as f:
        pickle.dump(model, f)


if __name__ == '__main__':
    args = docopt(__doc__)

    # load args
    params = config.get_config(args['--param_set'])
    err_filename = args['--err'] or gen_log_name()
    out_filename = args['--out'] or "{}.model".format(err_filename)
    if args['--quiet']:
        err_filename = os.devnull
        out_filename = os.devnull
        print("Suppressing logging and output.")
    set_log_file(err_filename)

    task_num = int(args['--task_number'])

    params['model'] = args['--model']
    params['features'] = args['--feature']
    params['conds'] = args['--cond']
    params['dataset_name'] = args['--in']
    params['data_mode'] = args['--data_mode']
    params['output_name'] = out_filename
    params['task_num'] = task_num

    if args['run']:
        run(**params)

    # TODO: figure out a good interface
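
For reference, the option lookups above imply a docopt usage string along these lines. This is a reconstruction, not the script's actual __doc__; the script name and the option argument names are guesses:

"""Usage:
    experiment.py run --in=<file> [--model=<model>] [--feature=<feat>]
                      [--cond=<cond>] [--data_mode=<mode>] [--param_set=<name>]
                      [--task_number=<n>] [--err=<file>] [--out=<file>] [--quiet]
"""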
Example #8
    else:
        raise Exception("model type is not valid")
    dataset = prepare_data(**kwargs)
    train_idx, valid_idx = cv_split(dataset, percent=0.1, fold_index=task_num)
    prepared_data = (dataset, train_idx, valid_idx)

    model = SelectedModel(prepared_data, **kwargs)
    model.train_full(**kwargs)


if __name__ == '__main__':
    default_dataset = "raw_data/all_siegle.txt"

    args = docopt(__doc__)

    params = config.get_config(args['--param_set'])
    log_filename = args['--out'] or gen_log_name()
    if args['--quiet']:
        log_filename = os.devnull
        print("Not printing to log file.")
    set_log_file(log_filename)

    if args['--file']:
        params['dataset_name'] = args['--file']
    elif 'dataset_name' not in params:
        params['dataset_name'] = default_dataset

    params['conds'] = ['EyesClosed', 'EyesOpen']
    run(0, **params)
    print("Finished")