Example #1
def change_brand(brandcode,brandcodes,label_type):
    dataset = Nikkei(brandcode = brandcode)
    dataset.unify_stockprices(dataset=dataset.raw_data[brandcode], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    for datatype in ['train', 'valid', 'test']:
        dataset.phase2[datatype]['y'] = dataset.phase2[datatype][brandcode]
    return dataset
Example #2
def change_brand(brandcode, brandcodes, label_type):
    dataset = Nikkei(brandcode=brandcode)
    dataset.unify_stockprices(dataset=dataset.raw_data[brandcode],
                              brandcodes=brandcodes,
                              label_type=label_type)
    reguralize_data(dataset, brandcodes)
    for datatype in ['train', 'valid', 'test']:
        dataset.phase2[datatype]['y'] = dataset.phase2[datatype][brandcode]
    return dataset
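Both variants do the same thing: rebuild the dataset for one brand, merge in its stock prices, and point each split's label column at that brand. A minimal self-contained sketch of the phase2 rewrite, using a plain dict in place of the real Nikkei object (an assumption for illustration only):

# phase2 modeled as a dict of splits; the brand's column becomes the label 'y'
phase2 = {
    'train': {'0101': [1, 2], '0102': [3, 4]},
    'valid': {'0101': [5], '0102': [6]},
    'test':  {'0101': [7], '0102': [8]},
}
brandcode = '0101'
for datatype in ['train', 'valid', 'test']:
    phase2[datatype]['y'] = phase2[datatype][brandcode]
print(phase2['train']['y'])  # -> [1, 2]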
Example #3
def retrain_CompressModel():
    print 'STEP 1 start...'
    dataset = Nikkei(dataset_type=params['dataset_type'],
                     brandcode=params['STEP3']['brandcode'])
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    if params['STEP1']['model'] == 'rbm':
        model = load_model(model_type='rbm',
                           input=x,
                           params_dir=model_dirs['STEP1'])
        train_rbm(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  batch_size=params['STEP1']['batch_size'],
                  outdir=model_dirs['STEP1'])
    # elif params['STEP1']['model'] == 'sda':
    #     presae_dir = '%s/%s/h%d_lr%s_b%s_c%s.%s' % (default_model_dir, 'STEP1', params['STEP1']['n_hidden'], str(params['STEP1']['learning_rate']), str(params['STEP1']['reg_weight']), str(params['STEP1']['corruption_level']), 'sae')
    #     x2 = T.matrix('x')
    #     pre_model = load_model(model_type='sae', input=x, params_dir=presae_dir)
    #     model = SparseAutoencoder(input=x2, n_visible=params['STEP1']['n_hidden'], n_hidden=params['STEP1']['n_hidden'], reg_weight=params['STEP1']['reg_weight'], corruption_level=params['STEP1']['corruption_level'])
    #     train_sae2(input=x, model=model, pre_model=pre_model, dataset=dataset, learning_rate=params['STEP1']['learning_rate'], outdir=model_dirs['STEP1'])
    else:
        model = load_model(model_type='sae',
                           input=x,
                           params_dir=model_dirs['STEP1'])
        train_sae(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  batch_size=params['STEP1']['batch_size'],
                  outdir=model_dirs['STEP1'])
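retrain_CompressModel relies on a project-level load_model helper that is not shown in these listings. As a rough, hypothetical sketch only (the real helper may differ in layout and behavior), it would unpickle a saved model and re-attach the symbolic input:

import cPickle  # pickle in Python 3

def load_model(model_type, input, params_dir):
    # hypothetical sketch: restore a trained 'rbm'/'sae' model from disk
    with open(params_dir, 'rb') as f:
        model = cPickle.load(f)
    model.input = input  # assumption: models keep their Theano input symbol
    return model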
Example #4
def build_CompressModel():
    print 'STEP 1 start...'
    dataset = Nikkei(dataset_type=params['experiment_type'],
                     brandcode=params['STEP3']['brandcode'])
    # pdb.set_trace()
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    if params['STEP1']['model'] == 'rbm':
        model = RBM(input=x,
                    n_visible=dataset.phase1_input_size,
                    n_hidden=params['STEP1']['n_hidden'],
                    reg_weight=params['STEP1']['beta'])
        train_rbm(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  outdir=model_dirs['STEP1'])
    else:
        model = SparseAutoencoder(input=x,
                                  n_visible=dataset.phase1_input_size,
                                  n_hidden=params['STEP1']['n_hidden'],
                                  beta=params['STEP1']['beta'])
        train_sae(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  outdir=model_dirs['STEP1'])
Example #5
def unify_kijis(dataset):
    print 'STEP 2 start...'
    if dataset is None:
        print 'dataset load...'
        dataset = Nikkei(dataset_type=params['experiment_type'], brandcode=params['STEP3']['brandcode'])
    # model = load_model(model_dirs['STEP1'])
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    ###########################################################
    model = load_model(input=x, params_dir=model_dirs['STEP1'], model_type=params['STEP1']['model'])
    # model = cPickle.load(open(model_dirs['STEP1']))
    ###########################################################
    dataset.unify_kijis(model, params['STEP1']['model'], params['experiment_type'])
    with open(model_dirs['STEP2'], 'w') as out:
        out.write(cPickle.dumps(dataset))
    return dataset
Example #6
def unify_kijis(dataset):
    print "STEP 2 start..."
    if dataset is None:
        print "dataset load..."
        dataset = Nikkei(dataset_type=params["experiment_type"], brandcode=params["STEP3"]["brandcode"])
    # model = load_model(model_dirs['STEP1'])
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images

    ###########################################################
    model = load_model(input=x, params_dir=model_dirs["STEP1"], model_type=params["STEP1"]["model"])
    # model = cPickle.load(open(model_dirs['STEP1']))
    ###########################################################
    dataset.unify_kijis(model, params["STEP1"]["model"], params["experiment_type"])
    with open(model_dirs["STEP2"], "w") as out:
        out.write(cPickle.dumps(dataset))
    return dataset
Example #7
def retrain_CompressModel():
    print 'STEP 1 start...'
    dataset = Nikkei(dataset_type=params['experiment_type'], brandcode=params['STEP3']['brandcode'])
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    if params['STEP1']['model'] == 'rbm':
        model = load_model(model_type='rbm', input=x, params_dir=model_dirs['STEP1'])
        train_rbm(input=x, model=model, dataset=dataset, learning_rate=params['STEP1']['learning_rate'], outdir=model_dirs['STEP1'])
    else:
        model = load_model(model_type='sae', input=x, params_dir=model_dirs['STEP1'])
        train_sae(input=x, model=model, dataset=dataset, learning_rate=params['STEP1']['learning_rate'], outdir=model_dirs['STEP1'])
Example #8
######################################################


def unify_stockprices(dataset):
    print 'STEP 3 start...'
    dataset.unify_stockprices(dataset.raw_data[params['STEP3']['brandcode']])


##############################################################
###  STEP 4: predict the stock price of the specified brand  ###
##############################################################


def predict(dataset):
    print 'STEP 4 start...'
    train_DBN(dataset=dataset,
              hidden_layers_sizes=params['STEP4']['hidden_layers_sizes'],
              pretrain_lr=params['STEP4']['pretrain']['learning_rate'],
              pretrain_batch_size=params['STEP4']['pretrain']['batch_size'],
              pretrain_epochs=params['STEP4']['pretrain']['epochs'],
              finetune_lr=params['STEP4']['finetune']['learning_rate'],
              finetune_batch_size=params['STEP4']['finetune']['batch_size'],
              finetune_epochs=params['STEP4']['finetune']['epochs'])


if __name__ == '__main__':
    dataset = Nikkei(dataset_type=params['dataset_type'],
                     brandcode=params['STEP3']['brandcode'])
    unify_stockprices(dataset)
    predict(dataset)
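Every snippet on this page reads from a shared params dict. Its real values live in the project's configuration; the shape below is reconstructed from the keys the snippets use, and all values are placeholders (assumptions), shown only to make the indexing above easier to follow:

params = {
    'dataset_type': 'all',            # placeholder value
    'experiment_type': 'baseline',    # 'baseline' | 'chi2_selected' | proposed
    'STEP1': {
        'model': 'sae',               # 'rbm' | 'sae' | 'sda'
        'n_hidden': 1000,
        'learning_rate': 0.05,
        'batch_size': 50,
        'beta': 0.1,                  # sparsity weight in one variant
        'reg_weight': 0.1,
        'corruption_level': 0.5,
    },
    'STEP3': {'brandcode': '0101'},   # a list of codes in the sweep scripts
    'STEP4': {
        'hidden_layers_sizes': [1000, 500],
        'corruption_levels': [0.5, 0.5],
        'k': 1,
        'hidden_recurrent': 100,      # also a list in the sweep scripts
        'pretrain': {'learning_rate': 0.05, 'batch_size': 50, 'epochs': 10},
        'finetune': {'learning_rate': 0.01, 'batch_size': 50, 'epochs': 100},
    },
}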
Example #9
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1, model_type=2):
    print 'STEP 3 start...'
    if dataset is None:
        if params['experiment_type'] == 'baseline':
            print 'start to load baseline dataset...'
            dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original'))
        elif params['experiment_type'] == 'chi2_selected':
            print 'start to load chi2_selected...'
            dataset = Nikkei() 
        else:
            print 'start to load proposed dataset...'
            dataset = cPickle.load(open(model_dirs['STEP2']))
    print 'start to unify stockprice...'
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params['experiment_type'] != 'chi2_selected':
        dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes,
                                  dataset_type=params['experiment_type'], label_type=label_type)
    else:
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])
    
    if model_type == 0:
        def transformY(data_y):
            # flatten the labels: regression targets come as one-element
            # rows, classification targets as scalars
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)
        train_x = dataset.phase2['train']['x']
        train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0)
        test_x = dataset.phase2['test']['x']
        while True:

            if params['experiment_type'] == 'baseline':
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2['train']['y'])
            train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0)
            test_y = transformY(dataset.phase2['test']['y'])


            if label_type < 3:
                tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print 'classification'
                tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit in either case so best_model is defined for both paths
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y))
            print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()
    
    
    pretrain_params = {
        'dataset' : dataset, 
        'hidden_layers_sizes' : params['STEP4']['hidden_layers_sizes'],
        'pretrain_lr' : params['STEP4']['pretrain']['learning_rate'],
        'pretrain_batch_size' : params['STEP4']['pretrain']['batch_size'],
        'pretrain_epochs' : params['STEP4']['pretrain']['epochs'],
        'corruption_levels' : params['STEP4']['corruption_levels'],
        'k' : params['STEP4']['k'],
        'hidden_recurrent': params['STEP4']['hidden_recurrent'],
        'n_outs' : (1 + y_type)
    }
    pretrain_model = model.pretrain(pretrain_params, y_type)
    pretrain_params = get_model_params(pretrain_model)
    while True:
        finetune_params = {
            'dataset' : dataset,
            'model' : pretrain_model,
            'finetune_lr' : params['STEP4']['finetune']['learning_rate'],
            'finetune_batch_size' : params['STEP4']['finetune']['batch_size'],
            'finetune_epochs' : params['STEP4']['finetune']['epochs']
        }
        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
        pdb.set_trace()
        set_model_params(pretrain_model, pretrain_params)
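Note the pattern around GridSearchCV: best_estimator_ only exists after fit has been called, so the grid search is fit before either branch predicts. A standalone sketch of the same pattern on synthetic data (modern scikit-learn module path and scoring name; this code's vintage used sklearn.grid_search and the scoring string "mean_squared_error"):

import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

X = numpy.random.rand(40, 3)
y = numpy.random.rand(40)
grid = [{'kernel': ['rbf'],
         'gamma': [10 ** i for i in range(-4, 0)],
         'C': [10 ** i for i in range(0, 4)]}]
gscv = GridSearchCV(SVR(), grid, cv=5, scoring='neg_mean_squared_error')
gscv.fit(X, y)                     # fit first...
best_model = gscv.best_estimator_  # ...then the best estimator exists
print(gscv.best_params_)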
Example #10

if __name__ == '__main__':
    ###   Experiment over several brand codes
    all_size = (len(params['STEP3']['brandcode'])
                * len(params['STEP4']['hidden_layers_sizes'])
                * len(params['STEP4']['pretrain']['batch_size'])
                * len(params['STEP4']['pretrain']['learning_rate'])
                * len(params['STEP4']['pretrain']['epochs'])
                * len(params['STEP4']['finetune']['batch_size'])
                * len(params['STEP4']['finetune']['learning_rate'])
                * len(params['STEP4']['finetune']['epochs']))
    i = 0
    for brandcode in params['STEP3']['brandcode']:
        model_dirs = reload_model_dirs(brandcode)
        dataset = Nikkei(dataset_type=params['dataset_type'],
                         brandcode=brandcode)
        dataset.unify_stockprices(dataset.raw_data[brandcode])
        for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']:
            for hidden_recurrent in params['STEP4']['hidden_recurrent']:
                for batch_size_pretrain in params['STEP4']['pretrain']['batch_size']:
                    for learning_rate_pretrain in params['STEP4']['pretrain']['learning_rate']:
                        for epochs_pretrain in params['STEP4']['pretrain']['epochs']:
                            for batch_size_finetune in params['STEP4']['finetune']['batch_size']:
                                for learning_rate_finetune in params['STEP4']['finetune']['learning_rate']:
                                    for epochs_finetune in params['STEP4']['finetune']['epochs']:
                                        pass  # loop body truncated in the original listing
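The innermost loop body is missing from this listing; Example #12 below shows the complete pattern: assemble the pretrain parameters, pretrain once per configuration, then sweep the finetune settings and append each result to the log.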
Example #11
def predict(dataset, model, brandcodes=["0101"], label_type=1, y_type=1, model_type=2):
    print "STEP 3 start..."
    if dataset is None:
        if params["experiment_type"] == "baseline":
            print "start to load baseline dataset..."
            dataset = cPickle.load(open(default_model_dir + "/STEP2/baseline_original"))
        elif params["experiment_type"] == "chi2_selected":
            print "start to load chi2_selected..."
            dataset = Nikkei()
        else:
            print "start to load proposed dataset..."
            dataset = cPickle.load(open(model_dirs["STEP2"]))
    print "start to unify stockprice..."
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params["experiment_type"] != "chi2_selected":
        dataset.unify_stockprices(
            dataset=dataset.baseline_original,
            brandcodes=brandcodes,
            dataset_type=params["experiment_type"],
            label_type=label_type,
        )
    else:
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])

    if model_type == 0:

        def transformY(data_y):
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)

        train_x = dataset.phase2["train"]["x"]
        train_x = numpy.append(train_x, dataset.phase2["valid"]["x"], 0)
        test_x = dataset.phase2["test"]["x"]
        while True:

            if params["experiment_type"] == "baseline":
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2["train"]["y"])
            train_y = numpy.append(train_y, transformY(dataset.phase2["valid"]["y"]), 0)
            test_y = transformY(dataset.phase2["test"]["y"])

            if label_type < 3:
                tuned_parameters = [
                    {"kernel": ["rbf"], "gamma": [10 ** i for i in range(-4, 0)], "C": [10 ** i for i in range(0, 4)]}
                ]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print "classification"
                tuned_parameters = [
                    {
                        "kernel": ["rbf", "linear"],
                        "gamma": [10 ** i for i in range(-4, 0)],
                        "C": [10 ** i for i in range(0, 4)],
                    }
                ]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit in either case so best_model is defined for both paths
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print "training accuracy : %.2f , %d / %d" % (
                float(result_train) / len(train_y),
                result_train,
                len(train_y),
            )
            print "testing accuracy : %.2f , %d / %d" % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()

    pretrain_params = {
        "dataset": dataset,
        "hidden_layers_sizes": params["STEP4"]["hidden_layers_sizes"],
        "pretrain_lr": params["STEP4"]["pretrain"]["learning_rate"],
        "pretrain_batch_size": params["STEP4"]["pretrain"]["batch_size"],
        "pretrain_epochs": params["STEP4"]["pretrain"]["epochs"],
        "corruption_levels": params["STEP4"]["corruption_levels"],
        "k": params["STEP4"]["k"],
        "hidden_recurrent": params["STEP4"]["hidden_recurrent"],
        "n_outs": (1 + y_type),
    }
    pretrain_model = model.pretrain(pretrain_params, y_type)
    pretrain_params = get_model_params(pretrain_model)
    while True:
        finetune_params = {
            "dataset": dataset,
            "model": pretrain_model,
            "finetune_lr": params["STEP4"]["finetune"]["learning_rate"],
            "finetune_batch_size": params["STEP4"]["finetune"]["batch_size"],
            "finetune_epochs": params["STEP4"]["finetune"]["epochs"],
        }
        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
        pdb.set_trace()
        set_model_params(pretrain_model, pretrain_params)
Example #12
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1, m=1):
    print 'STEP 3 start...'
    if dataset is None:
        if params['experiment_type'] == 'baseline':
            print 'start to load baseline dataset...'
            dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original'))
        elif params['experiment_type'] == 'chi2_selected':
            print 'start to load chi2_selected dataset...'
            dataset = Nikkei()
        else:
            print 'start to load proposed dataset...'
            dataset = cPickle.load(open(model_dirs['STEP2']))
    print 'start to unify stockprice...'
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params['experiment_type'] == 'chi2_selected':
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    else:
        dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes,
                                  dataset_type=params['experiment_type'], label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])
    #change_brand(dataset, '0101')
    if m == 0:
        def transformY(data_y):
            # flatten the labels: regression targets come as one-element
            # rows, classification targets as scalars
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)
        train_x = dataset.phase2['train']['x']
        train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0)
        test_x = dataset.phase2['test']['x']
        while True:

            if params['experiment_type'] == 'baseline':
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2['train']['y'])
            train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0)
            test_y = transformY(dataset.phase2['test']['y'])


            if label_type < 3:
                tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print 'classification'
                tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit in either case so best_model is defined for both paths
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y))
            print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()
    ##################################
    # Determine the output file name #
    ##################################
    if params['experiment_type'] == 'chi2_selected':
        yoshihara_dir = 'chi2'
    else:
        yoshihara_dir = 'experiment'
    if y_type == 0:
        type = "regression"
    else:
        type = "classification"
    if m == 1:
        model_type = "sda"
    elif m == 2:
        model_type = "dbn_only"
    elif m == 4:
        model_type = "rnnrbm_mlp"
    elif m == 5:
        model_type = "rnnrbm_dbn"
    elif m == 6:
        model_type = "dbn_rnnrbm"
    #model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir, 'result', type, label_type, model_type)
    model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir, yoshihara_dir, type, label_type, model_type)
    all_size = (len(params['STEP4']['hidden_recurrent'])
                * len(params['STEP4']['hidden_layers_sizes'])
                * len(params['STEP4']['pretrain']['batch_size'])
                * len(params['STEP4']['pretrain']['learning_rate'])
                * len(params['STEP4']['pretrain']['epochs'])
                * len(params['STEP4']['finetune']['batch_size'])
                * len(params['STEP4']['finetune']['learning_rate'])
                * len(params['STEP4']['finetune']['epochs']))
    i = 0

    for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']:
        for batch_size_pretrain in params['STEP4']['pretrain']['batch_size']:
            for learning_rate_pretrain in params['STEP4']['pretrain']['learning_rate']:
                for epochs_pretrain in params['STEP4']['pretrain']['epochs']:
                    for hidden_recurrent in params['STEP4']['hidden_recurrent']:
                        pretrain_params = {
                            'dataset': dataset,
                            'hidden_layers_sizes': hidden_layers_sizes,
                            'pretrain_lr': learning_rate_pretrain,
                            'pretrain_batch_size': batch_size_pretrain,
                            'pretrain_epochs': epochs_pretrain,
                            'corruption_levels': params['STEP4']['corruption_levels'],
                            'k': params['STEP4']['k'],
                            'hidden_recurrent': hidden_recurrent,
                            'n_outs': (1 + y_type)
                        }
                        pretrain_model = model.pretrain(pretrain_params, y_type)
                        pretrain_params = get_model_params(pretrain_model)
                        for brandcode in brandcodes:
                            change_brand(dataset, brandcode)
                            for batch_size_finetune in params['STEP4']['finetune']['batch_size']:
                                for learning_rate_finetune in params['STEP4']['finetune']['learning_rate']:
                                    for epochs_finetune in params['STEP4']['finetune']['epochs']:
                                        set_model_params(pretrain_model, pretrain_params)
                                        finetune_params = {
                                            'dataset': dataset,
                                            'model': pretrain_model,
                                            'finetune_lr': learning_rate_finetune,
                                            'finetune_batch_size': batch_size_finetune,
                                            'finetune_epochs': epochs_finetune
                                        }
                                        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
                                        i += 1
                                        print '%d / %d is done...' % (i, all_size)
                                        out = open(model_dirs['STEP4_logs'], 'a')
                                        out.write('%f,%f,%s,%s,%d,%f,%d,%d,%d,%f,%d,%d,%s\n' % (best_validation_loss, test_score, brandcode, str(hidden_layers_sizes).replace(',', ' '), batch_size_pretrain, learning_rate_pretrain, hidden_recurrent, epochs_pretrain, batch_size_finetune, learning_rate_finetune, epochs_finetune, best_epoch, str(datetime.datetime.now())))
                                        out.close()
                        gc.collect()
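The eight-level loop nest above is a plain grid sweep; the same enumeration can be written flat with itertools.product. A sketch with toy stand-ins for the params['STEP4'] lists (the names and values below are assumptions for illustration):

import itertools

hidden_layers_sizes_list = [[1000, 500], [500, 250]]
pretrain_batch_sizes = [20, 50]
pretrain_learning_rates = [0.01, 0.05]

for sizes, batch, lr in itertools.product(hidden_layers_sizes_list,
                                          pretrain_batch_sizes,
                                          pretrain_learning_rates):
    print('%s %s %s' % (sizes, batch, lr))  # pretrain/finetune one configuration here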
Example #13
def build_CompressModel():
    print 'STEP 1 start...'
    dataset = Nikkei(dataset_type=params['dataset_type'],
                     brandcode=params['STEP3']['brandcode'])
    # pdb.set_trace()
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    if params['STEP1']['model'] == 'rbm':
        model = RBM(input=x,
                    n_visible=dataset.phase1_input_size,
                    n_hidden=params['STEP1']['n_hidden'],
                    reg_weight=params['STEP1']['reg_weight'],
                    corruption_level=params['STEP1']['corruption_level'])
        train_rbm(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  outdir=model_dirs['STEP1'],
                  batch_size=params['STEP1']['batch_size'])
    elif params['STEP1']['model'] == 'sda':
        sda_params = {
            'dataset': dataset,
            'hidden_layers_sizes': [params['STEP1']['n_hidden'], params['STEP1']['n_hidden'] / 2],
            'pretrain_lr': params['STEP1']['learning_rate'],
            'pretrain_batch_size': params['STEP1']['batch_size'],
            'pretrain_epochs': 5,
            'corruption_levels': [0.5, 0.5],
            'k': None,
            'y_type': 0,
            'sparse_weight': params['STEP1']['reg_weight']
        }
        model = SdA.compress(sda_params)
        pre_params = get_model_params(model)

        while True:
            try:
                f_out = open(model_dirs['STEP1'], 'w')
                f_out.write(cPickle.dumps(model, 1))
                f_out.close()
                break
            except:
                pdb.set_trace()
    else:
        model = SparseAutoencoder(
            input=x,
            n_visible=dataset.phase1_input_size,
            n_hidden=params['STEP1']['n_hidden'],
            reg_weight=params['STEP1']['reg_weight'],
            corruption_level=params['STEP1']['corruption_level'])
        train_sae(input=x,
                  model=model,
                  dataset=dataset,
                  learning_rate=params['STEP1']['learning_rate'],
                  outdir=model_dirs['STEP1'],
                  batch_size=params['STEP1']['batch_size'])
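The while/try/except loops these snippets use to persist models can be replaced by a small helper; a sketch under the snippets' own pickling convention (the helper name is ours, not the project's):

import cPickle  # pickle in Python 3

def save_pickled(obj, path, protocol=1):
    # write the object in one shot; 'with' closes the file even on error
    with open(path, 'wb') as f_out:
        cPickle.dump(obj, f_out, protocol)

# e.g. save_pickled(model, model_dirs['STEP1'])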
Example #14
                break
            except:
                print 'File could not be written...'
                pdb.set_trace()

    # outdir = '/home/fujikawa/StockPredict/src/deeplearning/experiment/Model/sae.pkl'
    while True:
        try:
            f_out = open(outdir, 'w')
            f_out.write(cPickle.dumps(params, 1))
            f_out.close()
            break
        except:
            pdb.set_trace()

    # pdb.set_trace()
    end_time = time.clock()

    training_time = (end_time - start_time)


if __name__ == '__main__':

    dataset = Nikkei()
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    model = SparseAutoencoder(input=x,
                              n_visible=dataset.phase1_input_size,
                              n_hidden=100)
    train_sae(model=model, dataset=dataset, learning_rate=0.01)