Example #1
def change_brand(brandcode, brandcodes, label_type):
    dataset = Nikkei(brandcode=brandcode)
    dataset.unify_stockprices(dataset=dataset.raw_data[brandcode], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    for datatype in ['train', 'valid', 'test']:
        dataset.phase2[datatype]['y'] = dataset.phase2[datatype][brandcode]
    return dataset
Example #2
def change_brand(brandcode, brandcodes, label_type):
    dataset = Nikkei(brandcode=brandcode)
    dataset.unify_stockprices(dataset=dataset.raw_data[brandcode],
                              brandcodes=brandcodes,
                              label_type=label_type)
    reguralize_data(dataset, brandcodes)
    for datatype in ['train', 'valid', 'test']:
        dataset.phase2[datatype]['y'] = dataset.phase2[datatype][brandcode]
    return dataset
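Both variants above rebuild a Nikkei dataset around one target brand and point the phase2 'y' labels at that brand's column. A minimal driver sketch, assuming the module-level names seen in the snippets (Nikkei, reguralize_data, change_brand); '0101' is the default code from the examples, while '7203' is a made-up brand code:

# Hypothetical driver for change_brand; '7203' is invented for illustration.
brandcodes = ['0101', '7203']
datasets = {}
for code in brandcodes:
    # label_type=1 mirrors the default used by the predict() examples below
    datasets[code] = change_brand(code, brandcodes, label_type=1)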
Example #3
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1, model_type=2):
    print 'STEP 3 start...'
    if dataset is None:
        if params['experiment_type'] == 'baseline':
            print 'start to load baseline dataset...'
            dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original'))
        elif params['experiment_type'] == 'chi2_selected':
            print 'start to load chi2_selected...'
            dataset = Nikkei() 
        else:
            print 'start to load proposed dataset...'
            dataset = cPickle.load(open(model_dirs['STEP2']))
    print 'start to unify stockprice...'
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params['experiment_type'] != 'chi2_selected':
        dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes,
                                  dataset_type=params['experiment_type'], label_type=label_type)
    else:
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])
    
    if model_type == 0:
        def transformY(data_y):
            # label_type < 3 (regression): each label arrives as a one-element
            # sequence; otherwise (classification) the label is used as-is
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)
        train_x = dataset.phase2['train']['x']
        train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0)
        test_x = dataset.phase2['test']['x']
        while True:  # rerun the search until interrupted; pdb.set_trace() below pauses each pass

            if params['experiment_type'] == 'baseline':
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2['train']['y'])
            train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0)
            test_y = transformY(dataset.phase2['test']['y'])


            if label_type < 3:
                tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print 'classification'
                tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit outside the if/else so best_model exists for both SVR and SVC
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y))
            print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()
    
    
    pretrain_params = {
        'dataset' : dataset, 
        'hidden_layers_sizes' : params['STEP4']['hidden_layers_sizes'],
        'pretrain_lr' : params['STEP4']['pretrain']['learning_rate'],
        'pretrain_batch_size' : params['STEP4']['pretrain']['batch_size'],
        'pretrain_epochs' : params['STEP4']['pretrain']['epochs'],
        'corruption_levels' : params['STEP4']['corruption_levels'],
        'k' : params['STEP4']['k'],
        'hidden_recurrent': params['STEP4']['hidden_recurrent'],
        'n_outs' : (1 + y_type)
    }
    pretrain_model = model.pretrain(pretrain_params, y_type)
    pretrain_params = get_model_params(pretrain_model)
    while True:
        finetune_params = {
            'dataset' : dataset,
            'model' : pretrain_model,
            'finetune_lr' : params['STEP4']['finetune']['learning_rate'],
            'finetune_batch_size' : params['STEP4']['finetune']['batch_size'],
            'finetune_epochs' : params['STEP4']['finetune']['epochs']
        }
        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
        pdb.set_trace()
        set_model_params(pretrain_model, pretrain_params)
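A hypothetical call into this predict variant; the model object and the params dict come from the surrounding module and are assumed to be set up as in the repo:

# Hypothetical invocation. Passing dataset=None makes predict() load
# whichever dataset params['experiment_type'] names.
predict(None, model,
        brandcodes=['0101'],  # default brand code from the source
        label_type=3,         # label_type >= 3 takes the SVC classification path
        y_type=1,
        model_type=0)         # model_type == 0 runs the SVM grid-search branch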
Example #4
if __name__ == '__main__':
    ###   Experiment over several brand codes
    all_size = (len(params['STEP3']['brandcode'])
                * len(params['STEP4']['hidden_layers_sizes'])
                * len(params['STEP4']['hidden_recurrent'])  # the loop below also iterates over hidden_recurrent
                * len(params['STEP4']['pretrain']['batch_size'])
                * len(params['STEP4']['pretrain']['learning_rate'])
                * len(params['STEP4']['pretrain']['epochs'])
                * len(params['STEP4']['finetune']['batch_size'])
                * len(params['STEP4']['finetune']['learning_rate'])
                * len(params['STEP4']['finetune']['epochs']))
    i = 0
    for brandcode in params['STEP3']['brandcode']:
        model_dirs = reload_model_dirs(brandcode)
        dataset = Nikkei(dataset_type=params['dataset_type'],
                         brandcode=brandcode)
        dataset.unify_stockprices(dataset.raw_data[brandcode])
        for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']:
            for hidden_recurrent in params['STEP4']['hidden_recurrent']:
                for batch_size_pretrain in params['STEP4']['pretrain']['batch_size']:
                    for learning_rate_pretrain in params['STEP4']['pretrain']['learning_rate']:
                        for epochs_pretrain in params['STEP4']['pretrain']['epochs']:
                            for batch_size_finetune in params['STEP4']['finetune']['batch_size']:
                                for learning_rate_finetune in params['STEP4']['finetune']['learning_rate']:
                                    for epochs_finetune in params['STEP4']['finetune']['epochs']:
                                        result = train_rnnrbm(
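The nested loops above walk the full Cartesian product of the STEP4 hyperparameters; the train_rnnrbm call is truncated in the source, so its arguments are left elided here too. A flattened sketch of the same enumeration with itertools.product, assuming params has the structure the loops imply:

import itertools

grid = itertools.product(
    params['STEP3']['brandcode'],
    params['STEP4']['hidden_layers_sizes'],
    params['STEP4']['hidden_recurrent'],
    params['STEP4']['pretrain']['batch_size'],
    params['STEP4']['pretrain']['learning_rate'],
    params['STEP4']['pretrain']['epochs'],
    params['STEP4']['finetune']['batch_size'],
    params['STEP4']['finetune']['learning_rate'],
    params['STEP4']['finetune']['epochs'],
)
for (brandcode, hidden_layers_sizes, hidden_recurrent,
     batch_size_pretrain, learning_rate_pretrain, epochs_pretrain,
     batch_size_finetune, learning_rate_finetune, epochs_finetune) in grid:
    pass  # one train_rnnrbm(...) run per combination, as in the loop above

Note that the original keeps the brandcode loop outermost so each Nikkei dataset is built once per brand; a flat product would need the same caching to avoid reloading it on every combination.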
Example #5
def predict(dataset, model, brandcodes=["0101"], label_type=1, y_type=1, model_type=2):
    print "STEP 3 start..."
    if dataset is None:
        if params["experiment_type"] == "baseline":
            print "start to load baseline dataset..."
            dataset = cPickle.load(open(default_model_dir + "/STEP2/baseline_original"))
        elif params["experiment_type"] == "chi2_selected":
            print "start to load chi2_selected..."
            dataset = Nikkei()
        else:
            print "start to load proposed dataset..."
            dataset = cPickle.load(open(model_dirs["STEP2"]))
    print "start to unify stockprice..."
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params["experiment_type"] != "chi2_selected":
        dataset.unify_stockprices(
            dataset=dataset.baseline_original,
            brandcodes=brandcodes,
            dataset_type=params["experiment_type"],
            label_type=label_type,
        )
    else:
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])

    if model_type == 0:

        def transformY(data_y):
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)

        train_x = dataset.phase2["train"]["x"]
        train_x = numpy.append(train_x, dataset.phase2["valid"]["x"], 0)
        test_x = dataset.phase2["test"]["x"]
        while True:

            if params["experiment_type"] == "baseline":
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2["train"]["y"])
            train_y = numpy.append(train_y, transformY(dataset.phase2["valid"]["y"]), 0)
            test_y = transformY(dataset.phase2["test"]["y"])

            if label_type < 3:
                tuned_parameters = [
                    {"kernel": ["rbf"], "gamma": [10 ** i for i in range(-4, 0)], "C": [10 ** i for i in range(0, 4)]}
                ]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print "classification"
                tuned_parameters = [
                    {
                        "kernel": ["rbf", "linear"],
                        "gamma": [10 ** i for i in range(-4, 0)],
                        "C": [10 ** i for i in range(0, 4)],
                    }
                ]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit outside the if/else so best_model exists for both SVR and SVC
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print "training accuracy : %.2f , %d / %d" % (
                float(result_train) / len(train_y),
                result_train,
                len(train_y),
            )
            print "testing accuracy : %.2f , %d / %d" % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()

    pretrain_params = {
        "dataset": dataset,
        "hidden_layers_sizes": params["STEP4"]["hidden_layers_sizes"],
        "pretrain_lr": params["STEP4"]["pretrain"]["learning_rate"],
        "pretrain_batch_size": params["STEP4"]["pretrain"]["batch_size"],
        "pretrain_epochs": params["STEP4"]["pretrain"]["epochs"],
        "corruption_levels": params["STEP4"]["corruption_levels"],
        "k": params["STEP4"]["k"],
        "hidden_recurrent": params["STEP4"]["hidden_recurrent"],
        "n_outs": (1 + y_type),
    }
    pretrain_model = model.pretrain(pretrain_params, y_type)
    pretrain_params = get_model_params(pretrain_model)
    while True:
        finetune_params = {
            "dataset": dataset,
            "model": pretrain_model,
            "finetune_lr": params["STEP4"]["finetune"]["learning_rate"],
            "finetune_batch_size": params["STEP4"]["finetune"]["batch_size"],
            "finetune_epochs": params["STEP4"]["finetune"]["epochs"],
        }
        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
        pdb.set_trace()
        set_model_params(pretrain_model, pretrain_params)
def predict(dataset, model, brandcodes=['0101'], label_type=1, y_type=1, m=1):
    print 'STEP 3 start...'
    if dataset is None:
        if params['experiment_type'] == 'baseline':
            print 'start to load baseline dataset...'
            dataset = cPickle.load(open(default_model_dir + '/STEP2/baseline_original'))
        elif params['experiment_type'] == 'chi2_selected':
            print 'start to load chi2_selected dataset...'
            dataset = Nikkei()
        else:
            print 'start to load proposed dataset...'
            dataset = cPickle.load(open(model_dirs['STEP2']))
    print 'start to unify stockprice...'
    # dataset.unify_stockprices(dataset=dataset.unified, brandcodes=brandcodes, dataset_type=params['experiment_type'], label_type=label_type)
    if params['experiment_type'] == 'chi2_selected':
        dataset.unify_stockprices(dataset=dataset.raw_data[brandcodes[0]], brandcodes=brandcodes, label_type=label_type)
    else:
        dataset.unify_stockprices(dataset=dataset.baseline_original, brandcodes=brandcodes,
                                  dataset_type=params['experiment_type'], label_type=label_type)
    reguralize_data(dataset, brandcodes)
    change_brand(dataset, brandcodes[0])
    #change_brand(dataset, '0101')
    if m == 0:
        def transformY(data_y):
            # same label flattening as in the earlier predict variants
            y = []
            if label_type < 3:
                for data in data_y:
                    y.append(data[0])
                return numpy.array(y)
            else:
                for data in data_y:
                    y.append(data)
                return numpy.array(y)
        train_x = dataset.phase2['train']['x']
        train_x = numpy.append(train_x, dataset.phase2['valid']['x'], 0)
        test_x = dataset.phase2['test']['x']
        while True:

            if params['experiment_type'] == 'baseline':
                train_x_original = train_x
                test_x_original = test_x
                pca = PCA(n_components=1000)
                pca.fit(train_x_original)
                train_x = pca.transform(train_x_original)
                test_x = pca.transform(test_x_original)

            train_y = transformY(dataset.phase2['train']['y'])
            train_y = numpy.append(train_y, transformY(dataset.phase2['valid']['y']), 0)
            test_y = transformY(dataset.phase2['test']['y'])


            if label_type < 3:
                tuned_parameters = [{'kernel': ['rbf'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVR(), tuned_parameters, cv=5, scoring="mean_squared_error", n_jobs=10)
            else:
                print 'classification'
                tuned_parameters = [{'kernel': ['rbf', 'linear'], 'gamma': [10**i for i in range(-4, 0)], 'C': [10**i for i in range(0, 4)]}]
                gscv = GridSearchCV(SVC(), tuned_parameters, cv=5, n_jobs=10)
            # fit outside the if/else so best_model exists for both SVR and SVC
            gscv.fit(train_x, train_y)
            best_model = gscv.best_estimator_
            predict_y = best_model.predict(test_x)
            result_train = (best_model.predict(train_x) == train_y).sum()
            result_test = (best_model.predict(test_x) == test_y).sum()
            print 'training accuracy : %.2f , %d / %d' % (float(result_train) / len(train_y), result_train, len(train_y))
            print 'testing accuracy : %.2f , %d / %d' % (float(result_test) / len(test_y), result_test, len(test_y))
            pdb.set_trace()
    ###############################
    # Decide the output file name #
    ###############################
    if params['experiment_type'] == 'chi2_selected':
        yoshihara_dir = 'chi2'
    else:
        yoshihara_dir = 'experiment'
    if y_type == 0:
        type = "regression"
    else:
        type = "classification"
    if m == 1:
        model_type = "sda"
    elif m == 2:
        model_type = "dbn_only"
    elif m == 4:
        model_type = "rnnrbm_mlp"
    elif m == 5:
        model_type = "rnnrbm_dbn"
    elif m == 6:
        model_type = "dbn_rnnrbm"
    # m == 0 stays in the SVM loop above; m == 3 would leave model_type unset
    #model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir, 'result',type, label_type, model_type)
    model_dirs['STEP4_logs'] = '%s/%s/%s_%d_%s.csv' % (default_model_dir, yoshihara_dir, type, label_type, model_type)
    all_size = (len(brandcodes)  # the finetune loop below also iterates over brands
                * len(params['STEP4']['hidden_recurrent'])
                * len(params['STEP4']['hidden_layers_sizes'])
                * len(params['STEP4']['pretrain']['batch_size'])
                * len(params['STEP4']['pretrain']['learning_rate'])
                * len(params['STEP4']['pretrain']['epochs'])
                * len(params['STEP4']['finetune']['batch_size'])
                * len(params['STEP4']['finetune']['learning_rate'])
                * len(params['STEP4']['finetune']['epochs']))
    i = 0
      
    for hidden_layers_sizes in params['STEP4']['hidden_layers_sizes']:
        for batch_size_pretrain in params['STEP4']['pretrain']['batch_size']:
            for learning_rate_pretrain in params['STEP4']['pretrain']['learning_rate']:
                for epochs_pretrain in params['STEP4']['pretrain']['epochs']: 
                    for hidden_recurrent in params['STEP4']['hidden_recurrent']:
                        pretrain_params = {
                            'dataset': dataset,
                            'hidden_layers_sizes': hidden_layers_sizes,
                            'pretrain_lr': learning_rate_pretrain,
                            'pretrain_batch_size': batch_size_pretrain,
                            'pretrain_epochs': epochs_pretrain,
                            'corruption_levels': params['STEP4']['corruption_levels'],
                            'k': params['STEP4']['k'],
                            'hidden_recurrent': hidden_recurrent,
                            'n_outs': (1 + y_type)
                        }
                        pretrain_model = model.pretrain(pretrain_params, y_type)
                        # snapshot the pretrained weights so every finetune run
                        # below can be restored to the same starting point
                        pretrain_params = get_model_params(pretrain_model)
                        for brandcode in brandcodes:
                            change_brand(dataset, brandcode)
                            for batch_size_finetune in params['STEP4']['finetune']['batch_size']:
                                for learning_rate_finetune in params['STEP4']['finetune']['learning_rate']:
                                    for epochs_finetune in params['STEP4']['finetune']['epochs']: 
                                        set_model_params(pretrain_model, pretrain_params)  # restore pretrained weights
                                        finetune_params = {
                                            'dataset': dataset,
                                            'model': pretrain_model,
                                            'finetune_lr': learning_rate_finetune,
                                            'finetune_batch_size': batch_size_finetune,
                                            'finetune_epochs': epochs_finetune
                                        }
                                        finetune_model, best_validation_loss, test_score, best_epoch = model.finetune(finetune_params, y_type)
                                        i += 1
                                        print '%d / %d is done...' % (i, all_size)
                                        out = open(model_dirs['STEP4_logs'], 'a')
                                        out.write('%f,%f,%s,%s,%d,%f,%d,%d,%d,%f,%d,%d,%s\n' % (best_validation_loss, test_score, brandcode, str(hidden_layers_sizes).replace(',', ' '), batch_size_pretrain, learning_rate_pretrain, hidden_recurrent, epochs_pretrain, batch_size_finetune, learning_rate_finetune, epochs_finetune, best_epoch, str(datetime.datetime.now())))
                                        out.close()
                        gc.collect()
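Each finetune run appends one CSV row to model_dirs['STEP4_logs']. A sketch for reading the log back; the column names are inferred from the out.write(...) format above and are assumptions, not part of the source:

import csv

# Column order matches the format string written in the loop above.
columns = ['best_validation_loss', 'test_score', 'brandcode',
           'hidden_layers_sizes', 'pretrain_batch_size', 'pretrain_lr',
           'hidden_recurrent', 'pretrain_epochs', 'finetune_batch_size',
           'finetune_lr', 'finetune_epochs', 'best_epoch', 'timestamp']

with open(model_dirs['STEP4_logs']) as f:
    rows = [dict(zip(columns, row)) for row in csv.reader(f)]

# e.g. rank settings by validation loss (lower is better)
rows.sort(key=lambda r: float(r['best_validation_loss']))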