Example No. 1
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)


    random_state=random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    clf = RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=29008, max_features=36,
            max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
            oob_score=False, random_state=979271, verbose=0,
            warm_start=False)

    clf.fit(train_x, train_y)

    ccv = CalibratedClassifierCV(base_estimator=clf,method="sigmoid",cv="prefit")
    ccv.fit(valid_x,valid_y)

    valid_predictions = ccv.predict_proba(valid_x)
    test_predictions= ccv.predict_proba(test_x)

    loss = test(valid_y,valid_predictions,True)
    if  loss<0.52:
        data.saveData(valid_predictions,"../valid_results/valid_"+str(model_id)+".csv")
        data.saveData(test_predictions,"../results/results_"+str(model_id)+".csv")
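A minimal, self-contained sketch of the same pattern (fit on the training split, then calibrate a prefit model on the validation split), using synthetic data; the positional estimator argument and cv="prefit" follow the older scikit-learn API used in these examples, and newer releases rename or deprecate some of these options.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss

X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
train_x, valid_x, train_y, valid_y = train_test_split(X, y, test_size=0.3, random_state=0)

clf = RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=0)
clf.fit(train_x, train_y)                          # fit on the training split only

ccv = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
ccv.fit(valid_x, valid_y)                          # calibrate probabilities on the held-out split

print(log_loss(valid_y, ccv.predict_proba(valid_x)))   # lower is better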
Example No. 2
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)


    random_state=random.randint(0, 1000000)
    rf = RandomForestClassifier(n_jobs=8)

    param_dist = {
            "n_estimators":sp_randint(100,300),
        "criterion": ["gini"],
        #"max_depth": sp_randint(3, 10000),
        #"min_samples_split": sp_randint(1, 300),
        #"min_samples_leaf": sp_randint(1, 300),
        "max_features": sp_randint(10, 26),
        "bootstrap": [True, False],
        'random_state':sp_randint(1, 1000000),
        }

    clf = RandomizedSearchCV(rf, param_distributions=param_dist,
                                   n_iter=50,cv=10,scoring='roc_auc')

    clf.fit(train_x, train_y)
    valid_predictions = clf.predict_proba(valid_x)[:, 1]
    test_predictions= clf.predict_proba(test_x)[:, 1]

    loss = roc_auc_score(valid_y,valid_predictions)
    print('loss:')
    print(loss)
    print(clf.best_estimator_)
    data.saveData(valid_id,valid_predictions,"./valid_results/valid_"+str(model_id)+".csv")
    data.saveData(test_id,test_predictions,"./results/results_"+str(model_id)+".csv")
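The snippet above assumes sp_randint is an alias for scipy.stats.randint; a minimal sketch (on synthetic data) of how such distributions plug into RandomizedSearchCV:

from scipy.stats import randint as sp_randint
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
param_dist = {
    "n_estimators": sp_randint(100, 300),   # each candidate draws an integer from this range
    "max_features": sp_randint(2, 10),
}
search = RandomizedSearchCV(RandomForestClassifier(), param_distributions=param_dist,
                            n_iter=5, cv=3, scoring="roc_auc", random_state=0)
search.fit(X, y)
print(search.best_params_)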
Example No. 3
def createSignalFromWav(file_wav_name, file_data_name):
    try:
        with wave.open(file_wav_name, mode='rb') as wav:
            sample_rate = wav.getframerate()
            n_frame = wav.getnframes()

            # Audio duration
            duration = n_frame / sample_rate

            # Data buffer
            dataBuffer = wav.readframes(n_frame)

            # Unpack the bytes into 2-byte (short) samples
            DataGenerator = struct.unpack("%ih" % (n_frame * wav.getnchannels()), dataBuffer)
            
            # Convert the 16-bit WAV samples to floats between -1.0 and 1.0
            DataGenerator = [float(value) / pow(2, 15) for value in DataGenerator]
        
        # Create our signal and save it to a file
        dataSignal = DataSignal(sample_rate, duration, len(DataGenerator), DataGenerator)
        data.saveData(file_data_name, dataSignal)
        
        return True
    except:
        return False
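A hedged illustration of the 16-bit-to-float normalization used above: each signed 16-bit sample is divided by 2**15 = 32768, which maps the range [-32768, 32767] to roughly [-1.0, 1.0).

import struct

raw = struct.pack("3h", -32768, 0, 16384)      # three synthetic 16-bit samples
samples = struct.unpack("%ih" % 3, raw)        # same unpack pattern as in the example
floats = [float(value) / pow(2, 15) for value in samples]
print(floats)                                  # [-1.0, 0.0, 0.5]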
Example No. 4
 def saveSettings(self):
     settings.settings.setValue("data/MaximumURLLength", self.maximumURLLength.value())
     #settings.settings.setValue("data/MaximumCacheSize", self.maximumCacheSize.value())
     settings.settings.setValue("data/RememberHistory", self.rememberHistoryToggle.isChecked())
     settings.settings.setValue("network/GeolocationEnabled", self.geolocationToggle.isChecked())
     data.geolocation_whitelist = [self.geolocationWhitelist.item(authority).text() for authority in range(0, self.geolocationWhitelist.count())]
     data.geolocation_blacklist = [self.geolocationBlacklist.item(authority).text() for authority in range(0, self.geolocationBlacklist.count())]
     data.saveData()
Example No. 5
def prepareQuit():
    try: os.remove(settings.crash_file)
    except: pass
    common.downloadManager.saveSession()
    saveSession()
    settings.settings.hardSync()
    data.saveData()
    data.data.hardSync()
    filtering.adblock_filter_loader.quit()
    filtering.adblock_filter_loader.wait()
    server_thread.httpd.shutdown()
    server_thread.quit()
    server_thread.wait()
Example No. 6
def trainrf(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)


    random_state=random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    clf = RandomForestClassifier(n_estimators=random.randint(50,5000),
                                 criterion='gini',
                                 max_depth=random.randint(10,1000),
                                 min_samples_split=random.randint(2,50),
                                 min_samples_leaf=random.randint(1,10),
                                 min_weight_fraction_leaf=random.uniform(0.0,0.5),
                                 max_features=random.uniform(0.1,1.0),
                                 max_leaf_nodes=random.randint(1,10),
                                 bootstrap=False,
                                 oob_score=False,
                                 n_jobs=30,
                                 random_state=random_state,
                                 verbose=0,
                                 warm_start=True,
                                 class_weight=None
                )

    clf.fit(train_x, train_y)

    valid_predictions1 = clf.predict_proba(valid_x)
    test_predictions1= clf.predict_proba(test_x)

    t1 = test(valid_y,valid_predictions1)

    ccv = CalibratedClassifierCV(base_estimator=clf,method="sigmoid",cv='prefit')
    ccv.fit(valid_x,valid_y)

    valid_predictions2 = ccv.predict_proba(valid_x)
    test_predictions2= ccv.predict_proba(test_x)

    t2 = test(valid_y,valid_predictions2)

    if t2<t1:
        valid_predictions=valid_predictions2
        test_predictions=test_predictions2
        t=t2
    else:
        valid_predictions=valid_predictions1
        test_predictions=test_predictions1
        t=t1

    if t < 0.450:
        data.saveData(valid_predictions,"../valid_results/valid_"+str(model_id)+".csv")
        data.saveData(test_predictions,"../results/results_"+str(model_id)+".csv")
Example No. 7
def prepareQuit():
    try:
        os.remove(settings.crash_file)
    except:
        pass
    common.downloadManager.saveSession()
    saveSession()
    settings.settings.hardSync()
    data.saveData()
    data.data.hardSync()
    filtering.adblock_filter_loader.quit()
    filtering.adblock_filter_loader.wait()
    server_thread.httpd.shutdown()
    server_thread.quit()
    server_thread.wait()
Example No. 8
 def saveSettings(self):
     settings.settings.setValue("data/MaximumURLLength",
                                self.maximumURLLength.value())
     #settings.settings.setValue("data/MaximumCacheSize", self.maximumCacheSize.value())
     settings.settings.setValue("data/RememberHistory",
                                self.rememberHistoryToggle.isChecked())
     settings.settings.setValue("network/GeolocationEnabled",
                                self.geolocationToggle.isChecked())
     data.geolocation_whitelist = [
         self.geolocationWhitelist.item(authority).text()
         for authority in range(0, self.geolocationWhitelist.count())
     ]
     data.geolocation_blacklist = [
         self.geolocationBlacklist.item(authority).text()
         for authority in range(0, self.geolocationBlacklist.count())
     ]
     data.saveData()
Example No. 9
File: lr.py Project: hujiewang/otto
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)

    # normalization
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    valid_x = scaler.transform(valid_x)
    test_x = scaler.transform(test_x)

    random_state=random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    clf = LogisticRegression(penalty='l2',
                             dual=False,
                             tol=0.0001,
                             C=1.0,
                             fit_intercept=True,
                             intercept_scaling=1,
                             class_weight=None,
                             random_state=None,
                             solver='lbfgs',
                             max_iter=1000,
                             multi_class='ovr',
                             verbose=True
                             )

    clf.fit(train_x, train_y)

    valid_predictions = clf.predict_proba(valid_x)
    test(valid_y,valid_predictions)

    ccv = CalibratedClassifierCV(base_estimator=clf,method="sigmoid",cv='prefit')
    ccv.fit(train_x,train_y)

    valid_predictions = ccv.predict_proba(valid_x)
    test(valid_y,valid_predictions)

    test_predictions= ccv.predict_proba(test_x)

    data.saveData(valid_predictions,"../valid_results/valid_"+str(model_id)+".csv")
    data.saveData(test_predictions,"../results/results_"+str(model_id)+".csv")
Example No. 10
def signalGenerator(file_name, signalGen):
    try:
        # List holding the generated samples to be saved
        DataGenerator = []

        '''
            FS -> SAMPLE FRAME
            It is needed to generate the sampling of the signal:
            every sample_rate samples we get one cycle of the generated signal
        '''
        
        FS = 1.0 / signalGen.sample_rate

        '''
            The formula used to generate the signal is
            Magnitude*sin(Frequency*2*PI*FS*x + Phase)
            where x is the sample index and, as noted above,
            the signal repeats every sample_rate*FS samples
        '''
        for g in range(0, signalGen.sample_rate*signalGen.duration):
            DataGenerator.append(0)
            for i in range(0, len(signalGen.frequencies)):

                # Magnitude is optional, so if no value is given, default to 1
                if(i >= len(signalGen.magnetude)):
                    signalGen.magnetude.append(1)

                # Phase is optional, so if no value is given, default to 0 degrees
                if(i >= len(signalGen.phases)):
                    signalGen.phases.append(0)
                    
                DataGenerator[g] += signalGen.magnetude[i]*math.sin(signalGen.frequencies[i]*2*math.pi*g*FS + (signalGen.phases[i]*math.pi / 180));

        # Build the signal object and save it to the data file
        dataSignal = DataSignal(signalGen.sample_rate, signalGen.duration, len(DataGenerator), DataGenerator)
        data.saveData(file_name, dataSignal)
        
        return True
    except:
        return False
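A short sketch (with made-up parameter values) of the generation formula described in the comments above: sample k of a tone is magnitude * sin(2*pi*frequency*k*FS + phase), with FS = 1/sample_rate.

import math

sample_rate, duration = 8000, 1                # 1 second at 8 kHz (illustrative values)
frequency, magnitude, phase = 440.0, 1.0, 0.0
FS = 1.0 / sample_rate
signal = [magnitude * math.sin(frequency * 2 * math.pi * k * FS + phase)
          for k in range(sample_rate * duration)]
print(len(signal), signal[:3])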
Example No. 11
File: rf.py Project: hujiewang/otto
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)

    random_state=random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    # build a classifier
    clf = RandomForestClassifier(n_jobs=8)

    # specify parameters and distributions to sample from

    param_dist = {
            "n_estimators":sp_randint(20,40),
            "criterion": ["gini", "entropy"],
            "max_depth": sp_randint(3, 10000),
            "min_samples_split": sp_randint(1, 30),
            "min_samples_leaf": sp_randint(1, 30),
            "max_features": sp_randint(1, 93),
            "bootstrap": [True, False],
            'random_state':sp_randint(1, 1000000),
            }


    # run randomized search
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=2,cv=9,n_jobs=3)
    random_search.fit(train_x,train_y)
    valid_predictions = random_search.predict_proba(valid_x)
    test_predictions= random_search.predict_proba(test_x)
    loss = test(valid_y,valid_predictions,True)
    if  loss<10.438:
        output=[loss,random_search.best_estimator_]
        print("model[\""+str(model_id)+"\"]="),
        print(output)

        data.saveData(valid_predictions,"../valid_results/valid_"+str(model_id)+".csv")
        data.saveData(test_predictions,"../results/results_"+str(model_id)+".csv")
Example No. 12
def trainxgb(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)

    random_state=random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    xgb = XGBoostClassifier(base_estimator='gbtree',
                 objective='multi:softprob',
                 metric='mlogloss',
                 num_classes=9,
                 learning_rate=random.uniform(0.01,0.05),
                 max_depth=random.randint(10,20),
                 max_samples=random.uniform(0.0,1.0),
                 max_features=random.uniform(0.0,1.0),
                 max_delta_step=random.randint(1,10),
                 min_child_weight=random.randint(1,10),
                 min_loss_reduction=1,
                 l1_weight=0.0,
                 l2_weight=0.0,
                 l2_on_bias=False,
                 gamma=0.02,
                 inital_bias=random.uniform(0.0,1.0),
                 random_state=random_state,
                 watchlist=[[valid_x,valid_y]],
                 n_jobs=30,
                 n_iter=3000,
                )

    xgb.fit(train_x, train_y)

    valid_predictions = xgb.predict_proba(valid_x)

    if test(valid_y,valid_predictions) <0.450:
        test_predictions= xgb.predict_proba(test_x)
        data.saveData(valid_predictions,"../valid_results/valid_"+str(model_id)+".csv")
        data.saveData(test_predictions,"../results/results_"+str(model_id)+".csv")
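The XGBoostClassifier above appears to be a project-specific wrapper, so its parameter names may not match the official library; a rough, hedged equivalent with the xgboost scikit-learn wrapper on synthetic data:

from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

X, y = make_classification(n_samples=2000, n_features=20, n_informative=10,
                           n_classes=3, random_state=0)
train_x, valid_x, train_y, valid_y = train_test_split(X, y, test_size=0.3, random_state=0)

xgb = XGBClassifier(objective='multi:softprob', learning_rate=0.03, max_depth=6,
                    subsample=0.8, colsample_bytree=0.8, n_estimators=300, n_jobs=4)
xgb.fit(train_x, train_y)
print(log_loss(valid_y, xgb.predict_proba(valid_x)))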
Example No. 13
from time import sleep
import cv2  # used by cv2.waitKey in the main loop below
# for triggering the modules
import data as dm
import motor as mm
import webcam as wm
import joystick as jm

maxThrottle = 0.25
motor = mm.Motor(2, 3, 4, 17, 22, 27)
record = 0

while True:
    joyval = jm.getJS()
    steering = joyval['axis1']
    throttle = joyval['o'] * maxThrottle

    if joyval['share'] == 1:
        if record == 0:
            print('Recording Started ...')
        record += 1
        sleep(0.300)
    if record == 1:
        img = wm.getImg(True, size=[240,120])
        dm.saveData(img, steering)
    elif record == 2:
        dm.saveLog()
        record = 0

    motor.move(throttle, -steering)
    cv2.waitKey(1)
Example No. 14
from sklearn.decomposition import NMF
from data import loadData, saveData
import numpy as np

# The most important parameters are n_components, alpha, l1_ratio and solver
nmf = NMF(
    n_components=128,  # k value; by default all features are kept
    # initialization method for W and H: 'random' | 'nndsvd' (default) | 'nndsvda' | 'nndsvdar' | 'custom'
    init=None,
    solver='cd',  # 'cd' | 'mu'
    # {'frobenius', 'kullback-leibler', 'itakura-saito'}
    beta_loss='frobenius',
    tol=1e-10,  # stopping tolerance for the iterations
    max_iter=200,  # maximum number of iterations
    random_state=None,
    alpha=0.,  # regularization strength
    l1_ratio=0.,  # regularization mixing parameter
    verbose=0,  # verbosity mode
    shuffle=False  # only used by the 'cd' solver
)

trius = loadData(filename='trius.npy')
X = np.abs(trius)
W = nmf.fit_transform(X)  # fit_transform fits the model and returns W in one call
H = nmf.components_
print('reconstruction_err_', nmf.reconstruction_err_)  # value of the loss function
print('n_iter_', nmf.n_iter_)  # actual number of iterations
saveData(W, filename='nmf.npy')
saveData(H, filename='basis.npy')
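A quick hedged sanity check that could follow the example above: the factorization approximates X as W @ H, and for beta_loss='frobenius' the reconstruction error reported above is the Frobenius norm of the difference.

approx = W @ H
print(np.linalg.norm(X - approx))  # should be close to nmf.reconstruction_err_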
Example No. 15
                               i * batch_size:len(x_train)]
        # Run
        _, c = sess.run([optimizer, loss], feed_dict={X: batch_xs})
    if epoch % display_step == 0:
        print("Iteration: %04d " % (epoch), "loss=", "{:.9f}".format(c))

print('Triang CLF')
for epoch in range(training_epoch):
    # Loop over all batches
    for i in range(total_batch):
        batch_xs = x_train[i * batch_size:(i + 1) *
                           batch_size] if i < total_batch else x_train[
                               i * batch_size:len(x_train)]
        batch_ys = y_train[i * batch_size:(i + 1) *
                           batch_size] if i < total_batch else y_train[
                               i * batch_size:len(y_train)]
        # Run
        _, c = sess.run([optimizer_clf, loss_clf],
                        feed_dict={
                            X: batch_xs,
                            Y: batch_ys
                        })
    if epoch % display_step == 0:
        a = sess.run(acc, feed_dict={X: x_test, Y: y_test})
        mse = sess.run(loss, feed_dict={X: x_test})
        print("Iteration: %04d " % (epoch), "loss=",
              "{:.9f} acc {:.9f} decode loss {:.9f}".format(c, a, mse))

code = sess.run(encoder_op, feed_dict={X: F})
saveData(code, filename='code.npy')
Example No. 16
def main():
    path = "../Data/google_trace_timeseries/data_resource_usage_5Minutes_6176858948.csv"
    aspects = ["meanCPUUsage", "canonical memory usage"]
    predicted_aspect = "meanCPUUsage"
    num_epochs = 1000
    learning_rate = 0.005
    n_slidings_encoder = [26, 28]
    n_slidings_decoder = [2, 4, 6]
    batch_sizes = [16, 32]
    size_models = [[16], [32], [8, 4], [16, 8]]
    activations = ["tanh", "sigmoid"]
    input_keep_probs = [0.95, 0.9]
    output_keep_probs = [0.9]
    state_keep_probs  = [0.95, 0.9]

    # n_slidings_encoder = [16]
    # n_slidings_decoder = [2]
    # batch_sizes = [16]
    # size_models = [[4, 2]]
    # activations = ["tanh"]
    # input_keep_probs = [0.95]
    # output_keep_probs = [0.9]
    # state_keep_probs = [0.95]
    rate = 5
    result_file_path = 'result_encoder_decoder.csv'
    loss_file_path = 'loss_encoder_decoder.csv'

    ## GET COMBINATIONS ##
    combinations = []
    for n_sliding_encoder in n_slidings_encoder:
        for n_sliding_decoder in n_slidings_decoder:
            for batch_size in batch_sizes:
                for size_model in size_models:
                    for activation in activations:
                        for input_keep_prob in input_keep_probs:
                            for output_keep_prob in output_keep_probs:
                                for state_keep_prob in state_keep_probs:
                                    combination_i = [n_sliding_encoder, n_sliding_decoder,
                                                     batch_size, size_model, activation,
                                                     input_keep_prob, output_keep_prob,
                                                     state_keep_prob]
                                    combinations.append(combination_i)

    for combination in combinations:

        tf.reset_default_graph()

        n_sliding_encoder = combination[0]
        n_sliding_decoder = combination[1]
        batch_size = combination[2]
        size_model = combination[3]
        activation = combination[4]
        input_keep_prob = combination[5]
        output_keep_prob = combination[6]
        state_keep_prob = combination[7]

        ### GET DATA : TRAINING SET, TEST SET, VALIDATION SET ###

        nor_data, amax, amin = data.get_goodletrace_data(path, aspects)
        x_train_encoder, y_train, x_test_encoder, y_test = data.get_data_samples(nor_data, n_sliding_encoder,
                                                                                 predicted_aspect, rate)
        x_train_decoder, x_test_decoder = data.get_data_decoder(x_train_encoder, x_test_encoder, n_sliding_decoder)
        x_train_encoder, x_train_decoder, y_train, x_val_encoder, x_val_decoder, y_val = \
            data.getValidationSet(x_train_encoder, x_train_decoder, y_train, 5)
        # print(x_train_encoder.shape, x_train_decoder.shape, y_train.shape, x_val_encoder.shape, x_val_decoder.shape,
        #       y_val.shape)
        # return 0

        loss_train_value = []
        loss_valid_value = []

        n_train = y_train.shape[0]
        num_batches = int(x_train_encoder.shape[0] / batch_size)

        timestep_encoder = n_sliding_encoder
        timestep_decoder = n_sliding_decoder
        input_dim = len(aspects)
        X_encoder = tf.placeholder(tf.float32, [None, timestep_encoder, input_dim], name='X_encoder')
        X_decoder = tf.placeholder(tf.float32, [None, timestep_decoder, input_dim], name='X_decoder')
        y = tf.placeholder(tf.float32, [None, 1], name='output')

        output, outputs_encoder, outputs_decoder = encoder_decoder(X_encoder, X_decoder, size_model, activation,
                                                                   input_keep_prob, output_keep_prob, state_keep_prob)
        outputs_encoder = tf.identity(outputs_encoder, name='outputs_encoder')

        loss = tf.reduce_mean(tf.squared_difference(output, y))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        init_op = tf.global_variables_initializer()

        with tf.Session() as sess:

            t = datetime.now().time()
            start_time = (t.hour * 60 + t.minute) * 60 + t.second

            sess.run(init_op)

            ## EARLY STOPPING ##
            pre_loss_valid = 100
            x = 0
            early_stopping_val = 5

            ### START TO TRAIN ###
            for i in range(num_epochs):
                num_epochs_i = i + 1
                for j in range(num_batches + 1):
                    a = batch_size * j
                    b = a + batch_size
                    if b > n_train:
                        b = n_train
                    x_batch_encoder = x_train_encoder[a:b, :, :]
                    x_batch_decoder = x_train_decoder[a:b, :, :]
                    y_batch = y_train[a:b, :]
                    # print(x_batch.shape, y_batch.shape)

                    loss_j, _ = sess.run([loss, optimizer], feed_dict={X_encoder: x_batch_encoder,
                                                                       X_decoder: x_batch_decoder,
                                                                       y: y_batch})
                loss_train_i = sess.run(loss, feed_dict={X_encoder: x_train_encoder,
                                                         X_decoder: x_train_decoder,
                                                         y: y_train})
                loss_valid_i = sess.run(loss, feed_dict={X_encoder: x_val_encoder,
                                                         X_decoder: x_val_decoder,
                                                         y: y_val})
                # print(num_epochs_i, loss_train_i, loss_valid_i)

                loss_train_value.append(loss_train_i)
                loss_valid_value.append(loss_valid_i)

                if loss_valid_i > pre_loss_valid:
                    x = x+1
                    if x == early_stopping_val:
                        break
                else:
                    x = 0
                pre_loss_valid = loss_valid_i

            ### OUTPUT ###
            output_test = sess.run(output, feed_dict={X_encoder: x_test_encoder,
                                                      X_decoder: x_test_decoder,
                                                      y: y_test})
            output_test = output_test * (amax[0] - amin[0]) + amin[0]
            y_test_act = y_test * (amax[0] - amin[0]) + amin[0]

            loss_test_act = np.mean(np.abs(output_test - y_test_act))
            # print(loss_test_act)

            t = datetime.now().time()
            end_time = (t.hour * 60 + t.minute) * 60 + t.second

            training_encoder_time = (end_time - start_time)

            ### SAVE DATA ###
            name = data.saveData(combination, loss_test_act, num_epochs_i, result_file_path, training_encoder_time)

            # print(name)

            # outputs_encoder = sess.run(outputs_encoder, feed_dict={X_encoder: x_train_encoder,
            #                                      X_decoder: x_train_decoder,
            #                                      y: y_train})
            # print(outputs_encoder[:, -1, :].shape)


            # print(time)

            ### SAVE MODEL ###
            # print('\nSaving...')
            cwd = os.getcwd()
            saved_path = 'model/model'
            saved_path += str(combination)
            saved_path += '.ckpt'
            saved_path = os.path.join(cwd, saved_path)
            # print(saved_path)
            shutil.rmtree(saved_path, ignore_errors=True)
            saver = tf.train.Saver()
            saver.save(sess=sess, save_path=saved_path)
            # print("ok")

            sess.close()
Example No. 17
def main():

    path = "../Data/google_trace_timeseries/data_resource_usage_5Minutes_6176858948.csv"
    aspects = ["meanCPUUsage", "canonical memory usage"]
    predicted_aspect = "meanCPUUsage"
    n_slidings = [3, 4, 5, 6]
    batch_sizes = [16, 32]
    learning_rate = 0.005
    num_epochs = 2
    rnn_cellsizes = [[4], [8], [16], [32], [4, 2], [8, 4], [16, 4], [16, 8],
                     [32, 4], [32, 8], [32, 16]]
    activations = ["tanh", "sigmoid"]
    rate = 5
    result_file_path = 'result_multi.csv'

    combination = []
    for n_sliding in n_slidings:
        for batch_size in batch_sizes:
            for rnn_cellsize in rnn_cellsizes:
                for activation in activations:
                    combination_i = [
                        n_sliding, batch_size, rnn_cellsize, activation
                    ]
                    combination.append(combination_i)

    for combination_i in combination:
        tf.reset_default_graph()

        n_sliding = combination_i[0]
        batch_size = combination_i[1]
        rnn_unit = combination_i[2]
        activation = combination_i[3]

        nor_data, amax, amin = data.get_goodletrace_data(path, aspects)
        x_train, y_train, x_test, y_test = data.get_data_samples(
            nor_data, n_sliding, predicted_aspect, rate)
        # x_train, y_train, x_valid, y_valid = data.getValidationSet(x_train, y_train, 5)

        loss_train_value = []
        loss_valid_value = []

        n_train = x_train.shape[0]
        num_batches = int(x_train.shape[0] / batch_size)
        timestep = n_sliding
        input_dim = len(aspects)
        X = tf.placeholder(tf.float32, [None, timestep, input_dim])
        y = tf.placeholder(tf.float32, [None, 1])

        output = model_rnn(X, rnn_unit, activation)

        loss = tf.reduce_mean(tf.squared_difference(output, y))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:

            t = datetime.now().time()
            start_time = (t.hour * 60 + t.minute) * 60 + t.second

            sess.run(init_op)

            # pre_loss_valid = 100
            # x = 0
            # early_stopping_val = 5
            epoch_i = 0
            for i in range(num_epochs):
                for j in range(num_batches + 1):
                    a = batch_size * j
                    b = a + batch_size
                    if b > n_train:
                        b = n_train
                    x_batch = x_train[a:b, :, :]
                    y_batch = y_train[a:b, :]
                    # print(x_batch.shape, y_batch.shape)

                    loss_j, _ = sess.run([loss, optimizer],
                                         feed_dict={
                                             X: x_batch,
                                             y: y_batch
                                         })
                loss_train_i = sess.run(loss,
                                        feed_dict={
                                            X: x_train,
                                            y: y_train
                                        })
                # loss_valid_i = sess.run(loss, feed_dict={X: x_valid,
                #                                          y: y_valid})
                loss_train_value.append(loss_train_i)
                # loss_valid_value.append(loss_valid_i)

                # if loss_valid_i > pre_loss_valid:
                #     x = x+1
                #     if x == early_stopping_val:
                #         break
                # else:
                #     x = 0
                # pre_loss_valid = loss_valid_i
                epoch_i += 1

            t = datetime.now().time()
            train_time = (t.hour * 60 + t.minute) * 60 + t.second

            training_time = train_time - start_time

            output_test = sess.run(output, feed_dict={X: x_test, y: y_test})
            output_test = output_test * (amax[0] - amin[0]) + amin[0]
            y_test_act = y_test * (amax[0] - amin[0]) + amin[0]

            t = datetime.now().time()
            test_time = (t.hour * 60 + t.minute) * 60 + t.second
            testing_time = test_time - train_time

            system_time = test_time - start_time

            loss_test_act = np.mean(np.abs(output_test - y_test_act))

            explained_variance_score = sk.explained_variance_score(
                y_test_act, output_test)
            mean_absolute_error = sk.mean_absolute_error(
                y_test_act, output_test)
            mean_squared_error = sk.mean_squared_error(y_test_act, output_test)
            median_absolute_error = sk.median_absolute_error(
                y_test_act, output_test)
            r2_score = sk.r2_score(y_test_act, output_test)

            # t = datetime.now().time()
            # end_time = (t.hour * 60 + t.minute) * 60 + t.second
            #
            # training_time = (end_time - start_time)

            name = data.saveData(combination_i, loss_test_act,
                                 loss_valid_value, loss_train_value, epoch_i,
                                 result_file_path, output_test, y_test_act,
                                 explained_variance_score, mean_absolute_error,
                                 mean_squared_error, median_absolute_error,
                                 r2_score, training_time, testing_time,
                                 system_time)

            print(name)
Example No. 18
opt = {
	'architecture' : [784,784,784,30,10],
	'learningRate' : 1.5,
	'error' : 0.001,
	'epochs' : 15,
	'batch' : 100
}
nn = ann.ann(opt)

# combine
# rbm = files.loadData('rbm-1000.db')
# nn.rbm(rbm)

#train
nn.train(train_data, train_result)
files.saveData(nn, 'nn.db')

_results = nn.sim(test_data)
_results = _results.transpose()

accuracy = 0
for i in range(len(test_result)):
	if i < 20:
		print(_results[i].argmax(), " : ", test_result[i].argmax())
	if _results[i].argmax() == test_result[i].argmax():
		accuracy += 1.00

print(accuracy, " / ", len(test_result))

accuracy = accuracy/len(test_result)
Example No. 19
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from test import test
train_x,train_y,valid_x,valid_y,test_x=data.loadData()
train_x,train_y=shuffle(train_x,train_y)

param_grid = [
 {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
 {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
clf = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
    gamma=0.0, kernel='rbf', max_iter=-1, probability=True,
    random_state=None, shrinking=True, tol=0.001, verbose=True)

clf.fit(train_x, train_y)

#gs = GridSearchCV(svc,param_grid,n_jobs=8,verbose=2)
#gs.fit(train_x, train_y)


valid_predictions = clf.predict_proba(valid_x)
test_predictions= clf.predict_proba(test_x)

test(valid_y,valid_predictions)


data.saveData(valid_predictions,"../valid_results/valid_215.csv")
data.saveData(test_predictions,"../results/results_215.csv")

Example No. 20
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    train_x,train_y=shuffle(train_x,train_y)

    maximum_auc=0.0
    random_state=random.randint(0, 1000000)
    for i in tqdm(range(1000)):
        config={
        'base_estimator':'gbtree',
        'objective':'multi:softprob',
        'metric':'mlogloss',
        'num_classes':2,
        'learning_rate':random.uniform(0.01,0.15),
        'max_depth':20+random.randint(0,10),
        'max_samples':random.uniform(0.3,1.0),
        'max_features':random.uniform(0.3,1.0),
        'max_delta_step':random.randint(1,10),
        'min_child_weight':random.randint(1,8),
        'min_loss_reduction':1,
        'l1_weight':random.uniform(0.0,10.0),
        'l2_weight':random.uniform(0.0,10.0),
        'l2_on_bias':False,
        'gamma':random.uniform(0.0,0.1),
        'inital_bias':0.5,
        'random_state':random_state,

        }
        clf = XGBoostClassifier(
            config['base_estimator'],
            config['objective'],
            config['metric'],
            config['num_classes'],
            config['learning_rate'],
            config['max_depth'],
            config['max_samples'],
            config['max_features'],
            config['max_delta_step'],
            config['min_child_weight'],
            config['min_loss_reduction'],
            config['l1_weight'],
            config['l2_weight'],
            config['l2_on_bias'],
            config['gamma'],
            config['inital_bias'],
            config['random_state'],
            watchlist=[[valid_x,valid_y]],
            n_jobs=8,
            n_iter=30000,
        )
        clf.fit(train_x, train_y)

        valid_predictions = clf.predict_proba(valid_x)[:, 1]
        test_predictions= clf.predict_proba(test_x)[:, 1]

        auc = roc_auc_score(valid_y,valid_predictions)
        if auc>maximum_auc:
            maximum_auc=auc
            best_config=config
            print('new auc:')
            print(auc)
            data.saveData(valid_id,valid_predictions,"./valid_results/valid_"+str(model_id)+".csv")
            data.saveData(test_id,test_predictions,"./results/results_"+str(model_id)+".csv")
    print('maximum_auc:')
    print(maximum_auc)
    print(best_config)
Example No. 21
	# plt.show()

def load_save_label(filename):
	binfile = open(filename , 'rb')
	buf = binfile.read()

	labelNum = len(buf)

	labelNum -= 8

	# print labelNum
	 
	index = 0
	labels = struct.unpack_from('>' + str(labelNum) + 'B', buf, index)

	print('success: ' + filename)

	return labels


train_data = load_save_function('train-images-idx3-ubyte')
train_result = load_save_label('train-labels-idx1-ubyte')
test_data = load_save_function('t10k-images-idx3-ubyte')
test_result = load_save_label('t10k-labels-idx1-ubyte')


print(train_data.shape)
train_data.tofile("mnist_train_data.db")
data.saveData(train_result, "mnist_train_result.db")
test_data.tofile("mnist_test_data.db")
data.saveData(test_result, "mnist_test_result.db")