Example #1
def main():
    #model = load_model('test_models/stock_multb_nosmooth_step4_1525242340/seq_len_50/model-1.h5')
    model = load_model(
        r'D:\Source\Repos\StockPrediction\stock_single_nosmooth_1525405252\seq_len_50/model-1.h5'
    )
    seq_len = 50
    predict_len = 7

    #date_ranges = [(datetime.date(2016,11,1),datetime.date(2017,4,3)),(datetime.date(2002,3,18),datetime.date(2002,8,14)),(datetime.date(2015,3,8),datetime.date(2015,8,5))]
    date_ranges = [(datetime.date(2015, 1, 1), datetime.date(2015, 6, 1)),
                   (datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1))]

    test_data = [
        dataload.load_data('daily_spx.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]

    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]
    scores = [
        model.evaluate(test[0], test[1], verbose=0) for test in test_data
    ]

    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][
                sequence_index] = dataload.denormalize_sequence(
                    test_data[prediction_index][2][sequence_index * 7],
                    predictions[prediction_index][sequence_index])

    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][
                y_index] = dataload.denormalize_point(
                    test_data[test_data_index][2][y_index],
                    test_data[test_data_index][1][y_index])

    model_plot = [(predictions[0], '2015'), (predictions[1], '2016'),
                  (predictions[2], '2017')]
    #model_plot = [(predictions[0], 'Bullish'), (predictions[1], 'Bearish'), (predictions[2], 'Neutral')]
    results_fname = 'test_singleb_nosmooth_byyear_{}'.format(
        int(datetime.datetime.now().timestamp()))
    os.makedirs(results_fname)
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path=results_fname + '/plots.pdf')
    with open(results_fname + "/score.txt", "w") as fout:
        for score in scores:
            pprint.pprint(score, fout)
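The dataload helpers used above are project-specific. As a hedged illustration only, assuming the common windowed normalisation n_i = p_i / p_0 - 1 (each window scaled by its first price p_0), the two denormalisation calls would amount to the sketch below; these bodies are assumptions, not the project's actual code.

def denormalize_point(p0, n):
    # Hypothetical sketch: invert n = p / p0 - 1 for a single value.
    return p0 * (float(n) + 1.0)


def denormalize_sequence(p0, normalised_seq):
    # Hypothetical sketch: apply the same inversion to every value in a
    # predicted window, using that window's base price p0.
    return [denormalize_point(p0, n) for n in normalised_seq]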
Example #2
def main():
    description = 'svm_smoothed'
    results_fname = '{}_{}'.format(description,
                                   int(datetime.now().timestamp()))
    model_fname = "model.sav"

    seq_len = 50
    predict_len = 10

    X_train, y_train, X_test, y_test = dataload.load_data(
        'daily_spx.csv',
        seq_len,
        normalise_window=True,
        smoothing=True,
        smoothing_window_length=5,
        smoothing_polyorder=3,
        reshape=False)

    print('> Data Loaded')

    # Grid Search

    param_grid = dict(
        C=np.logspace(-4, 4),
        gamma=np.logspace(-9, 3),
        kernel=['rbf', 'linear'],
    )

    grid = GridSearchCV(estimator=SVR(),
                        param_grid=param_grid,
                        scoring='neg_mean_squared_error',
                        cv=5)

    start_time = datetime.now()
    grid_result = grid.fit(X_train, y_train)
    end_time = datetime.now() - start_time

    print('> Time elapsed: ', end_time)
    print('> Best parameters:')
    print(grid.best_params_)
    results = pd.DataFrame(grid.cv_results_)
    results.sort_values(by='rank_test_score', inplace=True)

    # Build best model
    model = SVR(kernel=grid.best_params_['kernel'],
                C=grid.best_params_['C'],
                gamma=grid.best_params_['gamma'])
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    # Save Results
    os.makedirs(results_fname)
    results.to_csv('{}/results.csv'.format(results_fname))
    pickle.dump(model, open('{}/{}'.format(results_fname, model_fname), 'wb'))
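Example #3 below loads a model saved this way back with pickle; as a minimal usage sketch (the directory name is whichever timestamped results_fname a run produced, shown here with the value used in Example #3):

import pickle

# Reload the persisted SVR; the path is illustrative only.
with open('svm_smoothed_1530436270/model.sav', 'rb') as fin:
    svr = pickle.load(fin)
# svr.predict(...) then accepts windows shaped as returned by
# dataload.load_data(..., reshape=False).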
Example #3
def main():
    svm_model_path = 'svm_smoothed_1530436270/model.sav'
    results_fname = 'test_svm_smooth_byyear_{}'.format(
        int(datetime.datetime.now().timestamp()))
    seq_len = 50
    predict_len = 7
    os.makedirs(results_fname)

    # Define date ranges
    date_ranges = [(datetime.date(2015, 1, 1), datetime.date(2015, 6, 1)),
                   (datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1))]

    # Load data
    model = pickle.load(open(svm_model_path, 'rb'))
    test_data = [
        dataload.load_data('daily_spx.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]

    # Generate predictions
    #[[print(seq.shape) for seq in test_date_range[0]] for test_date_range in test_data]
    #predictions = [[np.asscalar(model.predict(seq.transpose())) for seq in test_date_range[0]] for test_date_range in test_data]
    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]

    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][
                sequence_index] = dataload.denormalize_sequence(
                    test_data[prediction_index][2][sequence_index * 7],
                    predictions[prediction_index][sequence_index])

    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][
                y_index] = dataload.denormalize_point(
                    test_data[test_data_index][2][y_index],
                    test_data[test_data_index][1][y_index])

    # Save plot
    model_plot = [(predictions[0], '2015'), (predictions[1], '2016'),
                  (predictions[2], '2017')]
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path=results_fname + '/plots.pdf')
Example #4
    def loadData(self):
        print("start loading data..")
        if self.analyze_type == 'expression':
            if self.exp_file is None:
                raise Exception("expression file is not specified")
            dataload_result = dl.load_data(exp_file=self.exp_file, gmt_file=self.gmt_file, analyzing_type=self.analyze_type, exp_normalize_tpye=self.exp_normalization_type)
        elif self.analyze_type == 'mutation':
            if self.mutation_file is None:
                raise Exception("mutation file is not specified")
            elif self.smoothing_source_file is None:
                raise Exception("smoothing source file is not specified")
            dataload_result = dl.load_data(mutation_file=self.mutation_file, gmt_file=self.gmt_file, analyzing_type=self.analyze_type, network_file_for_smoothing=self.smoothing_source_file)
        elif self.analyze_type == 'mut_with_exp':
            if self.mutation_file is None:
                raise Exception("mutation file is not specified")
            elif self.exp_file is None:
                raise Exception("expression file is not specified")
            elif self.smoothing_source_file is None:
                raise Exception("smoothing source file is not specified")

            dataload_result = dl.load_data(exp_file=self.exp_file, mutation_file=self.mutation_file, gmt_file=self.gmt_file, analyzing_type=self.analyze_type, network_file_for_smoothing=self.smoothing_source_file, exp_normalize_tpye=self.exp_normalization_type)
        else:
            raise Exception('unspecified analyzing type')
        return dataload_result
Example #5
def get_AE_feats(encoder, data_frame_in, subtask, params):
    AE_feats = []
    labels = []
    ind_selected = []
    lengths = []
    for idx in data_frame_in.index:
        #print(idx)
        temp_train_Y = data_frame_in[subtask][idx]
        if np.isnan(temp_train_Y):
            print('nan label')
            continue
        temp_X = load_data(data_frame_in, idx, params)
        #temp_X = temp_X + np.random.normal(0,1,(temp_X.shape))
        temp_feats = encoder.predict(temp_X)
        lengths.append(temp_feats.shape[0])
        #temp_pad = np.zeros((max_len-temp_feats.shape[0],latent_dim))
        #temp_feats = np.concatenate((temp_feats,temp_pad),axis=0)
        #temp_feats = temp_feats.reshape(1,-1,latent_dim)
        ind_selected.append(idx)
        AE_feats.append(temp_feats)
        #temp_train_Y = to_categorical(temp_train_Y,5)
        #temp_train_Y = np.expand_dims(temp_train_Y,axis=0)
        labels.append(temp_train_Y)
    #
    max_len = np.max(lengths)
    #
    latent_dim = temp_feats.shape[-1]
    for i in range(len(AE_feats)):
        temp_pad = np.zeros((max_len - lengths[i], latent_dim))
        AE_feats[i] = np.concatenate((AE_feats[i], temp_pad), axis=0)
        AE_feats[i] = AE_feats[i].reshape(1, -1, latent_dim)
    #
    AE_feats = np.vstack(AE_feats)
    labels = np.vstack(labels)
    ind_selected = np.array(ind_selected)
    return AE_feats, labels, ind_selected
Example #6
deconv6 = deconv_layer(conv5_4, 512, 64, 1)
# conv6 = conv_layer2(deconv6, 64, 64, "conv6")
# 20*14
deconv7 = deconv_layer(deconv6, 64, 8, 0)
# conv7 = conv_layer2(deconv7, 8, 8, "conv7")
# 40*28
deconv8 = deconv_layer(deconv7, 8, 1, 0)
# conv8 = conv_layer2(deconv8, 1, 1, "conv8")
# 80*56
y_ = tf.reshape(deconv8, [-1, Out_Width * Out_Height])

# loss function
loss = tf.reduce_sum(tf.square(y_ - y_reshape))
train = tf.train.AdamOptimizer(lr).minimize(loss)

data = dataload.load_data(is_training)

saver = tf.train.Saver()

sess = tf.Session()

sess.run(tf.initialize_all_variables())

for step in range(TRAIN_STEP):
    images, depths = dataload.get_batch(data, BATCH_SIZE)
    # print(depths[0])
    # print(sess.run(conv8, feed_dict={x:images, y:depths}))
    # print("image:", images[8])
    if is_training:
        if step % 1 == 0:
            loss_value = sess.run(loss, feed_dict={x: images, y: depths})
# Import basic libraries and keras
import os
import json
import keras
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, LSTM
from keras.preprocessing.sequence import pad_sequences
from dataload import load_data

# Load input data and labels (0 to 4)
train_x, train_y = load_data()

# Use the 3000 most popular words found in our dataset
max_words = 3000

# Tokenize the data
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_x)
dictionary = tokenizer.word_index
# Save tokenizer dictionary to file
if not os.path.exists('dictionary.json'):
    with open('dictionary.json', 'w') as outfile:
        json.dump(tokenizer.word_index, outfile)

# For each tweet, change each token to its ID in the Tokenizer's word_index
sequences = tokenizer.texts_to_sequences(train_x)
train_x = pad_sequences(sequences, maxlen=300)
train_y = keras.utils.to_categorical(train_y, 5)

# Check if there is a pre-trained model
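The snippet is truncated at the comment above. As a hedged sketch only, the check it hints at might look like the following; the file name, layer sizes and compile settings are assumptions, not taken from the original project.

# Hypothetical continuation: reuse a saved model if one exists,
# otherwise build a small LSTM classifier for the 5 label classes.
if os.path.exists('model.h5'):
    model = load_model('model.h5')
else:
    model = Sequential()
    model.add(Embedding(max_words, 128, input_length=300))
    model.add(LSTM(64))
    model.add(Dense(5, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])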
Example #8
    # #             ypos += val
    # #             ax.text(rowNum, ypos , "{0:.2f}".format(val), color='black' ,ha='center')
    # #         featuer+=1
    # #     ypos = 0
    # plt.title('Average Number of Added Features in Each Epoch')
    # plt.savefig('./added_featues({0}).png'.format(max_dist))
    # print(frams)

if __name__ == '__main__':
    torch.multiprocessing.freeze_support()
    train = False
    # Define what device we are using
    # print("CUDA Available: ",torch.cuda.is_available())
    device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
    # with surrogate model
    (x_malware, x_benign), test_data, feature_vectore_size, features = dataload.load_data(train)
    # (c_x_malware,c_x_benign) , c_test_data , c_feature_vectore_size = dataload.load_data(train)
    # (s_x_malware,s_x_benign) , s_test_data , s_feature_vectore_size = dataload.load_data(train)
    x_mal = x_malware
    adv_sample_path = './MalwareDataset/adversarial_samples_malJSMA_all/'

    # in black box setting
    # classifiers = ['SVM_MALJSMA_all', 'SVM_MALJSMA_all_R1', 'SVM_MALJSMA_all_R2', 'SVM_MALJSMA_all_R3', 'SVM_MALJSMA_all_R4', 'SVM_MALJSMA_all_R5', 'SVM_MALJSMA_all_R6', 'SVM_MALJSMA_all_R7', 'SVM_MALJSMA_all_R8', 'SVM_MALJSMA_all_R9', 'SVM_MALJSMA_all_R10', 'SVM_MALJSMA_all_R11' , 'SVM_MALJSMA_all_R12', 'SVM_R13', 'SVM_R14', 'SVM_R15', 'SVM_R16', 'SVM_R17']# ,'RF' , 'RBF_SVM', 'LR', 'DT' , 'KNN', 'MLP']# ,'DNN']
    # classifiers = ['SVM_MALJSMA_all', 'SVM_MALJSMA_all_R1', 'SVM_MALJSMA_all_R2', 'SVM_MALJSMA_all_R3', 'SVM_MALJSMA_all_R4', 'SVM_MALJSMA_all_R5', 'SVM_MALJSMA_all_R6', 'SVM_MALJSMA_all_R7', 'SVM_MALJSMA_all_R8', 'SVM_MALJSMA_all_R9', 'SVM_MALJSMA_all_R10', 'SVM_MALJSMA_all_R11' , 'SVM_MALJSMA_all_R12', 'SVM_R13', 'SVM_R14', 'SVM_R15', 'SVM_R16', 'SVM_R17']# ,'RF' , 'RBF_SVM', 'LR', 'DT' , 'KNN', 'MLP']# ,'DNN']


    ####<<<<<<<<generalizability
    # classifiers=[]
    # models_path = './models/SVMmodelsESORICS2020Genralization/'
    # models = [f for f in listdir(models_path) if isfile(join(models_path,f))]
    # for model in models:
Example #9
import numpy as np

from gensim.models import Word2Vec

from dataload import load_data


def train_word2vec(data):
    model = Word2Vec(data, sg=1, size=100, window=5, min_count=1, workers=4)
    model.save("word2vec.model")


if __name__ == '__main__':
    docs, _ = load_data("data_ma.npz", 40, 25000, 700)
    train_word2vec(docs)
    model2 = Word2Vec.load("word2vec.model")
    temp = model2.wv[docs[0]]
    temp = temp[:20]
    print(temp.shape)
    temp2 = np.zeros((100, 10)).T
    print(temp2.shape)
    temp = np.insert(temp2, 0, values=temp, axis=0)
    print(temp.shape)

    # [model2[text] for text in docs]
    # print(temp)
Example #10
        mse = criterion(SR, target)
        psnr = 10 * log10(1 / mse.data[0])
        avg_psnr += psnr
        print(iteration)

    print("===> Avg. SR PSNR: {:.4f} dB".format(avg_psnr / iteration))


opt = parser.parse_args()
print(opt)
gpuid = 0

print("===> Loading datasets")
root_dir = '/tmp4/hang_data/DIV2K'
test_dir = 'DIV2K_validate_HR_x4'
targets = dataload.load_data(root_dir, test_dir)
test_dir = 'DIV2K_validate_LR_x4'
inputs = dataload.load_data(root_dir, test_dir)

test_images = {"targets": targets, "inputs": inputs}
SR_dir = join(root_dir, 'SRResNet_DIV_train_x4')
if os.path.isdir(SR_dir):
    pass
else:
    os.mkdir(SR_dir)

model = torch.load(opt.model,
                   map_location=lambda storage, loc: storage)["model"]
#model = torch.load(opt.model)["model"]
model = model.cuda(gpuid)
criterion = torch.nn.MSELoss(size_average=True)
Example #11
File: main.py Project: preke/PELD
args.loss_function = 'Focal'  # CE or MSE or Focal
args.base = 'RoBERTa'
args.device = 0
args.SEED = 42
args.MAX_LEN = 256
args.batch_size = 16
args.lr = 1e-4
args.adam_epsilon = 1e-8
args.epochs = 50
args.result_name = args.Senti_or_Emo + '_Mode_' + str(
    args.mode) + '_' + args.loss_function + '_Epochs_' + str(
        args.epochs) + '.csv'

## LOAD DATA
from dataload import load_data
train_length, train_dataloader, valid_dataloader, test_dataloader = load_data(
    args, DATA_PATH)
args.train_length = train_length

## TRAIN THE MODEL
from model import Emo_Generation
from transformers import RobertaConfig, RobertaModel, PreTrainedModel
from train import train_model

if args.base == 'RoBERTa':
    model = Emo_Generation.from_pretrained('roberta-base',
                                           mode=args.mode).cuda(args.device)
else:
    model = Emo_Generation.from_pretrained('bert-base-uncased',
                                           mode=args.mode).cuda(args.device)
train_model(model, args, train_dataloader, valid_dataloader, test_dataloader)
Example #12
output = drop(output)
# Fully connected / sigmoid layer
weight_dense = Dense(ALL_TAGS, activation='softmax')  # 2500
tag_out = weight_dense(output)

model = Model(inputs=[wordvec_input, lda_input], outputs=[tag_out])
# compile model
model.compile(optimizer='adam',
              loss=mean_negative_log_probs,
              metrics=[compute_precision, compute_recall])
model.summary()

# Load the data
new_brs, sfs = load_data(path="data_ma.npz",
                         lenth=40,
                         num_words=25000,
                         num_sfs=700,
                         per=0.8)

# new_brs = new_brs[:200]
# sfs = sfs[:200]

sfs_back = sfs[:]
# Trim the tags: keep only the first x for each article
sfs = [sf[:1] for sf in sfs]

# Load the dictionary file
with open("dictionary.b", "rb") as f:
    dic = pickle.load(f)
# Load the LDA model
lda_model = LdaModel.load("lda_model")
Example #13
	#for i, data in enumerate(predicted_data[1][0]):
		#padding = [None for p in range(i * prediction_len)]
		#ax2.plot(padding + data, label='Prediction')
		##plt.legend()
	#ax2.set_title(predicted_data[1][1])
	plt.show()

if __name__=='__main__':
	global_start_time = time.time()
	#epochs  = 1
	epochs  = 100
	seq_len = 40

	print('> Loading data... ')

	X_train, y_train, X_test, y_test = dataload.load_data('daily_spx.csv', seq_len, True)

	print('> Data Loaded. Compiling...')

	#lstm_model = lstm.build_model([1, seq_len, 100, 1])
	model_layers = [1, 2, 3, 4]
	cnn_models = [
			cnn_batchnorm_lstm.build_model([1, seq_len, 100, 1], x) for x in model_layers
		]
	[model.fit(
	    X_train,
	    y_train,
	    batch_size=512,
	    nb_epoch=epochs,
	    validation_split=0.05)
		for model in cnn_models
Example #14
import model

vocabulary_size = 50000
embedding_size = 128

maxlen = 150  # cut texts after this number of words (among top max_features most common words)
batchSize = 32
classNum = 2
learning_rate = 0.001
epochs = 100
ckpt_dir = ""

path = 'C:\\wuwei\\work\\github\\data\\imdb.pkl'

print('Loading data...')
trainData, testData = dataload.load_data(path=path, nb_words=vocabulary_size)
print(len(trainData), 'train sequences')
print(len(testData), 'test sequences')

trainBatches = dataload.get_batches(data=trainData)
testData = dataload.get_batches(data=testData)

def train():
    graph = tf.Graph()
    with graph.as_default():

        global_step = tf.contrib.framework.get_or_create_global_step()

        encoder_inputs = tf.placeholder(shape=(batchSize, None), dtype=tf.int32, name='encoder_inputs')

        class_targets = tf.placeholder(shape=(batchSize,), dtype=tf.int32, name='class_targets')
image_nc = 1
batch_size = 128
hidden_size = 200
# noise_size = 100
output_size = 1
gen_input_nc = image_nc

# Define what device we are using
print("CUDA Available: ", torch.cuda.is_available())
device = torch.device("cuda" if (
    use_cuda and torch.cuda.is_available()) else "cpu")

# test adversarial examples in Drebin training dataset
# mnist_dataset = torchvision.datasets.MNIST('./dataset', train=True, transform=transforms.ToTensor(), download=True)
# train_dataloader = DataLoader(mnist_dataset, batch_size=batch_size, shuffle=False, num_workers=1)
train_laoder, feature_vectore_size = dataload.load_data(train=True)

# load the pretrained model
pretrained_model = "./malware_classifier_net.pth"
target_model = malware_classifier_net(input_size=feature_vectore_size,
                                      hidden_size=hidden_size,
                                      output_size=output_size).to(device)
target_model.load_state_dict(torch.load(pretrained_model))
target_model.eval()

# load the generator of adversarial examples
pretrained_generator_path = './models/net_malG_epoch_60.pth'
pretrained_G = models.Mal_Generator(
    input_size=feature_vectore_size,
    hidden_size=hidden_size,
    output_size=feature_vectore_size).to(device)
    # description: several-word description of the purpose of the run
    description = 'single_gru'
    results_fname = '{}_{}'.format(description,
                                   int(datetime.now().timestamp()))

    early_stopping = EarlyStopping(patience=20)
    tensorboard = TensorBoard(log_dir='tensorboard', write_grads=True)

    for seq_len in seq_lens:
        print('Seq len: {}'.format(seq_len))
        print('> Loading data... ')

        X_train, y_train, X_test, y_test = dataload.load_data(
            'daily_spx.csv',
            seq_len,
            normalise_window=True,
            smoothing=False,
            smoothing_window_length=5,
            smoothing_polyorder=3,
            reshape=True)
        #X_train, y_train, X_test, y_test = dataload.load_sin_data(seq_len, normalise_window=True)

        print('> Data Loaded. Compiling...')

        # Grid search parameters
        kernel_sizes = [5, 9]
        step_sizes = [2]
        single_branch = True
        stride = [3]
        lstm_units = [200, 400]
        branches = [3]
        model = False
    # #     ypos = 0
    # plt.title('Average Number of Added Features in Each Epoch')
    # plt.savefig('./added_featues({0}).png'.format(max_dist))
    # print(frams)


if __name__ == '__main__':
    torch.multiprocessing.freeze_support()
    train = False
    # Define what device we are using
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if (
        use_cuda and torch.cuda.is_available()) else "cpu")
    # with surrogate model
    (x_malware, x_benign
     ), test_data, feature_vectore_size, features = dataload.load_data(train)
    # (c_x_malware,c_x_benign) , c_test_data , c_feature_vectore_size = dataload.load_data(train)
    # (s_x_malware,s_x_benign) , s_test_data , s_feature_vectore_size = dataload.load_data(train)
    x_mal = x_malware
    adv_sample_path = './MalwareDataset/adversarial_samples_malJSMA_all/'

    # in black box setting
    # classifiers = ['SVM_MALJSMA_all', 'SVM_MALJSMA_all_R1', 'SVM_MALJSMA_all_R2', 'SVM_MALJSMA_all_R3', 'SVM_MALJSMA_all_R4', 'SVM_MALJSMA_all_R5', 'SVM_MALJSMA_all_R6', 'SVM_MALJSMA_all_R7', 'SVM_MALJSMA_all_R8', 'SVM_MALJSMA_all_R9', 'SVM_MALJSMA_all_R10', 'SVM_MALJSMA_all_R11' , 'SVM_MALJSMA_all_R12', 'SVM_R13', 'SVM_R14', 'SVM_R15', 'SVM_R16', 'SVM_R17']# ,'RF' , 'RBF_SVM', 'LR', 'DT' , 'KNN', 'MLP']# ,'DNN']
    # classifiers = ['SVM_MALJSMA_all', 'SVM_MALJSMA_all_R1', 'SVM_MALJSMA_all_R2', 'SVM_MALJSMA_all_R3', 'SVM_MALJSMA_all_R4', 'SVM_MALJSMA_all_R5', 'SVM_MALJSMA_all_R6', 'SVM_MALJSMA_all_R7', 'SVM_MALJSMA_all_R8', 'SVM_MALJSMA_all_R9', 'SVM_MALJSMA_all_R10', 'SVM_MALJSMA_all_R11' , 'SVM_MALJSMA_all_R12', 'SVM_R13', 'SVM_R14', 'SVM_R15', 'SVM_R16', 'SVM_R17']# ,'RF' , 'RBF_SVM', 'LR', 'DT' , 'KNN', 'MLP']# ,'DNN']

    ####<<<<<<<<generalizability
    # classifiers=[]
    # models_path = './models/SVMmodelsESORICS2020Genralization/'
    # models = [f for f in listdir(models_path) if isfile(join(models_path,f))]
    # for model in models:
    #     classifiers.append(model[0:-4])
Example #18
model,encoder = make_DNN_model(feat_size=params['frame_length']*3,latent_dim=latent_dim)

checkpointer = ModelCheckpoint(filepath=savedir+'mlp_AE_uad_'+str(use_ancillarydata)+params_append_str+'_ld_'+str(latent_dim)+'.h5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
#model.compile(optimizer='adam',loss='mse',metrics=['mse'])

lr=0.001
sgd = SGD(lr=lr, decay=0, momentum=0.9, nesterov=True)
model.compile(optimizer='adam',loss='mse',metrics=['mae'])

batch_size = 500
epochs = 200
model.fit(train_X,train_X,validation_split=0.2,batch_size=batch_size,epochs=epochs,shuffle=True, verbose=1,callbacks=[checkpointer, early_stopping])

model.load_weights(savedir+'mlp_AE_uad_'+str(use_ancillarydata)+params_append_str+'_ld_'+str(latent_dim)+'.h5') 
encoder.save(savedir+'mlp_encoder_uad_'+str(use_ancillarydata)+params_append_str+'_ld_'+str(latent_dim)+'.h5')

'''
encoder = load_model(savedir + 'mlp_encoder_uad_' + str(use_ancillarydata) +
                     '_ld_' + str(latent_dim) + '.h5')

if saveAEFeats:
    save_feats_path = '/export/b19/mpgill/BeatPD/AE_30ft_high_pass/'
    for idx in df_train_label.index:
        print(idx)
        temp_X = load_data(df_train_label, idx, cleanParams)
        temp_feats = encoder.predict(temp_X)
        name = df_train_label["measurement_id"][idx]
        sio.savemat(save_feats_path + name + '.mat', {'feat': temp_feats})
Example #19
import tensorflow as tf
from dataload import load_data

DATA_DIR = './data/'
data = load_data(DATA_DIR)

# Placeholders and weights for the input data

X = tf.placeholder(tf.float32, [None, 1024])  #1024
Y = tf.placeholder(tf.float32, [None, 22])  #42

W1 = tf.Variable(tf.random_normal([1024, 256], stddev=0.01))
L1 = tf.nn.relu(tf.matmul(X, W1))
W2 = tf.Variable(tf.random_normal([256, 256], stddev=0.01))
L2 = tf.nn.relu(tf.matmul(L1, W2))
W3 = tf.Variable(tf.random_normal([256, 22], stddev=0.01))
model = tf.matmul(L2, W3)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 40
total_batch = int(data.train.num_examples / batch_size)

for epoch in range(100):
    total_cost = 0
import cnn_model_words
import dataload
import keras
import random
import numpy as np
from keras import backend as K

if __name__ == '__main__':
    BATCH_SIZE = 100
    EPOCHS = 100
    NUM_WORDS = 5000
    HEIGHT = 32
    WIDTH = 100

    # Load data
    word_data = dataload.load_data(NUM_WORDS)
    print('Loaded data')

    # Split into train and test sets
    train_images = []
    train_labels = []
    test_images = []
    test_labels = []

    random.seed(100)
    for k, v in word_data.items():
        # Shuffle data
        pixels = [p['pixel_array'] for p in v['points']]
        random.shuffle(pixels)
        del pixels[int(len(pixels) * 0.5):]  # Trims list to save memory
Example #21
def index():
    if ac.len() < 1:
        load_data(redis_server, redis_port, redis_password)
    return render_template('search.html')
Example #22
def main(_):

    # Load parameters from yaml file specified in argv
    paramfilename = sys.argv[1]
    modelname, param = load_config(paramfilename)
    n_classes = cfg.N_CLASSES

    with tf.Session() as sess:
        tf.logging.set_verbosity(tf.logging.INFO)

        # Placeholders for signals preprocessing inputs
        X_data = tf.placeholder(tf.float32, [None, cfg.SAMRATE], name='X_data')

        noise_factor = tf.placeholder(tf.float32,
                                      shape=(),
                                      name='noise_factor')
        noise_frac = tf.placeholder(tf.float32, shape=(), name='noise_frac')

        # Define the audio features
        x_mfcc, x_mel, x_zcr, x_rmse = signalProcessBatch(
            X_data,
            noise_factor=noise_factor,
            noise_frac=noise_frac,
            window=param['window'],
            maxamps=cfg.MAXAMPS,
            sr=cfg.SAMRATE,
            num_mel_bins=param['num_mel_bins'],
            num_mfccs=param['num_mfccs'])

        # Placeholder variables output (1-hot vectors of size n_classes)
        y_true = tf.placeholder(tf.float32,
                                shape=[None, n_classes],
                                name='y_true')
        y_true_class = tf.argmax(y_true, 1, name='y_true_class')

        # Dropout keep probability and training flag
        dropout_prob = tf.placeholder(tf.float32,
                                      shape=(),
                                      name='dropout_prob')
        is_training = tf.placeholder(tf.bool, name="is_training")

        # Prediction from model
        model = buildModel(modelname)
        y_pred = model(x_mel,
                       x_mfcc,
                       x_zcr,
                       x_rmse,
                       dropout_prob=dropout_prob,
                       is_training=is_training)

        # Cross entropy loss function with softmax then takes mean
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_pred,
                                                    labels=y_true))
        tf.summary.scalar('loss', loss)

        # Train and backprop gradients function
        optimizer = tf.train.AdamOptimizer(
            learning_rate=param['learning_rate']).minimize(loss)

        # Evaluation and accuracy
        y_pred_class = tf.argmax(y_pred, 1, name='y_pred_class')
        correct_prediction = tf.equal(y_pred_class, y_true_class)
        confusion_matrix = tf.confusion_matrix(y_true_class,
                                               y_pred_class,
                                               num_classes=n_classes)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

        # Merge all summaries
        merged = tf.summary.merge_all()

        # Saver for checkpoints
        saver = tf.train.Saver(tf.global_variables())

        # Set path to summary logs and checkpoints
        now = datetime.now()
        logs_path = os.path.join(cfg.OUT_DIR, now.strftime("%Y%m%d-%H%M%S"),
                                 'summaries')

        # Create summary writers
        train_writer = tf.summary.FileWriter(os.path.join(logs_path, 'train'),
                                             graph=sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(logs_path, 'test'),
                                            graph=sess.graph)

        # Initialize variables
        tf.global_variables_initializer().run()

        # Main training section
        start_time = time.time()
        msg = "\n====================\nStarting training...\n===================="
        tf.logging.info(msg)

        # Load the audio file info dataframe
        df = load_data(cfg.DATA_DIR)

        # Log
        msg = "\nModel: {}\nParam File: {}\nIterations: {}"
        tf.logging.info(
            msg.format(modelname, paramfilename, param['num_iterations']))

        tf.logging.info(" Begin iterations...")
        for i in xrange(param['num_iterations']):

            # Unknown training weight adjustment for first 5000 cycles
            if i < 5000:
                w = (1 + param['unknown_weight_scaler'] *
                     i) * param['unknown_weight']
            else:
                w = param['unknown_weight']

            # Get the training batch
            X_train, y_true_batch = load_batch(
                df,
                cfg.DATA_DIR,
                batch_size=param['batch_size'],
                silence_size=param['silence_size'],
                label='train',
                random=True,
                seed=None,
                w=w,
                samples=cfg.SAMRATE)

            # Preprocess the training batch
            x_mfcc_batch, x_mel_batch, x_zcr_batch, x_rmse_batch = sess.run(
                [x_mfcc, x_mel, x_zcr, x_rmse],
                feed_dict={
                    X_data: X_train,
                    noise_factor: param['noise_factor_value'],
                    noise_frac: param['noise_frac_value']
                })

            # Training optimization
            sess.run(optimizer,
                     feed_dict={
                         x_mel: x_mel_batch,
                         x_mfcc: x_mfcc_batch,
                         x_zcr: x_zcr_batch,
                         x_rmse: x_rmse_batch,
                         y_true: y_true_batch,
                         dropout_prob: param['dropout_prob_value'],
                         is_training: True
                     })

            # Checkpoint save and validation step
            if ((i + 1) % param['checkpoint_step']
                    == 0) or (i == param['num_iterations'] - 1):

                # Checkpoint
                checkpoint_path = os.path.join(
                    logs_path, "{}-{}.ckpt".format(modelname,
                                                   paramfilename[:-4]))
                msg = " Saving checkpoint to: {}-{}"
                tf.logging.info(msg.format(checkpoint_path, i + 1))
                saver.save(sess, checkpoint_path, global_step=i + 1)

                # Load the validation batches
                val_batch_size = 100
                total_val_accuracy = 0
                total_conf_matrix = None
                val_set_size = 6700
                for j in xrange(0, val_set_size,
                                val_batch_size - param['silence_size']):
                    X_val, y_true_val = load_batch(
                        df,
                        cfg.DATA_DIR,
                        batch_size=val_batch_size,
                        silence_size=param['silence_size'],
                        label='val',
                        random=False,
                        seed=j,
                        w=1.0,
                        samples=cfg.SAMRATE)

                    # Preprocess the validation batch
                    x_mfcc_val, x_mel_val, x_zcr_val, x_rmse_val = sess.run(
                        [x_mfcc, x_mel, x_zcr, x_rmse],
                        feed_dict={
                            X_data: X_val,
                            noise_factor: 0.0,
                            noise_frac: 0.0
                        })

                    # Validation summary
                    val_summary, loss_val, acc_val, conf_matrix = sess.run(
                        [merged, loss, accuracy, confusion_matrix],
                        feed_dict={
                            x_mel: x_mel_val,
                            x_mfcc: x_mfcc_val,
                            x_zcr: x_zcr_val,
                            x_rmse: x_rmse_val,
                            y_true: y_true_val,
                            dropout_prob: 1.0,
                            is_training: False
                        })
                    total_val_accuracy += (acc_val *
                                           val_batch_size) / val_set_size
                    if total_conf_matrix is None:
                        total_conf_matrix = conf_matrix
                    else:
                        total_conf_matrix += conf_matrix

                msg = " Confusion Matrix:\n {}"
                tf.logging.info(msg.format(total_conf_matrix))
                msg = " VALIDATION ACC: {:6f}, (N = {})"
                tf.logging.info(msg.format(total_val_accuracy, val_set_size))

            # Display step
            if (i == 0) or ((i + 1) % param['display_step']
                            == 0) or (i == param['num_iterations'] - 1):
                # Training summary, loss and accuracy
                train_summary, loss_train, acc_train = sess.run(
                    [merged, loss, accuracy],
                    feed_dict={
                        x_mel: x_mel_batch,
                        x_mfcc: x_mfcc_batch,
                        x_zcr: x_zcr_batch,
                        x_rmse: x_rmse_batch,
                        y_true: y_true_batch,
                        dropout_prob: 1.0,
                        is_training: False
                    })
                train_writer.add_summary(train_summary, i + 1)

                # Display message
                msg = "  OPTIMIZE STEP: {:6d}, LOSS, {:.6f}, ACC: {:.6f}"
                tf.logging.info(msg.format(i + 1, loss_train, acc_train))

                # Check if loss is below minimum
                if loss_train < param['min_loss']:
                    msg = " Min loss acheived: {}"
                    tf.logging.info(msg.format(loss_train))
                    break

        # Load the testing batches
        test_batch_size = 100
        total_test_accuracy = 0
        total_conf_matrix = None
        test_set_size = 6700
        for j in xrange(0, test_set_size,
                        test_batch_size - param['silence_size']):
            X_test, y_true_test = load_batch(
                df,
                cfg.DATA_DIR,
                batch_size=test_batch_size,
                silence_size=param['silence_size'],
                label='test',
                random=False,
                seed=j,
                w=1.0,
                samples=cfg.SAMRATE)

            # Preprocess the testing batch
            x_mfcc_test, x_mel_test, x_zcr_test, x_rmse_test = sess.run(
                [x_mfcc, x_mel, x_zcr, x_rmse],
                feed_dict={
                    X_data: X_test,
                    noise_factor: 0.0,
                    noise_frac: 0.0
                })

            # Testing summary
            test_summary, loss_test, acc_test, conf_matrix = sess.run(
                [merged, loss, accuracy, confusion_matrix],
                feed_dict={
                    x_mel: x_mel_test,
                    x_mfcc: x_mfcc_test,
                    x_zcr: x_zcr_test,
                    x_rmse: x_rmse_test,
                    y_true: y_true_test,
                    dropout_prob: 1.0,
                    is_training: False
                })
            test_writer.add_summary(test_summary, i + 1)

            total_test_accuracy += (acc_test * test_batch_size) / test_set_size
            if total_conf_matrix is None:
                total_conf_matrix = conf_matrix
            else:
                total_conf_matrix += conf_matrix

        msg = " Confusion Matrix:\n {}"
        tf.logging.info(msg.format(total_conf_matrix))
        msg = " TESTING ACC: {:6f}, (N = {})"
        tf.logging.info(msg.format(total_test_accuracy, test_set_size))

        # End-time
        end_time = time.time()
        msg = " Time usage: {}"
        tf.logging.info(
            msg.format(timedelta(seconds=int(round(end_time - start_time)))))
Example #23
args.device       = torch.device('cuda')
args.kernel_num   = 100
args.kernel_sizes = '3,4,5'
args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
args.dropout      = 0.1

# Preprocess
if not os.path.exists(TEST_PRE_PATH):
    logger.info('Preprocessing begin...')
    preprocess_write(TEST_PATH, TEST_PRE_PATH)
else:
    logger.info('No need to preprocess!')

# Load data
logger.info('Loading data begin...')
text_field, label_field, train_data, train_iter, dev_data, dev_iter = load_data(amazon_train, amazon_test, args)
text_field.build_vocab(train_data, dev_data, min_freq=10)
label_field.build_vocab(train_data)
logger.info('Length of vocab is: ' + str(len(text_field.vocab)))


args.vocab_size = len(text_field.vocab)
args.word_2_index = text_field.vocab.stoi  # dict mapping word -> index
args.index_2_word = text_field.vocab.itos  # list mapping index -> word

# Initial word embedding
logger.info('Getting pre-trained word embedding ...')
args.pretrained_weight = get_pretrained_word_embed(small_glove_path, args, text_field)  


# Build model and train
Example #24
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from dataload import load_data
from sklearn import preprocessing
from IPython import embed
import time
h = .02  # step size in the mesh

X_train, y_train, X_test, y_test = load_data()
# test_label = np.where(y_test == 1)[1]
# min_max_scaler = preprocessing.MinMaxScaler()
# X_train_minmax = min_max_scaler.fit_transform(X_train)
# X_test_minmax = min_max_scaler.fit_transform(X_test)

scaler = preprocessing.StandardScaler().fit(X_train)
trainX = scaler.transform(X_train)
testX = scaler.transform(X_test)

names = [
    "Nearest Neighbors", "Decision Tree", "Neural Net", "AdaBoost",
    "Naive Bayes"
]

classifiers = [
Example #25
def main():

    # Original 4 pipelines
    model = load_model('single_cnn_gru_1542783564/seq_len_50/model-3.h5')

    # 2 New pipelines for Bidir GRU
    #model = load_model('../final_sem_code/single_cnn_bidir_gru_1548982892/seq_len_50/model-3.h5')

    seq_len = 50
    predict_len = 7

    date_ranges = [(datetime.date(2016, 1, 1), datetime.date(2016, 6, 1)),
                   (datetime.date(2017, 1, 1), datetime.date(2017, 6, 1)),
                   (datetime.date(2018, 1, 1), datetime.date(2018, 6, 1))]
    '''
    date_ranges = [(datetime.date(2015,1,1),datetime.date(2015,12,31)),                   
                   (datetime.date(2016,1,1),datetime.date(2016,12,31)),
                   (datetime.date(2017,1,1),datetime.date(2017,12,31))]
    '''

    #test_data = [dataload.load_data('daily_spx.csv', seq_len, normalise_window=True, smoothing=False, date_range=date_range, train=False) for date_range in date_ranges]
    test_data = [
        dataload.load_data('../2018_data/Yahoo_2000_to_2018.csv',
                           seq_len,
                           normalise_window=True,
                           smoothing=False,
                           date_range=date_range,
                           train=False) for date_range in date_ranges
    ]

    predictions = [
        dataload.predict_sequences_multiple(model, test[0], seq_len,
                                            predict_len) for test in test_data
    ]
    scores = [
        model.evaluate(test[0], test[1], verbose=0) for test in test_data
    ]

    for prediction_index in range(len(predictions)):
        for sequence_index in range(len(predictions[prediction_index])):
            predictions[prediction_index][
                sequence_index] = dataload.denormalize_sequence(
                    test_data[prediction_index][2][sequence_index * 7],
                    predictions[prediction_index][sequence_index])

    for test_data_index in range(len(test_data)):
        for y_index in range(len(test_data[test_data_index][1])):
            test_data[test_data_index][1][
                y_index] = dataload.denormalize_point(
                    test_data[test_data_index][2][y_index],
                    test_data[test_data_index][1][y_index])

    model_plot = [(predictions[0], '2016'), (predictions[1], '2017'),
                  (predictions[2], '2018')]

    #model_plot = [(predictions[0], 'Mar 2002 to Aug 2002'), (predictions[1], 'Mar 2015 to Aug 2015'),(predictions[2], 'Jan 2016 to Apr 2017')]
    #model_plot = [(predictions[0], 'Bullish'), (predictions[1], 'Bearish'), (predictions[2], 'Neutral')]
    results_fname = 'test_single_cnn_bidir_gru_{}'.format(
        int(datetime.datetime.now().timestamp()))
    #os.makedirs(results_fname)
    plot_results_multiple(model_plot, [t[1] for t in test_data],
                          predict_len,
                          fig_path='plots_test/plots.pdf')
    with open('plots_test' + "/score.txt", "w") as fout:
        for score in scores:
            pprint.pprint(score, fout)
Example #26
from dnn2 import DNN2
import dataload

_, test_data = dataload.load_data(
    'C:/Users/student/machineLearning/kerasTest/iris.csv', 4, 0.6)

model = DNN2(3, 10, 3)
# number of input nodes = train_data.x_data.shape[1] => 3
# number of output nodes = train_data.y_data.shape[1] => 3
# number of hidden nodes = 10

model.load_weights('kerasTest/mnist_mlp_model.h5')

loss, accuracy = model.evaluate(test_data.x_data,
                                test_data.y_data,
                                batch_size=100)

print(loss, accuracy)

# print('Predictions:', model.predict(test_data.x_data).flatten())

# loss, accuracy = model.evaluate(test_data.x_data, test_data.y_data, batch_size=100)

# print(loss, accuracy)
Example #27
import tensorflow as tf
from dataload import load_data
from classify import Classifier

# Prepare the data
train_data, test_data = load_data(
    'C:/Users/student/machineLearning/Tensorflow/iris.csv', 4, 0.6)

# Decide the number of nodes
num_input = train_data.x_data.shape[1]  # number of input nodes
num_output = train_data.y_data.shape[1]  # number of output nodes
num_hidden = 10
num_hidden2 = 20

# Create the classifier
iris = Classifier(num_input, num_output, num_hidden, num_hidden2, 0.01)

# Train and evaluate
iris.train(train_data, 1000, 100)
iris.test(test_data)

# Query
answer = iris.query([[5.8, 4, 1.2, 0.2]])
print(answer)
iris.close()