def vqa_answer2(imgFeat, question, imageid, socketid):
    imgFeat = np.asarray([imgFeat])
    ques = process_string(question).encode('utf-8')
    print "Ques:", ques
    print "imgFeat Shape:", imgFeat.shape
    train_q_toked = qtok.texts_to_sequences([ques])
    # q_word_index = qtok.word_index
    train_q_toked = sequence.pad_sequences(train_q_toked, maxlen=20)
    print "train token:", train_q_toked.shape
    # max_question_features = len(q_word_index.keys()) + 1
    train_q_toked = np.asarray(train_q_toked)
    predictions = model.predict([train_q_toked, imgFeat])
    print predictions
    # indexes of the top-5 scores, highest first
    label_set = sorted(range(len(predictions[0])),
                       key=lambda x: predictions[0][x], reverse=True)[:5]
    a_word_rev = dict((v, k) for k, v in a_word_idx.items())
    ans = []
    for i in range(5):
        idx = label_set[i] + 1
        ans.append([a_word_rev[idx], predictions[0][idx - 1]])
        print 'The Top %d predicted Answer is : %s, with score %.6f' % (
            i + 1, a_word_rev[idx], predictions[0][idx - 1])
    web_result = {}
    web_result[imageid] = ans
    r.publish('result-rest', json.dumps({'web_result': web_result}))
    r.publish('chat', json.dumps({'web_result': json.dumps(web_result),
                                  'socketid': str(socketid)}))
    return ans
def cateAccuracy(model_fitted, X_test, Y_test):
    Y_test = categorical_probas_to_classes(Y_test)
    Y_predict = model_fitted.predict(X_test)
    if len(Y_predict.shape) != 1:  # convert from one-hot encoding
        Y_predict = categorical_probas_to_classes(Y_predict)
    accu_count = {}
    accu_total = {}
    for cat in set(Y_test):
        total = list(Y_test).count(cat)
        accu_total[cat] = total
        accu_count[cat] = 0
    for iidx, cat in enumerate(Y_test):
        if cat == Y_predict[iidx]:
            accu_count[cat] = accu_count[cat] + 1
    sum1 = 0
    sum2 = 0
    for i in range(len(set(Y_test))):
        sum1 = sum1 + accu_total[i]
        sum2 = sum2 + accu_count[i]
    print(sum2 / float(sum1))  # overall accuracy
    return [accu_count[i] / float(accu_total[i])
            for i in range(len(set(Y_test)))]
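# A minimal compatibility sketch (an editorial addition, not from the original
# snippets): categorical_probas_to_classes lived in the old keras.utils.np_utils
# module and was dropped in Keras 2. The same conversion from an
# (n_samples, n_classes) probability matrix to integer labels is a row-wise argmax.
import numpy as np

def probas_to_classes_compat(probas):
    # index of the highest probability in each row
    return np.argmax(probas, axis=1)

# probas_to_classes_compat(np.array([[0.1, 0.9], [0.8, 0.2]]))  # -> array([1, 0])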
def fn_model(fn, stype='testing'):
    # parse config options to use (except for lr, which conflicts with the . param separator)
    p = dict([x.split(":") for x in
              os.path.dirname(fn.replace("tmp/w/", "")).replace("lr:0.001.", "").split(".")])
    strk = ['af', 'ptype']  # force string conversion
    boolk = ['w2v', 'randposts', 'noempty', 'etrain', 'balbatch', 'cosine']  # force bool conversion
    for k in p:
        if k == 'prep':
            if p[k].lower() == 'none':
                p[k] = None
            else:
                p[k] = str(p[k])
        elif k in strk:
            p[k] = str(p[k])
        elif k in boolk:
            p[k] = (p[k] == 'True')
        else:
            p[k] = int(p[k])
    p["lr"] = 0.001
    tf.reset_default_graph()
    model, _ = build_model(p)
    model.load_weights(fn)
    _, genf = datagen(p['max_posts'], p['max_length'], stype=stype, force_full=True,
                      mintf=p['mintf'], mindf=p['mindf'], noempty=p['noempty'],
                      prep=p['prep'], batch_size=9999999999)
    for i, (X, y) in enumerate(genf()):
        assert i == 0, "test set should contain only one batch (and it should not be sampled)"
        val_X, val_y = X, categorical_probas_to_classes(y)
    y_pred = categorical_probas_to_classes(model.predict(val_X, batch_size=32))
    y_true = val_y
    posf1 = sklearn.metrics.f1_score(y_true, y_pred, pos_label=1, average='binary')
    posp = sklearn.metrics.precision_score(y_true, y_pred, pos_label=1, average='binary')
    posr = sklearn.metrics.recall_score(y_true, y_pred, pos_label=1, average='binary')
    return (posf1, posp, posr), y_pred, val_y, y_true
def modelMetrics(model_fitted, X_test, Y_test):
    Y_predict = model_fitted.predict(X_test)
    if len(Y_predict.shape) != 1:  # convert from one-hot encoding
        Y_predict = categorical_probas_to_classes(Y_predict)
    Y_true = categorical_probas_to_classes(Y_test)
    # note: sklearn expects (y_true, y_pred)
    report = classification_report(Y_true, Y_predict)  # per-class F1 scores
    accuracy = accuracy_score(Y_true, Y_predict)  # overall accuracy
    return report, accuracy
def cnn1d_selfembd(X_train, Y_train, X_test, Y_test,
                   nb_classes, maxlen, vocab_size, embd_dim,
                   nb_filter, filter_length, batch_size, nb_epoch, optm):
    """
    - CNN-1d on text input (represented in int)
    - MOT
    - dropout + L2 softmax

    :param <X, Y>: train and test sets
    :param nb_classes: # of classes
    :param maxlen: max # of chars in a sentence
    :param vocab_size:
    :param embd_dim:
    :param nb_filter:
    :param filter_length:
    :param batch_size:
    :param nb_epoch:
    :param optm: optimizer options, e.g., adam, rmsprop, etc.
    :return:
    """
    pool_length = maxlen - filter_length + 1

    model = Sequential()
    model.add(Embedding(vocab_size, embd_dim, input_length=maxlen))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode="valid",
                            activation="relu"))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optm)

    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=0.1, show_accuracy=True, callbacks=[earlystop])

    classes = earlystop.model.predict_classes(X_test, batch_size=batch_size)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))
    print('Test accuracy:', acc)
    # return(acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return kappa
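# A hedged usage sketch for cnn1d_selfembd. The load_csvs call and the data paths
# are borrowed from the keras_model() snippet further down; the embd_dim value is
# an assumption for illustration only.
#
# X_train, Y_train, X_test, Y_test, nb_classes = load_csvs(
#     'data/tpov4/train_1.csv', 'data/tpov4/test_1.csv', 6500, 175, 'self', w2v=None)
# kappa = cnn1d_selfembd(X_train, Y_train, X_test, Y_test,
#                        nb_classes, maxlen=175, vocab_size=6500, embd_dim=100,
#                        nb_filter=50, filter_length=10,
#                        batch_size=32, nb_epoch=25, optm='adam')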
def test():
    with open("save_weight.pickle", mode="rb") as f:
        weights = pickle.load(f)
    model = Sequential()
    model.add(Dense(output_dim=100, input_dim=28 * 28))
    model.add(Activation("relu"))
    model.set_weights(weights)
    # predict on five samples and score against the matching labels
    layer1_value = model.predict(X_test[:5])
    y_pred = np_utils.categorical_probas_to_classes(layer1_value)
    Y = np_utils.categorical_probas_to_classes(y_test[:5])
    print np_utils.accuracy(y_pred, Y)
    print y_pred.shape
def run_algorithm(train, test):
    EPOCHS = 100
    BATCHES = 124
    m = train.shape[1] - 1
    model = NN_model(m, 10, 128, 0.5)
    X_train = train.iloc[:, 1:].values
    y_train = np_utils.to_categorical(train[target].values)
    model.fit(X_train, y_train, nb_epoch=EPOCHS, batch_size=BATCHES,
              shuffle=True, verbose=1, validation_split=0.2)
    X_test = test.values
    ans_m = X_test.shape[0]
    print("xtest=", X_test.shape)
    ans = model.predict_proba(X_test, verbose=0)
    ans = np_utils.categorical_probas_to_classes(ans)
    # ans = np.array(ans).reshape((ans_m,))
    print("ans.shape=", ans.shape)
    csvfile = 'results/keras-naive.csv'
    writer = csv.writer(open(csvfile, 'w'), lineterminator='\n')
    writer.writerow(["ImageId", "Label"])
    for i, x in enumerate(ans):
        writer.writerow([i + 1, x])
def main():
    rev_by_star = get_data()
    X_train, Y_test, X_target, Y_target = split_data(rev_by_star)
    X_train = X_train.toarray()
    Y_test = Y_test.toarray()
    #Y_target = trans_target(Y_target)
    #X_target = trans_target(X_target)
    input_num = 1000
    output_num = 5
    X_target = to_categorical(X_target, 5)
    #Y_target = to_categorical(Y_target, 5)
    #data = np.random.random((2148051, input_num))
    #labels = np.random.randint(output_num, size=(2148051, 1))
    #print(X_target[0])
    #print(X_train.dtype, X_target.dtype)
    #labels = to_categorical(labels, 5)
    #print("data shape", data.dtype)
    #print("label shape", labels.dtype)
    print(type(X_train))
    model = build_model(input_num, output_num)
    model.fit(X_train, X_target, batch_size=128, nb_epoch=5, validation_split=0.25)
    #model.fit(data, labels, batch_size=32, nb_epoch=10)
    Y_pred = model.predict(Y_test)
    Y_pred = categorical_probas_to_classes(Y_pred)
    print("Accuracy is : %.2f" %
          ((Y_target == Y_pred).sum() * 1.0 / (1.0 * Y_test.shape[0])))
    plot_confusion_matrix(Y_pred, Y_target, "neural_network")
def report_performance():
    y_pred = model.predict(X_dev.as_matrix(), batch_size=2000, verbose=2)
    pred_classes = np_utils.categorical_probas_to_classes(y_pred)
    conf_matrix = confusion_matrix(y_dev, pred_classes)
    print conf_matrix
    print "accuracy", accuracy_score(y_dev, pred_classes)
    print "f1", f1_score(y_dev, pred_classes, average='weighted')
def combinationPredict(predict, samples_test):
    labels_samples, merger_min, merger_max, merger_sum, merger_pro = \
        add.fusoesDiego(predict, samples_test)
    classSeg = np_utils.categorical_probas_to_classes(predict)
    classMin = np_utils.categorical_probas_to_classes(merger_min)
    classMax = np_utils.categorical_probas_to_classes(merger_max)
    classSom = np_utils.categorical_probas_to_classes(merger_sum)
    classPro = np_utils.categorical_probas_to_classes(merger_pro)
    print()
    print("Min: " + str(acc(labels_samples, classMin)))
    print("Max: " + str(acc(labels_samples, classMax)))
    print("Sum: " + str(acc(labels_samples, classSom)))
    print("Product: " + str(acc(labels_samples, classPro)))
    print()
def predict(model, x, y, ix, output_dir):
    """
    Store predictions in a CSV file and predicted probabilities in an NPZ file.
    """
    y_proba_pred = model.predict(x)
    np.savez_compressed(output_dir + '/predictions_proba.npz',
                        y_proba_pred=y_proba_pred)
    df = pd.DataFrame({
        'y_pred': np_utils.probas_to_classes(y_proba_pred),
        'y_true': np_utils.categorical_probas_to_classes(y)
    })
    df['accurate'] = df['y_true'] == df['y_pred']
    df['split'] = ''
    for key, indexes in ix.items():
        df.ix[indexes, 'split'] = key
    df = df[['split', 'y_true', 'y_pred', 'accurate']]
    df.to_csv(output_dir + '/predictions.csv', index=None)
    return y_proba_pred
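# Minimal usage sketch for predict() above (all names hypothetical): `ix` maps a
# split name to the row indexes belonging to it, so each row of predictions.csv
# is tagged with its split.
#
# ix = {'train': train_idx, 'val': val_idx, 'test': test_idx}
# y_proba = predict(model, x_all, y_all_onehot, ix, output_dir='out')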
def sample_count(Y):
    sample_count_train = {}
    if len(Y.shape) != 1:  # convert from one-hot encoding
        Y = np_utils.categorical_probas_to_classes(Y)
    for i in set(Y):
        sample_count_train[i] = list(Y).count(i)
    return sample_count_train
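# Usage sketch for sample_count (values are illustrative only): it accepts either
# a vector of integer labels or a one-hot matrix and returns {class: count}.
#
# sample_count(np.array([0, 1, 1, 2]))                 # -> {0: 1, 1: 2, 2: 1}
# sample_count(np_utils.to_categorical([0, 1, 1], 3))  # -> {0: 1, 1: 2}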
def keras_model():
    import pandas as pd
    import numpy as np
    from keras.preprocessing import sequence
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation, Flatten
    from keras.layers.convolutional import Convolution1D, MaxPooling1D
    from keras.callbacks import EarlyStopping
    from keras.utils import np_utils
    from data_util import load_csvs, load_other
    import ml_metrics as metrics

    nb_words = 6500
    maxlen = 175
    filter_length = 10
    other_col_dim = 4

    X_train, Y_train, X_test, Y_test, nb_classes = load_csvs(
        'data/tpov4/train_1.csv', 'data/tpov4/test_1.csv',
        nb_words, maxlen, 'self', w2v=None)

    # read _other.csv
    other_train = load_other('data/tpov4/train_1_other.csv', maxlen, other_col_dim)
    other_test = load_other('data/tpov4/test_1_other.csv', maxlen, other_col_dim)
    print('other tensor:', other_train.shape)

    pool_length = maxlen - filter_length + 1

    model = Sequential()
    model.add(Convolution1D(nb_filter=50,
                            filter_length=filter_length,
                            border_mode="valid",
                            activation="relu",
                            input_shape=(maxlen, other_col_dim)))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(Flatten())
    model.add(Dropout(0.05))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    # hyperas template placeholder: the optimizer is sampled from this choice at tuning time
    model.compile(loss='categorical_crossentropy',
                  optimizer={{choice(['rmsprop', 'adam', 'adadelta', 'adagrad'])}})

    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit(other_train, Y_train, batch_size=32, nb_epoch=25,
              validation_split=0.1, show_accuracy=True, callbacks=[earlystop])

    classes = earlystop.model.predict_classes(other_test, batch_size=32)
    org_classes = np_utils.categorical_probas_to_classes(Y_test)
    acc = np_utils.accuracy(classes, org_classes)  # accuracy only supports classes
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes, org_classes)
    print('Test Kappa:', kappa)
    return {'loss': -acc, 'status': STATUS_OK}
def _getECOCAccuracy(self, GT, pred, topN=5):
    """
    Calculates the topN accuracy obtained from a set of samples on an ECOC_Classifier.
    """
    top_pred = np.argsort(pred, axis=1)[:, ::-1][:, :np.min([topN, pred.shape[1]])]
    pred = np_utils.categorical_probas_to_classes(pred)
    GT = np_utils.categorical_probas_to_classes(GT)

    # Top1 accuracy
    correct = [1 if pred[i] == GT[i] else 0 for i in range(len(pred))]
    accuracies = float(np.sum(correct)) / float(len(correct))

    # TopN accuracy
    top_correct = [1 if GT[i] in top_pred[i, :] else 0
                   for i in range(top_pred.shape[0])]
    top_accuracies = float(np.sum(top_correct)) / float(len(top_correct))

    return [accuracies, top_accuracies]
def applyClassMapping(self, Y):
    """
    Returns the corresponding integer identifiers for the current Stage's mapping
    given a set of categorical arrays Y.
    """
    # Get labels from Keras' categorical representation
    labels = np_utils.categorical_probas_to_classes(Y)
    # Map labels for this stage
    return [self.mapping[l] for l in labels]
def cnn1d_w2vembd(X_train, Y_train, X_test, Y_test,
                  nb_classes, maxlen,
                  nb_filter, filter_length, batch_size, nb_epoch, optm):
    """
    - CNN-1d on a 3d tensor which uses word2vec embedding
    - MOT

    :param <X, Y>: train and test sets
    :param nb_classes: # of classes
    :param maxlen: max # of chars in a sentence
    :param nb_filter:
    :param filter_length:
    :param batch_size:
    :param nb_epoch:
    :param optm:
    :return:
    """
    pool_length = maxlen - filter_length + 1

    model = Sequential()
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode="valid",
                            activation="relu",
                            input_shape=(maxlen, 300)))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optm)

    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=0.1, show_accuracy=True, callbacks=[earlystop])

    classes = earlystop.model.predict_classes(X_test, batch_size=batch_size)
    acc = np_utils.accuracy(classes,
                            np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
    # return(acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return kappa
def lstm_selfembd(X_train, Y_train, X_test, Y_test,
                  nb_classes, maxlen, vocab_size, embd_dim,
                  batch_size, nb_epoch, optm):
    """
    - LSTM on text input (represented in int)
    - fully-connected model

    :param <X, Y>: train and test sets
    :param nb_classes: # of classes
    :param maxlen: max # of chars in a sentence
    :param vocab_size:
    :param embd_dim:
    :param batch_size:
    :param nb_epoch:
    :param optm: optimizer options, e.g., adam, rmsprop, etc.
    :return:
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embd_dim, input_length=maxlen))
    model.add(Dropout(0.25))
    # model.add(LSTM(100, return_sequences=True))
    model.add(LSTM(50))
    model.add(Flatten())  # note: only has an effect if the LSTM above returns sequences
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optm)

    earlystop = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=0.1, show_accuracy=True, callbacks=[earlystop])

    classes = earlystop.model.predict_classes(X_test, batch_size=batch_size)
    acc = np_utils.accuracy(classes,
                            np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return kappa
def predict(self, X_test):
    model = self.model
    nb_classes = self.nb_classes
    #Y_test = np_utils.to_categorical(y_test, nb_classes)
    X_test = self.X_reshape(X_test)
    Y_pred = model.predict(X_test, verbose=0)
    y_pred = np_utils.categorical_probas_to_classes(Y_pred)
    return y_pred
def get_output(i, model, name, X, Y):
    # Build TSNE model
    tsne_model = TSNE(n_components=2, random_state=0)
    get_layer_output = K.function([model.layers[0].input],
                                  [model.layers[i].output])
    # We pick the first 500 points to do TSNE
    reduced_layer_output = tsne_model.fit_transform(get_layer_output([X])[0][:500])
    #plt.figure(figsize=(12,12))
    #plt.title(name, fontsize=20)
    plt = scatter(reduced_layer_output,
                  np_utils.categorical_probas_to_classes(Y[:500]))
    plt.savefig('./img/' + name)
def cnn_var_selfembd(X_train, Y_train, X_test, Y_test,
                     nb_classes, maxlen, vocab_size, embd_size,
                     nb_filter, batch_size, nb_epoches, optm):
    ngram_filters = [2, 5, 8]

    input = Input(shape=(maxlen,), name='input', dtype='int32')
    embedded = Embedding(input_dim=vocab_size, output_dim=embd_size,
                         input_length=maxlen)(input)

    convs = [None, None, None]  # three CNNs
    for i, n_gram in enumerate(ngram_filters):
        pool_length = maxlen - n_gram + 1
        convs[i] = Convolution1D(nb_filter=nb_filter, filter_length=n_gram,
                                 border_mode="valid", activation="relu")(embedded)
        convs[i] = MaxPooling1D(pool_length=pool_length)(convs[i])
        convs[i] = Flatten()(convs[i])

    merged = merge([convs[0], convs[1], convs[2]], mode='concat', concat_axis=1)
    merged = Dropout(0.5)(merged)
    output = Dense(nb_classes, activation='softmax', name='output')(merged)

    model = Model(input, output)
    model.compile(optm, loss={'output': 'categorical_crossentropy'})

    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit(X_train, Y_train, nb_epoch=nb_epoches, batch_size=batch_size,
              validation_split=0.1, callbacks=[earlystop])

    probs = earlystop.model.predict(X_test, batch_size=batch_size)
    classes = np_utils.categorical_probas_to_classes(probs)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return acc
def __init__(self, outdir, p):
    if os.path.exists(outdir):
        if len(os.listdir(outdir)) > 0:
            raise RuntimeError("callback outdir already exists: %s" % outdir)
    else:
        os.makedirs(outdir)
    self.outdir = outdir
    self.p = p
    self.losses = []
    self.bestf1 = 0.0
    stype = 'validation'
    cache = "%s_%s_%s_mintf%s_df%s_%s" % (
        stype, self.p['max_posts'], self.p['max_length'],
        self.p['mintf'], self.p['mindf'], self.p['noempty'])
    if self.p['prep'] is not None:
        cache += "_prep-%s" % self.p['prep']
    if os.path.exists("data/redcache/%s_X.npy" % cache):
        self.val_X = np.load("data/redcache/%s_X.npy" % cache)
        self.val_y = categorical_probas_to_classes(
            np.load("data/redcache/%s_y.npy" % cache))
    else:
        _, genf = datagen(self.p['max_posts'], self.p['max_length'],
                          stype=stype, force_full=True,
                          mintf=self.p['mintf'], mindf=self.p['mindf'],
                          noempty=self.p['noempty'], prep=self.p['prep'],
                          batch_size=9999999999)
        for i, (X, y) in enumerate(genf()):
            assert i == 0
            self.val_X, self.val_y = X, categorical_probas_to_classes(y)
            np.save("data/redcache/%s_X.npy" % cache, X)
            np.save("data/redcache/%s_y.npy" % cache, y)
def train_pair(args, train_csv, test_csv):
    print('Reading word vectors.')
    embeddings_index = read_glove_vectors(args.embedding_file_path)
    print('Found {} word vectors.'.format(len(embeddings_index)))

    print('Processing input data')
    x_train, y_train, x_test, y_test, word_index = read_input_csv(
        train_csv, test_csv, args.nb_words, args.max_sequence_len)
    print('train tensor {}.'.format(x_train.shape))

    print('Preparing embedding matrix.')
    # initiate embedding matrix with zero vectors.
    nb_words = min(args.nb_words, len(word_index))
    embedding_matrix = np.zeros((nb_words + 1, args.embedding_dim))
    for word, i in word_index.items():
        if i > nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    args.nb_words = nb_words
    # args.len_labels_index = len(labels_index)
    args.len_labels_index = 2  # fixed for sentiment detection.

    model = model_selector(args, embedding_matrix)
    checkpoint_filepath = os.path.join(args.model_dir, "weights.best.hdf5")
    checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='val_loss',
                                 verbose=1, save_best_only=True)
    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    tsb = TensorBoard(log_dir='./log', histogram_freq=0,
                      write_graph=True, write_images=False)
    callbacks_list = [checkpoint, earlystop, tsb]

    model_json = model.to_json()
    with open(os.path.join(args.model_dir, "model.json"), "w") as json_file:
        json_file.write(model_json)

    model.fit(x_train, y_train, validation_split=0.1,
              nb_epoch=args.num_epochs, batch_size=args.batch_size,
              callbacks=callbacks_list)

    # acc only supports classes
    classes = earlystop.model.predict_classes(x_test, batch_size=args.batch_size)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(y_test))
    print('Test accuracy: {}.'.format(acc))
def detect_in_image(self, img):
    if len(img.shape) == 2:
        window_size = (self.window_size[0], self.window_size[1], 1)
        img = img.reshape((img.shape[0], img.shape[1], 1))
    else:
        window_size = self.window_size
    windows = view_as_windows(img, window_size, self.stride)
    windows_shape = windows.shape
    windows = windows.reshape((windows_shape[0] * windows_shape[1],
                               windows_shape[3], windows_shape[4], 3))
    predictions = self.model.predict(windows, batch_size=32, verbose=1)
    y = np_utils.categorical_probas_to_classes(predictions)
    y = y.reshape(windows_shape[0], windows_shape[1])
    detections = zip(*np.where(y == 1))
    # print(detections)
    # print(predictions)
    bxs = []
    for c, r in detections:
        # print(p)
        bxs.append([
            r * self.stride,
            c * self.stride,
            r * self.stride + window_size[0],
            c * self.stride + window_size[1],
        ])
    bxs = non_max_suppression_fast(np.asarray(bxs), 0.1)
    # bxs = zip(*bxs)
    # print('Boxes :', bxs)
    return bxs
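# Hedged usage sketch for detect_in_image (the detector object and image source
# are hypothetical; only the call and return shape come from the method above):
#
# boxes = detector.detect_in_image(image)  # grayscale or RGB ndarray
# for x1, y1, x2, y2 in boxes:
#     pass  # draw or crop each window surviving non-max suppression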
def evalon(stype, model, batch=32, X=None, y_true=None):
    print('\n-------- %s -------' % stype)
    print(datetime.datetime.now(), "predicting")
    y_pred = categorical_probas_to_classes(model.predict(X, batch_size=batch))
    print(datetime.datetime.now())
    posf1 = sklearn.metrics.f1_score(y_true, y_pred, pos_label=1, average='binary')
    posp = sklearn.metrics.precision_score(y_true, y_pred, pos_label=1, average='binary')
    posr = sklearn.metrics.recall_score(y_true, y_pred, pos_label=1, average='binary')
    print(sklearn.metrics.classification_report(y_true, y_pred))
    print("pred", collections.Counter(y_pred))
    print("true", collections.Counter(y_true))
    return posf1, posp, posr
def validate(self, features, labels, number_folds, encoded_labels):
    """
    Compute a model's performance metrics based on the k-fold cross-validation technique.

    Parameters
    ----------
    features: array-like of shape = [number_samples, number_features]
        The validation input samples.
    labels: array-like of shape = [number_samples]
        The target values (class labels in classification).
    number_folds: int
        The amount of folds for the k-fold cross-validation.
        If 0, compute metrics without folds.
        If > 0, compute metrics with n folds, n=number_folds.
    encoded_labels: array-like of shape = [number_samples, number_outputs]
        The target values (class labels in classification) in one-hot encoding.

    Return
    ----------
    accuracy: float
        The accuracy of the model based on its confusion matrix.
    precision: float
        The precision of the model based on its confusion matrix.
    sensitivity: float
        The sensitivity of the model based on its confusion matrix.
    specificity: float
        The specificity of the model based on its confusion matrix.
    kappa: float
        The Cohen's Kappa of the model based on its confusion matrix.
    """
    if number_folds == 0:
        predictions = self.model.predict_classes(features)
    else:
        predictions = numpy.empty(len(labels), dtype=float)
        folds = Utilities.getFolds(labels, number_folds)
        for i, (train, test) in enumerate(folds):
            self.model.fit(features[train], encoded_labels[train],
                           nb_epoch=250, batch_size=10, verbose=1)
            fold_prediction = self.model.predict_classes(features[test])
            for j in range(len(test)):
                predictions[test[j]] = fold_prediction[j]

    matrix = confusion_matrix(
        np_utils.categorical_probas_to_classes(encoded_labels), predictions)
    sum_columns = numpy.sum(matrix, 0)
    sum_rows = numpy.sum(matrix, 1)
    diagonal_sum = numpy.trace(matrix)
    total_sum = numpy.sum(sum_rows)
    accuracy = diagonal_sum / total_sum

    temp_precision = []
    temp_sensitivity = []
    temp_specificity = []
    for i in range(len(matrix)):
        temp_precision.append(matrix[i][i] / sum_columns[i])
        temp_sensitivity.append(matrix[i][i] / sum_rows[i])
        temp_reduced_sum = total_sum - sum_rows[i] - sum_columns[i] + matrix[i][i]
        temp_specificity.append(
            temp_reduced_sum / (temp_reduced_sum + sum_columns[i] - matrix[i][i]))

    precision = sum(temp_precision * sum_rows) / total_sum
    sensitivity = sum(temp_sensitivity * sum_rows) / total_sum
    specificity = sum(temp_specificity * sum_rows) / total_sum

    kappa_sum = sum(sum_rows * sum_columns)
    kappa_numerator = (total_sum * diagonal_sum) - kappa_sum
    kappa_denominator = (total_sum * total_sum) - kappa_sum
    kappa = kappa_numerator / kappa_denominator

    return accuracy, precision, sensitivity, specificity, kappa
                        border_mode='valid',
                        input_shape=(3, img_rows, img_cols)))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adadelta')

model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          show_accuracy=True, verbose=1, validation_data=(X_test, y_test))
score = model.evaluate(X_test, y_test, show_accuracy=True, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

y_hat = model.predict(X_test)
y_test_classes = np_utils.categorical_probas_to_classes(y_test)
y_hat_classes = np_utils.categorical_probas_to_classes(y_hat)
print confusion_matrix(y_test_classes, y_hat_classes)
#showWrongOnes(X_test, y_test_classes, y_hat_classes, 0, 5)
np.random.seed(0)
print("Method = MLP classification - Default features")
encoder = preprocessing.LabelEncoder()
train_labels_aux = np.array([latlon2healpix(lat, lon, resolution)
                             for (lat, lon) in train_labels1])
train_labels_aux = np.array(encoder.fit_transform(train_labels_aux))
num_classes = len(set(train_labels_aux))
train_labels_aux = np_utils.to_categorical(train_labels_aux, num_classes)
model = Sequential()
model.add(Dense(hidden_dim, input_dim=train_matrix1.shape[1],
                init='uniform', activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(hidden_dim / 2, activation='sigmoid', init='uniform'))
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation='softmax', init='uniform'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_matrix1, train_labels_aux, nb_epoch=1500, batch_size=16, verbose=1)
results = encoder.inverse_transform(
    np_utils.categorical_probas_to_classes(model.predict(test_matrix1)))
results = np.array([healpix2latlon(code, resolution) for code in results])
print("Mean error = " + repr(np.mean([geodistance(results[i], test_labels1[i])
                                      for i in range(results.shape[0])])))
print("Median error = " + repr(np.median([geodistance(results[i], test_labels1[i])
                                          for i in range(results.shape[0])])))

print("Method = MLP classification - Default features + chromatic features")
np.random.seed(0)
encoder = preprocessing.LabelEncoder()
train_labels_aux = np.array([latlon2healpix(lat, lon, resolution)
                             for (lat, lon) in train_labels1])
train_labels_aux = np.array(encoder.fit_transform(train_labels_aux))
num_classes = len(set(train_labels_aux))
train_labels_aux = np_utils.to_categorical(train_labels_aux, num_classes)
model = Sequential()
model.add(Dense(hidden_dim, input_dim=train_matrix2.shape[1],
                init='uniform', activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(hidden_dim / 2, activation='sigmoid', init='uniform'))
model.add(Dropout(0.25))
def run_dl_mgh_params_2cl(X, y, Lx, Ly, nb_epoch=5000, batch_size=128, nb_classes=2):
    # input image dimensions
    img_rows, img_cols = Lx, Ly
    # number of convolutional filters to use
    nb_filters = 8
    # size of pooling area for max pooling
    pool_size = (10, 10)
    # convolution kernel size
    kernel_size = (20, 20)

    # the data, shuffled and split between train and test sets
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=0)

    if K.image_dim_ordering() == 'th':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    model = Sequential()
    model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                            border_mode='valid', input_shape=input_shape))
    model.add(BatchNormalization())
    # model.add(Activation('relu'))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=pool_size))
    #model.add(Dropout(0.25))

    model.add(Convolution2D(5, 5, 5, border_mode='valid'))
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(5, 5)))

    model.add(Flatten())
    model.add(Dense(4))
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    #model.add(Activation('relu'))
    #model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adadelta',
                  metrics=['accuracy'])

    # earlyStopping = callbacks.EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')
    history = model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
                        verbose=0, validation_data=(X_test, Y_test))  # , callbacks=[earlyStopping])
    score = model.evaluate(X_test, Y_test, verbose=0)
    Y_test_pred = model.predict(X_test, verbose=0)

    print('Confusion matrix')
    y_test_pred = np_utils.categorical_probas_to_classes(Y_test_pred)
    print(metrics.confusion_matrix(y_test, y_test_pred))
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    kkeras.plot_acc(history)
    plt.show()
    kkeras.plot_loss(history)
    nb_epoch = 10
    batch_size = 128

    model.fit(X_train, Y_train,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              verbose=2,
              validation_data=(X_test, Y_test))
    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}


if __name__ == '__main__':
    X_train, X_test, Y_train, Y_test = data()

    '''
    Generate an ensemble model from the optimization run:
    first, run hyperas optimization on the specified setup, i.e. 10 trials,
    then return the best 5 models and create a majority-voting model from them.
    '''
    ensemble_model = optim.best_ensemble(nb_ensemble_models=5,
                                         model=model, data=data,
                                         algo=rand.suggest, max_evals=10,
                                         trials=Trials(),
                                         voting='hard')
    preds = ensemble_model.predict(X_test)
    y_test = np_utils.categorical_probas_to_classes(Y_test)
    print(accuracy_score(preds, y_test))
def cnn_other(Y_train, Y_test, nb_classes,
              Other_train, Other_test, k,
              maxlen, nb_filter, filter_size,
              batch_size, nb_epoches, optm):
    """
    cnn1d using varying filter lengths
    note: needs to use Graph

    :param Y_train:
    :param Y_test:
    :param nb_classes:
    :param maxlen:
    :param vocab_size:
    :param embd_size:
    :param batch_size:
    :param nb_epoches:
    :param optm:
    :return:
    """
    model = Graph()

    # CNN for other
    pos_pool_len = maxlen / 2 - filter_size + 1
    model.add_input(name='other_input', input_shape=(maxlen, k), dtype='float')
    model.add_node(Convolution1D(nb_filter=nb_filter,
                                 filter_length=filter_size,
                                 border_mode='valid',
                                 activation='relu',
                                 input_shape=(maxlen, k)),
                   name='poscnn', input='other_input')
    model.add_node(MaxPooling1D(pool_length=5), name='pospool', input='poscnn')

    # 2nd CNN
    model.add_node(Convolution1D(nb_filter=nb_filter * 2,
                                 filter_length=filter_size,
                                 border_mode='valid',
                                 activation='relu'),
                   name='cnn2', input='pospool')
    model.add_node(MaxPooling1D(pool_length=10), name='cnn2_pool', input='cnn2')
    model.add_node(Flatten(), name='posflat', input='cnn2_pool')
    model.add_node(Dropout(0.5), name='posdropout', input='posflat')

    model.add_node(Dense(nb_classes, activation='softmax'),
                   name='softmax', input='posdropout')
    model.add_output(name='output', input='softmax')
    model.compile(optm, loss={'output': 'categorical_crossentropy'})  # note Graph()'s diff syntax

    # early stopping
    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit({'other_input': Other_train, 'output': Y_train},
              nb_epoch=nb_epoches, batch_size=batch_size,
              validation_split=0.1, callbacks=[earlystop])

    # Graph doesn't have several args/funcs existing in Sequential():
    # - fit has no show_accuracy
    # - no predict_classes
    classes = model.predict({'other_input': Other_test},
                            batch_size=batch_size)['output'].argmax(axis=1)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return acc
def evaluate_model(self, segment_size, model, data_loader):
    '''
    :param segment_size:
    :param model:
    :param data_loader:
    :return:
    '''
    # ________________ Frame-level evaluation for Test/Validation splits ________________
    print('Validation segments = ' + str(data_loader.validation_segments.shape) +
          ' one-hot encoded target' + str(data_loader.validation_one_hot_target.shape))
    score = model.evaluate(data_loader.validation_segments,
                           data_loader.validation_one_hot_target, verbose=0)
    print('Validation score:', score[0])
    print('Validation accuracy:', score[1])

    print('Test segments = ' + str(data_loader.test_segments.shape) +
          ' one-hot encoded target' + str(data_loader.test_one_hot_target.shape))
    score = model.evaluate(data_loader.test_segments,
                           data_loader.test_one_hot_target, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    # ___________________ predict frame-level classes ___________________________________
    test_predicted_labels = np_utils.categorical_probas_to_classes(
        model.predict(data_loader.test_segments))
    test_target_labels = np_utils.categorical_probas_to_classes(
        data_loader.test_one_hot_target)
    cm_frames = confusion_matrix(test_target_labels, test_predicted_labels)
    print('Confusion matrix, frame level')
    print(cm_frames)
    print('Frame level accuracy :' +
          str(np_utils.accuracy(test_predicted_labels, test_target_labels)))

    # -------------- Voting ------------------------
    clip_predicted_probability_mean_vote = []
    clip_predicted_majority_vote = []
    for i, clip in enumerate(data_loader.test_clips):
        segments, segments_target_labels = data_loader.segment_clip(
            data=clip, label=data_loader.test_clips_labels[i],
            segment_size=segment_size, step_size=Config.STEP_SIZE)
        segments_predicted_prop = model.predict(segments)
        test_predicted_labels = np_utils.categorical_probas_to_classes(
            segments_predicted_prop)
        labels_histogram = np.bincount(test_predicted_labels)
        clip_predicted_majority_vote.append(np.argmax(labels_histogram))
        clip_predicted_probability_mean_vote.append(
            np.argmax(np.mean(segments_predicted_prop, axis=0)))

    cm_majority = confusion_matrix(data_loader.test_clips_labels,
                                   clip_predicted_majority_vote)
    print('Fold Confusion matrix - Majority voting - Clip level :')
    print(Config.CLASS_NAMES)
    print(cm_majority)
    print('Clip-level majority-vote Accuracy ' +
          str(np_utils.accuracy(clip_predicted_majority_vote,
                                data_loader.test_clips_labels)))

    print('Fold Confusion matrix - Probability MEAN voting - Clip level :')
    cm_probability = confusion_matrix(data_loader.test_clips_labels,
                                      clip_predicted_probability_mean_vote)
    print(Config.CLASS_NAMES)
    print(cm_probability)
    print('Clip-level probability-vote Accuracy ' +
          str(np_utils.accuracy(np.asarray(clip_predicted_probability_mean_vote),
                                np.squeeze(data_loader.test_clips_labels))))

    scoref1 = f1score(data_loader.test_clips_labels,
                      clip_predicted_probability_mean_vote, average='micro')
    print('F1 Score micro ' + str(scoref1))
    scoref1 = f1score(data_loader.test_clips_labels,
                      clip_predicted_probability_mean_vote, average='weighted')
    print('F1 Score weighted ' + str(scoref1))

    return (cm_majority, cm_probability, clip_predicted_majority_vote,
            clip_predicted_probability_mean_vote, data_loader.test_clips_labels)
def cnn_multi_selfembd(X_train, Y_train, X_test, Y_test, nb_classes,
                       maxlen, vocab_size, embd_size,
                       pos_train, pos_test, pos_embd_dim,
                       dp_train, dp_test, dp_embd_dim,
                       nb_filter, batch_size, nb_epoches, optm):
    """
    cnn1d using multi-inputs, i.e., word, POS, DP
    word using varying filter lengths
    note: needs to use Graph

    :param X_train:
    :param Y_train:
    :param X_test:
    :param Y_test:
    :param nb_classes:
    :param maxlen:
    :param vocab_size:
    :param embd_size:
    :param batch_size:
    :param nb_epoches:
    :param optm:
    :return:
    """
    ngram_filters = [2, 5, 8]
    nd_convs = ['conv_' + str(n) for n in ngram_filters]
    nd_pools = ['pool_' + str(n) for n in ngram_filters]
    nd_flats = ['flat_' + str(n) for n in ngram_filters]

    model = Graph()
    model.add_input(name='input', input_shape=(maxlen,), dtype=int)
    model.add_node(Embedding(vocab_size, embd_size, input_length=maxlen),
                   name='embedding', input='input')

    # three word CNNs
    for i, n_gram in enumerate(ngram_filters):
        pool_length = maxlen - n_gram + 1
        model.add_node(Convolution1D(nb_filter=nb_filter, filter_length=n_gram,
                                     border_mode="valid", activation="relu"),
                       name=nd_convs[i], input='embedding')
        model.add_node(MaxPooling1D(pool_length=pool_length),
                       name=nd_pools[i], input=nd_convs[i])
        model.add_node(Flatten(), name=nd_flats[i], input=nd_pools[i])
    model.add_node(Dropout(0.5), name='dropout', inputs=nd_flats, merge_mode='concat')

    # POS CNN
    nb_pos = 15
    pos_f_len = 3
    pos_pool_len = maxlen - pos_f_len + 1
    model.add_input(name='posinput', input_shape=(maxlen,), dtype=int)
    model.add_node(Embedding(nb_pos, pos_embd_dim, input_length=maxlen),
                   name='posembd', input='posinput')
    model.add_node(Convolution1D(nb_filter=nb_filter, filter_length=pos_f_len,
                                 border_mode='valid', activation='relu'),
                   name='poscnn', input='posembd')
    model.add_node(MaxPooling1D(pool_length=pos_pool_len),
                   name='pospool', input='poscnn')
    model.add_node(Flatten(), name='posflat', input='pospool')
    model.add_node(Dropout(0.5), name='posdropout', input='posflat')

    # DP CNN
    nb_dp = vocab_size
    dp_f_len = 3
    dp_pool_len = maxlen - dp_f_len + 1
    model.add_input(name='dpinput', input_shape=(maxlen,), dtype=int)
    model.add_node(Embedding(nb_dp, dp_embd_dim, input_length=maxlen),
                   name='dpembd', input='dpinput')
    model.add_node(Convolution1D(nb_filter=nb_filter, filter_length=dp_f_len,
                                 border_mode='valid', activation='relu'),
                   name='dpcnn', input='dpembd')
    model.add_node(MaxPooling1D(pool_length=dp_pool_len),
                   name='dppool', input='dpcnn')
    model.add_node(Flatten(), name='dpflat', input='dppool')
    model.add_node(Dropout(0.5), name='dpdropout', input='dpflat')

    model.add_node(Dense(nb_classes, activation='softmax'), name='softmax',
                   inputs=['dropout', 'posdropout', 'dpdropout'],
                   merge_mode='concat')
    model.add_output(name='output', input='softmax')
    model.compile(optm, loss={'output': 'categorical_crossentropy'})  # note Graph()'s diff syntax

    # early stopping
    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit({'input': X_train, 'posinput': pos_train, 'dpinput': dp_train,
               'output': Y_train},
              nb_epoch=nb_epoches, batch_size=batch_size,
              validation_split=0.1, callbacks=[earlystop])

    # Graph doesn't have several args/funcs existing in Sequential():
    # - fit has no show_accuracy
    # - no predict_classes
    classes = model.predict({'input': X_test, 'posinput': pos_test, 'dpinput': dp_test},
                            batch_size=batch_size)['output'].argmax(axis=1)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
    return kappa
predictions = Dense(5, activation="softmax", name="dl1preds")(fc1) model = Model(input=[imgvecs], output=[predictions]) model.compile(optimizer="adadelta", loss="categorical_crossentropy", metrics=["accuracy"]) best_model = os.path.join(MODEL_DIR, "tl-dl1-model-best.h5") checkpoint = ModelCheckpoint(filepath=best_model, verbose=1, save_best_only=True) history = model.fit([Xtrain], [Ytrain], nb_epoch=NUM_EPOCHS, batch_size=BATCH_SIZE, validation_split=0.1, callbacks=[checkpoint]) fttlutils.plot_loss(history) # evaluate final model Ytest_ = model.predict(Xtest) ytest = np_utils.categorical_probas_to_classes(Ytest) ytest_ = np_utils.categorical_probas_to_classes(Ytest_) fttlutils.print_stats(ytest, ytest_, "Final Model (DL#1)") model.save(os.path.join(MODEL_DIR, "tl-dl1-model-final.h5")) # load best model and evaluate model = load_model(os.path.join(MODEL_DIR, "tl-dl1-model-best.h5")) model.compile(optimizer="adadelta", loss="categorical_crossentropy", metrics=["accuracy"]) Ytest_ = model.predict(Xtest) ytest = np_utils.categorical_probas_to_classes(Ytest) ytest_ = np_utils.categorical_probas_to_classes(Ytest_) fttlutils.print_stats(ytest, ytest_, "Best Model (DL#1)")
from HSIDatasetLoad import *
from keras.utils import np_utils
import numpy as np

HSI = HSIData(rootPath)
X_data = HSI.X_data
Y_data = HSI.Y_data
data_source = HSI.data_source
idx_data = HSI.idx_data

# whether to use PCA dimensionality reduction
if use_pca == True:
    data_source = HSI.PCA_data_Source(data_source, n_components=n_components)

X_data_nei = HSI.getNeighborData(data_source=data_source, idx_data=idx_data,
                                 block_size=block_size)
Y_data = np_utils.categorical_probas_to_classes(Y_data)
X_train_nei, X_test_nei, Y_train, Y_test, idx_train, idx_test = HSI.datasetSplit(
    X_data_nei, Y_data, idx_data, 16, test_size=test_size)
X_train = data_source[idx_train]
X_test = data_source[idx_test]

#%% (2) autoencoder
from keras.layers import Input, Dense, Flatten
from keras.models import Model
from keras.utils import np_utils
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping
"""
categorical_crossentropy
'valid': image_shape - filter_shape + 1, i.e. the filter slides within the image
'full' shape: image_shape + filter_shape - 1, i.e. the filter may extend past the image boundary
def CNN(batch_size, nb_classes, nb_epoch, img_rows, img_cols,
        nb_filters, nb_pool, nb_conv,
        X_train, Y_train, X_val, Y_val, X_test, Y_test, opt, dp):
    model = Sequential()
    # low two bits of dp select the stride; next two bits select the init pair
    k = (dp & 12) >> 2
    dp = (dp & 3)
    if k == 0:
        w1 = 'glorot_uniform'
        w2 = 'glorot_uniform'
    elif k == 1:
        w1 = 'glorot_uniform'
        w2 = 'he_uniform'
    elif k == 2:
        w1 = 'he_uniform'
        w2 = 'glorot_uniform'
    else:
        w1 = 'he_uniform'
        w2 = 'he_uniform'

    model.add(Convolution2D(nb_filters, nb_conv, nb_conv, init=w1,
                            border_mode='valid', subsample=(dp, dp),
                            input_shape=(1, img_rows, img_cols)))
    convonet1 = Activation('relu')
    model.add(convonet1)
    ##if ((dp & 2) >> 1) == 1:
    ##    print('dp2')
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv, init=w1, subsample=(dp, dp)))
    convonet2 = Activation('relu')
    model.add(convonet2)
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    ##if dp & 1 == 1:
    ##    print('dp1')
    model.add(Dropout(0.5))

    model.add(Flatten())
    d_size = 128 / (dp * dp)
    print(d_size)
    model.add(Dense(d_size, init=w2))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, init=w2))
    model.add(Activation('softmax'))
    #model.compile(loss='categorical_crossentropy', optimizer='adadelta')
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])

    history = LossHistory()
    early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              # show_accuracy=True, verbose=1, validation_split=0.2)
              verbose=0, callbacks=[history, early_stop],
              validation_data=[X_val, Y_val])
    predicted = model.predict_classes(X_test)
    con_mat = confusion_matrix(np_utils.categorical_probas_to_classes(Y_test), predicted)
    model.summary()
    #from models import model_from_json
    #json_string = model.to_json()
    #print json_string
    #config = model.get_config()
    #print config
    #print(con_mat)
    ##false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, predicted)
    #roc_auc = auc(false_positive_rate, true_positive_rate)
    #plt.title('Receiver Operating Characteristic')
    #plt.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f' % roc_auc)
    #plt.legend(loc='lower right')
    #plt.plot([0, 1], [0, 1], 'r--')
    #plt.xlim([-0.1, 1.2])
    #plt.ylim([-0.1, 1.2])
    #plt.ylabel('True Positive Rate')
    #plt.xlabel('False Positive Rate')
    #plt.show()
    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    return score, model, history, con_mat
full_conn = Dense(128, activation='tanh')(flatten)
dropout_1 = Dropout(0.5)(full_conn)
full_conn_2 = Dense(64, activation='tanh')(dropout_1)
dropout_2 = Dropout(0.5)(full_conn_2)
output = Dense(6, activation='softmax')(dropout_2)

model = Model(input=inputs, output=output)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=50, nb_epoch=int(epochs))
          #validation_data=(x_test, y_test))
model.save('word2vec_main_model_' + epochs + '.h5')

#from keras.models import load_model
#model = load_model('500_filter_adam_50_50_valid_0.1.h5')
#model = load_model(sys.argv[1])

predictions = model.predict(x_test)
predictions = np_utils.categorical_probas_to_classes(predictions)
originals = np_utils.categorical_probas_to_classes(y_test)
lend = len(predictions) * 1.0
print np.sum(predictions == originals) / lend

from sklearn.metrics import confusion_matrix
print confusion_matrix(originals, predictions)
                       #labels = ['abbr', 'desc', 'enty', 'hum', 'loc', 'num'])
    for i in range(sample):
        for row in range(block_size):
            for col in range(block_size):
                new_X_data[i, :, row * block_size + col] = X_data[i, row, col, :]
    return new_X_data


HSI = HSIData(rootPath)
X_data = HSI.X_data
Y_data = HSI.Y_data
data_source = HSI.data_source
idx_data = HSI.idx_data

X_data = HSI.getNeighborData(data_source=data_source, idx_data=idx_data,
                             block_size=block_size)
X_data = data_standard(X_data)
Y_data = np_utils.categorical_probas_to_classes(Y_data)
X_train, X_test, Y_train, Y_test, idx_train, idx_test = HSI.datasetSplit(
    X_data, Y_data, idx_data, 16, test_size=test_size)

#%%
from keras.layers import Input, merge, Dense, Dropout, Flatten, Convolution1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
import tensorflow as tf
from keras.layers import LSTM


def get_model(input_shape, classify_output_num, my_optimizer):
    input_layer = Input(input_shape)
                                 border_mode='valid', activation='relu'),
                   name='dpcnn', input='dpembd')
    model.add_node(MaxPooling1D(pool_length=dp_pool_len),
                   name='dppool', input='dpcnn')
    model.add_node(Flatten(), name='dpflat', input='dppool')
    model.add_node(Dropout(0.5), name='dpdropout', input='dpflat')

    # using the three CNNs to predict, with L2 regularization on the softmax
    model.add_node(Dense(nb_classes, activation='softmax', W_regularizer=l2(0.05)),
                   name='softmax',
                   inputs=['dropout', 'posdropout', 'dpdropout'],
                   merge_mode='concat')
    model.add_output(name='output', input='softmax')
    model.compile('rmsprop', loss={'output': 'categorical_crossentropy'})

    # early stopping
    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit({'input': X_train, 'posinput': pos_train, 'dpinput': dp_train,
               'output': Y_train},
              nb_epoch=nb_epoch, batch_size=batch_size,
              validation_split=0.1, callbacks=[earlystop])

    # Graph doesn't have several args/funcs existing in Sequential():
    # - fit has no show_accuracy
    # - no predict_classes
    classes = model.predict({'input': X_test, 'posinput': pos_test, 'dpinput': dp_test},
                            batch_size=batch_size)['output'].argmax(axis=1)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
class_weight_to_fit = incp.GetDataRatio(ppmethod, class_list,
                                        nb_training_samples, 'Train')
#print('class_weight_to_fit: {}'.format(class_weight_to_fit))

# Now we test the best weights: "instrumented best-weight run".
full_model.load_weights(Best_Model_Weights_Path)  # LOAD (not set) the best weights.
X_test, Y_test = incp.GetXYData(ppmethod, nb_testing_samples, class_list,
                                'Test')  # get them big arrays
y_predictions = full_model.predict(X_test, batch_size=BATCH_SIZE)
Y_test_categorical = np_utils.to_categorical(Y_test, len(class_list))
#print('Y_test_categorical: {}'.format(Y_test_categorical))
#print('y_predictions before conversion: {}'.format(y_predictions))
y_preds_as_classes = np_utils.categorical_probas_to_classes(
    y_predictions)  # converts class probabilities to integer labels
print('y_predictions after conversion: {}'.format(y_preds_as_classes))
#print('pre-converted preds: {}'.format(y_predictions))
print('converted preds: {}'.format(y_preds_as_classes))
print('ground truth labels: {}'.format(Y_test))

test_comparo = np.zeros((len(y_preds_as_classes), 2), dtype=np.uint8)
for i in range(len(y_preds_as_classes)):
    # first column is truth, second is prediction
    test_comparo[i, 0] = Y_test[i]
    test_comparo[i, 1] = y_preds_as_classes[i]
np.savetxt(Base_Path + 'Results/TestSetPredsComparo' + ppmethod + '.txt',
           test_comparo, fmt='%1i',
# 0 -> [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# 2 -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# only the right element == 1

# create sequential model
model = Sequential()

# add network layers
model.add(Dense(800, input_dim=784, init="normal", activation="relu"))
model.add(Dense(10, init="normal", activation="softmax"))

# compile model
model.compile(loss="categorical_crossentropy", optimizer="SGD",
              metrics=["accuracy"])
print(model.summary())

#_________training_______________________
# batch_size - size of each data batch
# nb_epoch - number of training epochs (100 here)
model.fit(X_train, y_train, batch_size=200, nb_epoch=100, verbose=1)

#___________working_____________
# run the network on the input data
predictions = model.predict(X_train)

# transform the output
# from categorical form to class labels (numbers from 0 to 9)
predictions = np_utils.categorical_probas_to_classes(predictions)
# now we can compare
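# A minimal sketch of the comparison step hinted at above (an illustration,
# assuming y_train holds the same one-hot labels that were passed to fit):
#
# true_classes = np_utils.categorical_probas_to_classes(y_train)
# print(np.mean(predictions == true_classes))  # fraction of correct predictions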
                   name='pospool', input='poscnn')
    model.add_node(Flatten(), name='posflat', input='pospool')
    model.add_node(Dropout(0.5), name='posdropout', input='posflat')

    # using the word and POS CNNs to predict
    model.add_node(Dense(nb_classes, activation='softmax'),
                   name='softmax',
                   inputs=['dropout', 'posdropout'],
                   merge_mode='concat')
    model.add_output(name='output', input='softmax')
    model.compile('rmsprop', loss={'output': 'categorical_crossentropy'})
    # model.compile('rmsprop', loss={'output': 'mean_squared_error'})

    # early stopping
    earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)
    model.fit({'input': X_train, 'posinput': pos_train, 'output': Y_train},
              nb_epoch=nb_epoch, batch_size=batch_size,
              validation_split=0.1, callbacks=[earlystop])

    # Graph doesn't have several args/funcs existing in Sequential():
    # - fit has no show_accuracy
    # - no predict_classes
    classes = model.predict({'input': X_test, 'posinput': pos_test},
                            batch_size=batch_size)['output'].argmax(axis=1)
    acc = np_utils.accuracy(classes, np_utils.categorical_probas_to_classes(Y_test))  # accuracy only supports classes
    print('Test accuracy:', acc)
    kappa = metrics.quadratic_weighted_kappa(classes,
                                             np_utils.categorical_probas_to_classes(Y_test))
    print('Test Kappa:', kappa)
def calculate_f1(predictions, actual):
    return f1_score(categorical_probas_to_classes(actual),
                    categorical_probas_to_classes(predictions),
                    average="micro")
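# A hedged demo of calculate_f1 on tiny synthetic one-hot arrays (illustrative
# values, not from the source); identical inputs give a micro-averaged F1 of 1.0.
#
# demo = np.array([[1, 0], [0, 1], [0, 1]])
# calculate_f1(demo, demo)  # -> 1.0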
                                              batch=dev_batch)
        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        y = np.array([np_utils.to_categorical(each, output_length)
                      for each in label])

        # for loss
        dev_metrics = model.test_on_batch([embed_index, hash_index, pos], y)
        dev_loss += dev_metrics[0]

        # for accuracy
        prob = model.predict_on_batch([embed_index, hash_index, pos])
        for i, l in enumerate(length):
            predict_label = np_utils.categorical_probas_to_classes(prob[i])
            correct_predict += np.sum(predict_label[:l] == label[i][:l])
        all_predict += np.sum(length)

    epoch_accuracy = float(correct_predict) / all_predict
    all_dev_accuracy.append(epoch_accuracy)
    all_dev_loss.append(dev_loss)

    if epoch_accuracy >= best_accuracy:
        best_accuracy = epoch_accuracy
        best_epoch = epoch

    end = datetime.now()
    model.save('%s/model_epoch_%d.h5' % (folder_path, epoch), overwrite=True)
import os
import shutil
import sys
import pickle

from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

# NOTE: cmd_line, datasets, tokenize_stuff, tagger_stuff, and BATCH_SIZE are
# project-local names assumed to be defined elsewhere in this package.

def main():
    print("::: midas started :::")
    if len(sys.argv) < 2:
        raise ValueError("No config file specified.")
    config_path = os.path.abspath(sys.argv[1])
    print("> using config file: " + str(config_path))
    param_dict = cmd_line.parse(config_path)
    #print(param_dict)
    model_name = param_dict["model_name"]

    if param_dict["mode"] == "train":
        print("> start training")
        # make sure that we have a fresh model folder to work in
        if not os.path.isdir("../models"):
            os.mkdir("../models")
        if os.path.isdir("../models/" + model_name):
            shutil.rmtree("../models/" + model_name)
        os.mkdir("../models/" + model_name)

        # nb_instances is an effectively-unbounded sentinel ("load everything")
        train_tokens, train_postags, train_lemmas = \
            datasets.load_annotated_data_dir(data_dir=os.path.abspath(param_dict["train_dir"]),
                                             nb_instances=500000000000)
        dev_tokens, dev_postags, dev_lemmas = \
            datasets.load_annotated_data_dir(data_dir=os.path.abspath(param_dict["dev_dir"]),
                                             nb_instances=500000000000)

        if param_dict["tokenize"]:
            left_X, right_X, concat_y, char_vector_dict = \
                tokenize_stuff.vectorize(tokens=train_tokens,
                                         nb_left_tokens=param_dict["tok_nb_left_tokens"],
                                         left_char_len=param_dict["tok_left_char_len"],
                                         nb_right_tokens=param_dict["tok_nb_right_tokens"],
                                         right_char_len=param_dict["tok_right_char_len"])
            tokenizer = tokenize_stuff.build_tokenizer(nb_filters=2500,
                                                       filter_length=3,
                                                       char_vector_dict=char_vector_dict)
            tokenizer.fit([left_X, right_X], concat_y,
                          validation_split=0.20,
                          show_accuracy=True,
                          batch_size=BATCH_SIZE,
                          nb_epoch=param_dict["tok_nb_epochs"],
                          class_weight={0: 1, 1: 100})
            # save relevant objects:
            tokenizer.save_weights("../models/" + model_name + "/tokenizer.model_weights",
                                   overwrite=True)
            pickle.dump(char_vector_dict,
                        open("../models/" + model_name + "/char_vector_dict.p", "wb"))

        if param_dict["postag"]:
            pass

        if param_dict["lemmatize"]:
            # lemmas and POS tags are filtered independently; this assumes "@"
            # and "$" occur at the same positions in both (parallel) lists
            train_lemmas = [lem for lem in train_lemmas if lem not in ("@", "$")]
            train_postags = [pos for pos in train_postags if pos not in ("@", "$")]
            # joint labels of the form "<lemma>_<postag>"
            train_labels = [lem + "_" + pos for lem, pos in zip(train_lemmas, train_postags)]
            dev_lemmas = [lem for lem in dev_lemmas if lem not in ("@", "$")]
            dev_postags = [pos for pos in dev_postags if pos not in ("@", "$")]
            dev_labels = [lem + "_" + pos for lem, pos in zip(dev_lemmas, dev_postags)]

            label_encoder = LabelEncoder()
            label_encoder.fit(train_labels + dev_labels)
            train_ints = label_encoder.transform(train_labels)
            dev_ints = label_encoder.transform(dev_labels)
            train_y = np_utils.to_categorical(train_ints, len(label_encoder.classes_))
            dev_y = np_utils.to_categorical(dev_ints, len(label_encoder.classes_))

            train_left_X, train_tokens_X, train_right_X, train_char_vector_dict = \
                tagger_stuff.vectorize(tokens=train_tokens,
                                       std_token_len=param_dict["lemma_std_len_token"],
                                       nb_left_tokens=param_dict["lemma_nb_left_tokens"],
                                       left_char_len=param_dict["lemma_left_char_len"],
                                       nb_right_tokens=param_dict["lemma_nb_right_tokens"],
                                       right_char_len=param_dict["lemma_right_char_len"])
            print(train_tokens_X.shape)
            dev_left_X, dev_tokens_X, dev_right_X, _ = \
                tagger_stuff.vectorize(tokens=dev_tokens,
                                       std_token_len=param_dict["lemma_std_len_token"],
                                       nb_left_tokens=param_dict["lemma_nb_left_tokens"],
                                       left_char_len=param_dict["lemma_left_char_len"],
                                       nb_right_tokens=param_dict["lemma_nb_right_tokens"],
                                       right_char_len=param_dict["lemma_right_char_len"],
                                       char_vector_dict=train_char_vector_dict)
            print(dev_tokens_X.shape)

            lemmatizer = tagger_stuff.build_lemmatizer_new(nb_filters=1024,
                                                           filter_length=3,
                                                           std_token_len=param_dict["lemma_std_len_token"],
                                                           left_char_len=param_dict["lemma_left_char_len"],
                                                           right_char_len=param_dict["lemma_right_char_len"],
                                                           char_vector_dict=train_char_vector_dict,
                                                           nb_labels=len(label_encoder.classes_),
                                                           dense_dims=1024)
            for e in range(param_dict["lemma_nb_epochs"]):
                print("-> epoch ", e + 1, "...")
                lemmatizer.fit({#'left_input': train_left_X,
                                'token_input': train_tokens_X,
                                #'right_input': train_right_X,
                                'label_output': train_y},
                               nb_epoch=1,
                               batch_size=BATCH_SIZE)
                print("+++ TRAIN SCORE")
                predictions = lemmatizer.predict({#'left_input': train_left_X,
                                                  'token_input': train_tokens_X,
                                                  #'right_input': train_right_X,
                                                  },
                                                 batch_size=BATCH_SIZE)
                predictions = np_utils.categorical_probas_to_classes(predictions['label_output'])
                accuracy = np_utils.accuracy(predictions, train_ints)
                print("\t - acc:\t{:.2%}".format(accuracy))
                print("+++ DEV SCORE")
                predictions = lemmatizer.predict({#'left_input': dev_left_X,
                                                  'token_input': dev_tokens_X,
                                                  #'right_input': dev_right_X,
                                                  },
                                                 batch_size=BATCH_SIZE)
                predictions = np_utils.categorical_probas_to_classes(predictions['label_output'])
                accuracy = np_utils.accuracy(predictions, dev_ints)
                print("\t - acc:\t{:.2%}".format(accuracy))

            """
            ####################################################################
            # Disabled earlier variant: a two-headed lemmatizer with separate
            # lemma and POS outputs instead of one joint label.
            # train data:
            train_lemmas = [lem for lem in train_lemmas if lem not in ("@", "$")]
            train_postags = [pos for pos in train_postags if pos not in ("@", "$")]
            print("orig nb lemmas:", len(set(train_lemmas)))
            train_lemma_counter = Counter(train_lemmas)
            train_lemma_vocab = [k for k, v in train_lemma_counter.items() if v > 1]
            print("reduced nb lemmas:", len(train_lemma_vocab))
            train_lemmas = [lem if lem in train_lemma_vocab else '<unk>' for lem in train_lemmas]
            dev_lemmas = [lem for lem in dev_lemmas if lem not in ("@", "$")]
            dev_postags = [pos for pos in dev_postags if pos not in ("@", "$")]
            dev_lemmas = [lem if lem in train_lemma_vocab else '<unk>' for lem in dev_lemmas]

            lemma_encoder = LabelEncoder()
            lemma_encoder.fit(train_lemmas + dev_lemmas + ['<unk>'])
            train_lemmas_y = lemma_encoder.transform(train_lemmas)
            dev_lemmas_y = lemma_encoder.transform(dev_lemmas)
            pos_encoder = LabelEncoder()
            pos_encoder.fit(train_postags + dev_postags)
            train_pos_y = pos_encoder.transform(train_postags)
            dev_pos_y = pos_encoder.transform(dev_postags)

            train_lemma_labels_y = np_utils.to_categorical(train_lemmas_y, len(lemma_encoder.classes_))
            train_pos_labels_y = np_utils.to_categorical(train_pos_y, len(pos_encoder.classes_))
            dev_lemma_labels_y = np_utils.to_categorical(dev_lemmas_y, len(lemma_encoder.classes_))
            dev_pos_labels_y = np_utils.to_categorical(dev_pos_y, len(pos_encoder.classes_))

            train_left_X, train_tokens_X, train_right_X, train_char_vector_dict = \
                tagger_stuff.vectorize(tokens=train_tokens,
                                       std_token_len=param_dict["lemma_std_len_token"],
                                       nb_left_tokens=param_dict["lemma_nb_left_tokens"],
                                       left_char_len=param_dict["lemma_left_char_len"],
                                       nb_right_tokens=param_dict["lemma_nb_right_tokens"],
                                       right_char_len=param_dict["lemma_right_char_len"])
            print(train_tokens_X.shape)
            dev_left_X, dev_tokens_X, dev_right_X, _ = \
                tagger_stuff.vectorize(tokens=dev_tokens,
                                       std_token_len=param_dict["lemma_std_len_token"],
                                       nb_left_tokens=param_dict["lemma_nb_left_tokens"],
                                       left_char_len=param_dict["lemma_left_char_len"],
                                       nb_right_tokens=param_dict["lemma_nb_right_tokens"],
                                       right_char_len=param_dict["lemma_right_char_len"],
                                       char_vector_dict=train_char_vector_dict)
            print(dev_tokens_X.shape)

            lemmatizer = tagger_stuff.build_lemmatizer(nb_filters=1024,
                                                       filter_length=3,
                                                       std_token_len=param_dict["lemma_std_len_token"],
                                                       left_char_len=param_dict["lemma_left_char_len"],
                                                       right_char_len=param_dict["lemma_right_char_len"],
                                                       char_vector_dict=train_char_vector_dict,
                                                       nb_lemmas=len(lemma_encoder.classes_),
                                                       nb_postags=len(pos_encoder.classes_),
                                                       dense_dims=500)
            for e in range(param_dict["lemma_nb_epochs"]):
                print("-> epoch ", e + 1, "...")
                lemmatizer.fit({'left_input': train_left_X,
                                'token_input': train_tokens_X,
                                'right_input': train_right_X,
                                'lemma_output': train_lemma_labels_y,
                                'pos_output': train_pos_labels_y},
                               nb_epoch=1,
                               batch_size=BATCH_SIZE)
                print("+++ TRAIN SCORE")
                predictions = lemmatizer.predict({'left_input': train_left_X,
                                                  'token_input': train_tokens_X,
                                                  'right_input': train_right_X},
                                                 batch_size=BATCH_SIZE)
                pos_predictions = np_utils.categorical_probas_to_classes(predictions['pos_output'])
                pos_accuracy = np_utils.accuracy(pos_predictions, train_pos_y)
                print("\t - postags acc:\t{:.2%}".format(pos_accuracy))
                lemma_predictions = np_utils.categorical_probas_to_classes(predictions['lemma_output'])
                lemma_accuracy = np_utils.accuracy(lemma_predictions, train_lemmas_y)
                print("\t - lemmas acc:\t{:.2%}".format(lemma_accuracy))
                print("+++ DEV SCORE")
                dev_predictions = lemmatizer.predict({'left_input': dev_left_X,
                                                      'token_input': dev_tokens_X,
                                                      'right_input': dev_right_X},
                                                     batch_size=BATCH_SIZE)
                dev_pos_predictions = np_utils.categorical_probas_to_classes(dev_predictions['pos_output'])
                dev_pos_accuracy = np_utils.accuracy(dev_pos_predictions, dev_pos_y)
                print("\t - postags acc:\t{:.2%}".format(dev_pos_accuracy))
                dev_lemma_predictions = np_utils.categorical_probas_to_classes(dev_predictions['lemma_output'])
                dev_lemma_accuracy = np_utils.accuracy(dev_lemma_predictions, dev_lemmas_y)
                print("\t - lemmas acc:\t{:.2%}".format(dev_lemma_accuracy))
            """

    elif param_dict["mode"] == "test":
        print("> start testing")
        test_tokens, test_postags, test_lemmas = \
            datasets.load_annotated_data_dir(data_dir=os.path.abspath(param_dict["test_dir"]),
                                             nb_instances=5000)
        char_vector_dict = pickle.load(open("../models/" + model_name + "/char_vector_dict.p", "rb"))
        left_X, right_X, concat_y, _ = \
            tokenize_stuff.vectorize(tokens=test_tokens,
                                     nb_left_tokens=param_dict["tok_nb_left_tokens"],
                                     left_char_len=param_dict["tok_left_char_len"],
                                     nb_right_tokens=param_dict["tok_nb_right_tokens"],
                                     right_char_len=param_dict["tok_right_char_len"],
                                     char_vector_dict=char_vector_dict)
        # nb_filters must match the training configuration above (2500),
        # otherwise the saved weights cannot be loaded
        tokenizer = tokenize_stuff.build_tokenizer(nb_filters=2500,
                                                   filter_length=3,
                                                   char_vector_dict=char_vector_dict)
        tokenizer.load_weights("../models/" + model_name + "/tokenizer.model_weights")
        preds = tokenizer.predict_classes([left_X, right_X], batch_size=1000)
        for item in zip(tokenize_stuff.unconcatenate_tokens(test_tokens)[0], preds):
            print(item)

    print("::: midas ended :::")

if __name__ == "__main__":
    main()
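# A minimal round-trip sketch of the joint "lemma_pos" labelling scheme used in
# the lemmatize branch above: join lemma and POS into one label, encode it with
# sklearn's LabelEncoder, train against the one-hot targets, then decode
# predictions with np.argmax and split the label back into its two parts. The
# toy lemma/POS pairs are illustrative only, not taken from the midas data.
import numpy as np
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

lemmas = ["esse", "amare", "esse"]
postags = ["VERB", "VERB", "AUX"]
labels = [lem + "_" + pos for lem, pos in zip(lemmas, postags)]

label_encoder = LabelEncoder()
ints = label_encoder.fit_transform(labels)
y = np_utils.to_categorical(ints, len(label_encoder.classes_))  # one-hot targets

probas = y  # stand-in for lemmatizer.predict(...)['label_output']
pred_ints = np.argmax(probas, axis=-1)
for joint in label_encoder.inverse_transform(pred_ints):
    lemma, pos = joint.rsplit("_", 1)  # split the joint label back into lemma and POS
    print(lemma, pos)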