def run(Emin, Emax, BDT=False):
    """Train and evaluate an electron/proton classifier for one energy bin.

    Trains either a GradientBoostingClassifier (``BDT=True``) or a Keras
    DNN (``BDT=False``) on the training .npy files, then plots classifier
    score distributions for the validation set (restricted to
    [Emin, Emax]) and for the full test set, saving the figures to disk.

    Parameters
    ----------
    Emin, Emax : energy bounds in MeV (divided by 1000 for GeV labels
        in titles/filenames — assumed, based on the '/ 1000' scaling;
        TODO confirm units against the data files).
    BDT : if True use sklearn GradientBoosting, otherwise a Keras DNN.

    Side effects: writes 'spectre_*', 'pred_*' and 'full_*' PNG figures;
    returns early (no retraining) if the 'pred_*' figure already exists.
    """
    if BDT:
        figureName = 'pred_BDT_' + str(int(Emin / 1000)) + '-' + str(
            int(Emax / 1000))
    else:
        figureName = 'pred_DNN_' + str(int(Emin / 1000)) + '-' + str(
            int(Emax / 1000))
    # Skip work entirely if this bin's prediction figure was already produced.
    if os.path.isfile(figureName + '.png'):
        return
    # Training set: lower bound -1 (i.e. no lower energy cut), upper bound
    # Emax — training deliberately uses a wider range than validation.
    train_e = getParticleSet(
        '/home/drozd/analysis/fraction1/data_train_elecs.npy', -1, Emax, BDT)
    train_p = getParticleSet(
        '/home/drozd/analysis/fraction1/data_train_prots.npy', -1, Emax, BDT)
    train = np.concatenate((train_e, train_p))
    np.random.shuffle(train)
    # Last column is the label, everything before it is the feature vector.
    X_train = train[:, 0:-1]
    Y_train = train[:, -1]
    # Diagnostic histogram of feature column 30 (presumably a binned
    # normalised energy variable, per the x-label — TODO confirm).
    ghost = plt.figure()
    plt.hist(train_e[:, 30], 50, histtype='step', label='e', color='green')
    plt.hist(train_p[:, 30], 50, histtype='step', label='p', color='red')
    plt.title(str(int(Emin / 1000)) + ' - ' + str(int(Emax / 1000)))
    plt.xlabel('Binned normalised energy')
    plt.legend(loc='best')
    plt.savefig(figureName.replace('pred_', 'spectre_'))
    plt.close(ghost)
    del train_e, train_p, train
    # Validation set restricted to the [Emin, Emax] bin under study.
    val_e = getParticleSet(
        '/home/drozd/analysis/fraction1/data_validate_elecs_1.npy', Emin,
        Emax, BDT)
    val_p = getParticleSet(
        '/home/drozd/analysis/fraction1/data_validate_prots_1.npy', Emin,
        Emax, BDT)
    val = np.concatenate((val_e, val_p))
    np.random.shuffle(val)
    X_val = val[:, 0:-1]
    Y_val = val[:, -1]
    del val_e, val_p, val
    print(
        str(int(Emin / 1000)) + '-' + str(int(Emax / 1000)) + ': Training on ',
        X_train.shape[0], ' events')
    if BDT:
        # Gradient-boosted decision trees; predict_proba[:, 1] is the
        # probability of the positive (electron) class.
        model = GradientBoostingClassifier(n_estimators=100,
                                           learning_rate=0.1,
                                           max_depth=3,
                                           min_samples_leaf=0.0001)
        model.fit(X_train, Y_train)
        predictions = model.predict_proba(X_val)[:, 1]
    else:
        # Three hidden layers (300/150/70, ReLU) with light dropout,
        # sigmoid output for binary classification.
        model = Sequential()
        model.add(
            Dense(300,
                  input_shape=(X_train.shape[1], ),
                  kernel_initializer='he_uniform',
                  activation='relu'))
        model.add(Dropout(0.1))
        model.add(
            Dense(150, kernel_initializer='he_uniform', activation='relu'))
        model.add(Dropout(0.1))
        model.add(Dense(70, kernel_initializer='he_uniform',
                        activation='relu'))
        model.add(Dropout(0.1))
        model.add(
            Dense(1, kernel_initializer='he_uniform', activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['binary_accuracy'])
        # Reduce LR when training loss plateaus; early stopping left disabled.
        rdlronplt = ReduceLROnPlateau(monitor='loss', patience=3, min_lr=0.001)
        #~ earl = EarlyStopping(monitor='loss',min_delta=0.0001,patience=5)
        callbacks = [rdlronplt]
        history = model.fit(X_train,
                            Y_train,
                            batch_size=150,
                            epochs=75,
                            verbose=0,
                            callbacks=callbacks,
                            validation_data=(X_val, Y_val))
        predictions = model.predict(X_val)
    # Split classifier scores by true label for plotting.
    elecs_p, prots_p = getClassifierScore(Y_val, predictions)
    Nbins_plt = 50
    # Fixed [0, 1] binning for classifier-score histograms.
    binList = [x / Nbins_plt for x in range(0, Nbins_plt + 1)]
    fig = plt.figure()
    plt.hist(elecs_p,
             bins=binList,
             label='e',
             alpha=0.7,
             histtype='step',
             color='green')
    plt.hist(prots_p,
             bins=binList,
             label='p',
             alpha=0.7,
             histtype='step',
             color='red')
    plt.xlabel('Classifier score')
    plt.ylabel('Number of events')
    plt.title(
        str(int(Emin / 1000)) + ' GeV - ' + str(int(Emax / 1000)) + ' GeV')
    plt.legend(loc='upper center')
    plt.yscale('log')
    plt.savefig(figureName)
    plt.close(fig)
    # Test set is loaded raw and normalised here (getParticleSet is not
    # used); columns [0:-2] are features, column -1 is the label — the
    # second-to-last column is dropped (presumably an energy column used
    # only for the Emin/Emax cut elsewhere; TODO confirm).
    arr_e = np.load('/home/drozd/analysis/fraction1/data_test_elecs_1.npy')
    X_e = _normalise(arr_e[:, 0:-2])
    Y_e = arr_e[:, -1]
    arr_e = np.concatenate((X_e, Y_e.reshape((Y_e.shape[0], 1))), axis=1)
    del X_e, Y_e
    arr_p = np.load('/home/drozd/analysis/fraction1/data_test_prots_1.npy')
    X_p = _normalise(arr_p[:, 0:-2])
    Y_p = arr_p[:, -1]
    arr_p = np.concatenate((X_p, Y_p.reshape((Y_p.shape[0], 1))), axis=1)
    del X_p, Y_p
    tst = np.concatenate((arr_e, arr_p))
    np.random.shuffle(tst)
    del arr_e, arr_p
    X_tst = tst[:, 0:-1]
    Y_tst = tst[:, -1]
    del tst
    if BDT:
        pred_tst = model.predict_proba(X_tst)[:, 1]
    else:
        pred_tst = model.predict(X_tst)
    del X_tst
    elecs_pd, prots_pd = getClassifierScore(Y_tst, pred_tst)
    # Same score histogram, now over the full (uncut) test energy range.
    fig = plt.figure()
    plt.hist(elecs_pd,
             bins=binList,
             label='e',
             alpha=0.7,
             histtype='step',
             color='green')
    plt.hist(prots_pd,
             bins=binList,
             label='p',
             alpha=0.7,
             histtype='step',
             color='red')
    plt.xlabel('Classifier score')
    plt.ylabel('Number of events')
    plt.title('Full validation range, limited train range \n' +
              str(int(Emin / 1000)) + ' GeV - ' + str(int(Emax / 1000)) +
              ' GeV')
    plt.legend(loc='upper center')
    plt.yscale('log')
    plt.savefig(figureName.replace('pred', 'full'))
    plt.close(fig)
# Shuffle samples and labels together by permuting a shared index array,
# then split 80/10/10 into train/validation/test and fit the model.
# BUG FIX: the label split previously read
#   (Y_train, Y_val, X_test) = (label[...], label[...], data[splitpoint2:])
# which never created Y_test and silently re-assigned X_test from `data`;
# it now assigns Y_test from `label` so labels align with X_test.
index = list(range(len(input_data)))
random.shuffle(index)
data = input_data[index]
label = label_data[index]

# In[32]:

# 80% train / 10% validation / 10% test.
splitpoint1 = int(round(num * 0.8))
splitpoint2 = int(round(num * 0.9))
(X_train, X_val, X_test) = (data[0:splitpoint1],
                            data[splitpoint1:splitpoint2], data[splitpoint2:])
(Y_train, Y_val, Y_test) = (label[0:splitpoint1],
                            label[splitpoint1:splitpoint2],
                            label[splitpoint2:])

# In[ ]:

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
# `epochs=` replaces the long-deprecated Keras 1.x `nb_epoch=` argument
# (the other fit call in this file already uses `epochs=`).
model.fit(X_train,
          Y_train,
          batch_size=16,
          epochs=10,
          verbose=1,
          validation_data=(X_val, Y_val),
          callbacks=[history])

# In[ ]:

# Leftover notebook debug cell; kept to preserve script output.
c = 3
print(c)
#%
# Remaining layers of the binary classifier: two more 64-unit blocks
# (Dense -> BatchNorm -> sigmoid activation -> dropout) and a single
# sigmoid output unit.
model.add(Activation('sigmoid'))
model.add(Dropout(0.4))
model.add(Dense(64))
model.add(BatchNormalization(axis=1, momentum=0.99, epsilon=0.001))
model.add(Activation('sigmoid'))
model.add(Dropout(0.4))
model.add(Dense(64))
model.add(BatchNormalization(axis=1, momentum=0.99, epsilon=0.001))
model.add(Activation('sigmoid'))
model.add(Dropout(0.4))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adamax',
              metrics=['binary_accuracy'])
# callbacks_list = [EarlyStopping(monitor='binary_accuracy', mode='max', patience=2)]

# BUG FIX: X_test was previously normalised only AFTER model.fit(), so the
# validation metrics were computed on raw features while training data was
# normalised. Normalise with the training statistics (X_mean, X_std) first
# so fit-time validation and predict-time inputs see the same scaling.
X_test = normalization(X_test, col, train=False, X_mean=X_mean, X_std=X_std)
train_histroy = model.fit(X_train,
                          Y_train,
                          validation_data=(X_test, Y_test),
                          batch_size=batch_size,
                          epochs=epochs)

ans = model.predict(X_test)
# Threshold the sigmoid output at 0.5; only positives are written, so the
# submission column is presumably pre-filled with 0 — TODO confirm.
for i in range(len(ans)):
    if ans[i] >= 0.5:
        submission.iloc[i, 1] = 1
# NOTE: many predictions come out identical, which looks suspicious —
# worth investigating (translated from the original Chinese comment).