def top10NN(tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    knut = ModNN(tr_enc, tr_ids, 10)
    t10 = 0
    matchlist = []
    matchables = len(te_enc)
    for ite in range(len(te_enc)):
        # results are structured [[name, bbox, label], distance]
        results = knut.predictTopK(te_enc[ite])
        for ir in range(len(results)):
            # count a hit if the class of any returned neighbour matches
            # the class this encoding belongs to
            if results[ir][0][-1] == te_ids[ite, -1]:
                t10 += 1
                matchlist.append(results[ir])
                break
    print("TOP 10: " + str(t10 / matchables))
    return t10 / matchables
def top10MLP(hidden_layer_sizes, tr_enc_path, tr_ids_path, te_enc_path, te_ids_path,
             activation="relu", reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    mlp = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                        activation=activation,
                        max_iter=1000)
    mlp.fit(tr_enc, tr_ids[:, -1])

    top10matches = []
    c = mlp.classes_
    for x in range(len(te_enc)):
        pp = mlp.predict_proba([te_enc[x]])[0]
        # pick the indices of the 10 highest class probabilities
        top10cidx = []
        for i in range(10):
            max_ = 0
            maxidx = 0
            for j in range(len(pp)):
                if max_ < pp[j] and not (j in top10cidx):
                    max_ = pp[j]
                    maxidx = j
            top10cidx.append(maxidx)
        top10c = c[top10cidx]
        if te_ids[x, -1] in top10c:
            top10matches.append(te_ids[x, -1])
    print(len(top10matches) / len(te_ids))
    return len(top10matches) / len(te_ids)
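# Note: the nested max-search in top10MLP can be expressed more compactly with a
# single argsort over the probability vector. A minimal sketch under the same setup;
# the helper name `top10Classes` is illustrative and not part of the original code.
def top10Classes(mlp, encoding):
    """Return the 10 most probable classes for one encoding (highest probability first)."""
    pp = mlp.predict_proba([encoding])[0]
    top10cidx = np.argsort(pp)[::-1][:10]  # indices of the 10 largest probabilities
    return mlp.classes_[top10cidx]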
def objective(trial):
    epochs = 1000
    learning_rate = 0.00001  # trial.suggest_loguniform("learning_rate", 1e-5, 1e-3)
    batch_size = 64  # trial.suggest_int("batch_size", 8, 64, 8)
    size1 = trial.suggest_categorical("size1", [1024, 512, 256])
    size2 = trial.suggest_categorical("size2", [32, 64, 128])
    print("Learning rate: " + str(learning_rate))
    print("batch size: " + str(batch_size))
    print("Size1:" + str(size1))
    print("Size2:" + str(size2))

    set1 = np.load("../ae/vae_training_encodings_simple_v3.npy")
    ids1 = np.load("../ae/vae_training_ids_simple_v3.npy")
    tr_idx = cleanDataset(ids1)
    set1 = set1[tr_idx]
    ids1 = ids1[tr_idx]

    # hold out the first third of the cleaned training data for validation
    validation_split = 1 / 3
    val_set_size = int(len(ids1) * validation_split)
    val_set = set1[:val_set_size]
    val_ids = ids1[:val_set_size]
    set1 = set1[val_set_size:]
    ids1 = ids1[val_set_size:]

    inputsize = len(set1[0])
    model = SiameseNetwork(inputsize, size1, size2).cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    bce = nn.BCELoss()
    es = EarlyStopper(patience=20, delta=0.1, save_path="siamese.pth", save=False)

    validation_losses = []
    validation_accuracies = []
    for epoch in range(epochs):
        total_train_loss = 0
        labels = []
        predictions = []
        model.train()
        for i in range(int(np.ceil(len(set1) / batch_size))):
            b1, b2, l = createBatch(batch_size, set1, ids1)
            optimizer.zero_grad()  # reset gradients for every batch
            o = model(b1, b2)
            loss = bce(o, l)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            labels.extend(l[:, 0].tolist())
            predictions.extend(o[:, 0].tolist())
        a = accuracy_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))
        r = recall_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))

        stop_epoch = epoch
        if epoch % 5 == 0:
            val_loss = 0
            vpredictions = []
            vlabels = []
            model.eval()
            with torch.no_grad():
                for i in range(int(np.ceil(len(val_set) / batch_size))):
                    b1, b2, l = createBatch(batch_size, val_set, val_ids)
                    o = model(b1, b2)
                    loss = bce(o, l)
                    val_loss += loss.item()
                    vlabels.extend(l[:, 0].tolist())
                    vpredictions.extend(o[:, 0].tolist())
            va = accuracy_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            vr = recall_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            print("EPOCH " + str(epoch) + " with loss " + str(val_loss) +
                  ", accuracy " + str(va) + " and recall " + str(vr))
            stop = es.earlyStopping(val_loss, model)
            trial.report(val_loss, epoch)
            validation_losses.append(val_loss)
            validation_accuracies.append(va)
            if stop:
                print("TRAINING FINISHED AFTER " + str(epoch) + " EPOCHS. K BYE.")
                break

    # validation runs every 5 epochs; with a patience of 20 the best model lies
    # 20 validation checks before the stopping epoch
    best_idx = int(stop_epoch / 5) - 20
    if 0 <= best_idx < len(validation_losses):
        final_loss = validation_losses[best_idx]
        final_accuracy = validation_accuracies[best_idx]
    else:
        final_loss = validation_losses[-1]
        final_accuracy = validation_accuracies[-1]

    # append the result of this trial to the optimisation log
    filename = "siamese_optim_vae_sum_v3.txt"
    with open(filename, 'a') as file:
        file.write("size1:" + str(size1))
        file.write("size2:" + str(size2))
        file.write("final_loss:" + str(final_loss))
        file.write("final_accuracy:" + str(final_accuracy))
        file.write('\n')
    return final_loss
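# The objective above is meant to be driven by an Optuna study. The helper below is
# a minimal sketch assuming the standard Optuna API; the function name and the
# default trial count are illustrative, not taken from the original code.
def runSiameseOptimization(n_trials=20):
    import optuna
    study = optuna.create_study(direction="minimize")  # minimise the validation loss
    study.optimize(objective, n_trials=n_trials)
    print("Best parameters: " + str(study.best_params))
    print("Best validation loss: " + str(study.best_value))
    return study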
def train(epochs, learning_rate, batch_size, tr_enc_path, tr_ids_path,
          save_path="siamese_network_vae_correct_v2_2.pth",
          size1=128, size2=32, validation_split=1 / 3):
    set1 = np.load(tr_enc_path)
    ids1 = np.load(tr_ids_path)
    tr_idx = cleanDataset(ids1)
    set1 = set1[tr_idx]
    ids1 = ids1[tr_idx]

    # hold out the first part of the cleaned training data for validation
    val_set_size = int(len(ids1) * validation_split)
    val_set = set1[:val_set_size]
    val_ids = ids1[:val_set_size]
    set1 = set1[val_set_size:]
    ids1 = ids1[val_set_size:]

    inputsize = len(set1[0])
    model = SiameseNetwork(inputsize, size1, size2).cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    bce = nn.BCELoss()
    es = EarlyStopper(patience=20, delta=0.1, save_path=save_path, save=True)

    training_losses = []
    training_accuracies = []
    validation_losses = []
    validation_accuracies = []
    for epoch in range(epochs):
        total_train_loss = 0
        labels = []
        predictions = []
        model.train()
        for i in range(int(np.ceil(len(set1) / batch_size))):
            b1, b2, l = createBatch(batch_size, set1, ids1)
            optimizer.zero_grad()  # reset gradients for every batch
            o = model(b1, b2)
            loss = bce(o, l)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            labels.extend(l[:, 0].tolist())
            predictions.extend(o[:, 0].tolist())
        training_losses.append(loss.item())  # loss of the last batch of this epoch
        a = accuracy_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))
        training_accuracies.append(a)
        r = recall_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))

        stop_epoch = epoch
        if epoch % 10 == 0:
            val_loss = 0
            vpredictions = []
            vlabels = []
            model.eval()
            with torch.no_grad():
                for i in range(int(np.ceil(len(val_set) / batch_size))):
                    b1, b2, l = createBatch(batch_size, val_set, val_ids)
                    o = model(b1, b2)
                    loss = bce(o, l)
                    val_loss += loss.item()
                    vlabels.extend(l[:, 0].tolist())
                    vpredictions.extend(o[:, 0].tolist())
            va = accuracy_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            vr = recall_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            print("EPOCH " + str(epoch) + " with loss " + str(val_loss) +
                  ", accuracy " + str(va) + " and recall " + str(vr))
            stop = es.earlyStopping(val_loss, model)
            validation_losses.append(val_loss)
            validation_accuracies.append(va)
            if stop:
                print("TRAINING FINISHED AFTER " + str(epoch) + " EPOCHS. K BYE.")
                break

    # validation runs every 10 epochs; with a patience of 20 the best model lies
    # 20 validation checks before the stopping epoch
    best_idx = int(stop_epoch / 10) - 20
    if 0 <= best_idx < len(validation_losses):
        final_loss = validation_losses[best_idx]
        final_accuracy = validation_accuracies[best_idx]
    else:
        final_loss = validation_losses[-1]
        final_accuracy = validation_accuracies[-1]

    # write the loss/accuracy curves and the final values to the log file
    filename = "siamese_optim_losses_v3.txt"
    with open(filename, 'a') as file:
        file.write("Training loss:")
        file.write('\n')
        for l in training_losses:
            file.write(str(l))
            file.write('\n')
        file.write("Training accuracy:")
        file.write('\n')
        for l in training_accuracies:
            file.write(str(l))
            file.write('\n')
        file.write("Validation loss:")
        file.write('\n')
        for l in validation_losses:
            file.write(str(l))
            file.write('\n')
        file.write("Validation accuracy:")
        file.write('\n')
        for l in validation_accuracies:
            file.write(str(l))
            file.write('\n')
        file.write("final_loss:" + str(final_loss))
        file.write("final_accuracy:" + str(final_accuracy))
        file.write('\n')
def top10Siamese(net_path, tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    pred = []
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    model = torch.load(net_path).cuda()
    model.eval()
    for i in range(len(te_ids)):
        if i % 1000 == 0:
            print(i)
        te = torch.from_numpy(np.array([te_enc[i], te_enc[i]])).float().cuda()
        matches = []
        for j in range(0, len(tr_ids)):
            tr = torch.from_numpy(np.array([tr_enc[j], te_enc[i]])).float().cuda()
            r = model(te, tr)
            r = r[0]
            matches.append([r.item(), tr_ids[j, -1]])
            if r < 0.5:  # classified as a match
                if te_ids[i, -1] == tr_ids[j, -1]:  # and is a match
                    tp += 1
                else:  # but is no match
                    fp += 1
            else:  # classified as no match
                if te_ids[i, -1] == tr_ids[j, -1]:  # but is a match
                    fn += 1
                else:  # and is no match
                    tn += 1
        matches = np.array(matches)
        if len(matches) > 0:
            # sort ascending by network output (low output = predicted match)
            matches = matches[matches[:, 0].argsort()]
            match = False
            whales = []
            counter = 0
            # collect the 10 closest distinct ids; guard against running out of matches
            while len(whales) < 10 and counter < len(matches):
                m_id = matches[counter, -1]
                if not (m_id in whales):
                    whales.append(m_id)
                    if te_ids[i, -1] == m_id:
                        match = True
                        break
                counter += 1
            if match:
                pred.append(1)
            else:
                pred.append(0)
    a = np.mean(np.array(pred))
    print("test accuracy: " + str(a))
    return a
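# Example driver tying the pieces together: train the siamese network, then run the
# three top-10 evaluations on the same encodings. This is a hedged sketch: the test
# file names and the hyperparameter values are illustrative placeholders, not values
# taken from the original code.
if __name__ == "__main__":
    tr_enc_path = "../ae/vae_training_encodings_simple_v3.npy"
    tr_ids_path = "../ae/vae_training_ids_simple_v3.npy"
    te_enc_path = "../ae/vae_test_encodings_simple_v3.npy"  # assumed test encodings
    te_ids_path = "../ae/vae_test_ids_simple_v3.npy"        # assumed test ids

    # train the siamese network and save the best checkpoint via the EarlyStopper
    train(epochs=1000, learning_rate=1e-5, batch_size=64,
          tr_enc_path=tr_enc_path, tr_ids_path=tr_ids_path,
          save_path="siamese_network_vae_correct_v2_2.pth")

    # top-10 accuracy of the three retrieval approaches
    top10NN(tr_enc_path, tr_ids_path, te_enc_path, te_ids_path)
    top10MLP((512, 64), tr_enc_path, tr_ids_path, te_enc_path, te_ids_path)
    top10Siamese("siamese_network_vae_correct_v2_2.pth",
                 tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=1000)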