def top10NN(tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    knut = ModNN(tr_enc, tr_ids, 10)
    t10 = 0
    matchlist = []
    matchables = len(te_enc)
    for ite in range(len(te_enc)):
        # results are structured [[name, bbox, label], distance]
        results = knut.predictTopK(te_enc[ite])
        for ir in range(len(results)):
            # count a hit if the class of any returned neighbour matches
            # the class this encoding belongs to
            if results[ir][0][-1] == te_ids[ite, -1]:
                t10 += 1
                matchlist.append(results[ir])
                break
    print("TOP 10: " + str(t10 / matchables))
    return t10 / matchables
def top10MLP(hidden_layer_sizes, tr_enc_path, tr_ids_path, te_enc_path, te_ids_path,
             activation="relu", reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    mlp = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                        activation=activation,
                        max_iter=1000)
    mlp.fit(tr_enc, tr_ids[:, -1])

    top10matches = []
    c = mlp.classes_
    for x in range(len(te_enc)):
        pp = mlp.predict_proba([te_enc[x]])[0]
        # pick the indices of the 10 highest class probabilities
        top10cidx = []
        for i in range(10):
            max_ = 0
            maxidx = 0
            for j in range(len(pp)):
                if max_ < pp[j] and not (j in top10cidx):
                    max_ = pp[j]
                    maxidx = j
            top10cidx.append(maxidx)
        top10c = c[top10cidx]
        if te_ids[x, -1] in top10c:
            top10matches.append(te_ids[x, -1])
    print(len(top10matches) / len(te_ids))
    return len(top10matches) / len(te_ids)
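# Note: the nested max-search in top10MLP can be expressed more compactly with a
# single argsort over the probability vector. A minimal sketch under the same setup;
# the helper name `top10Classes` is illustrative and not part of the original code.
def top10Classes(mlp, encoding):
    """Return the 10 most probable classes for one encoding (highest probability first)."""
    pp = mlp.predict_proba([encoding])[0]
    top10cidx = np.argsort(pp)[::-1][:10]  # indices of the 10 largest probabilities
    return mlp.classes_[top10cidx]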
def objective(trial):
    epochs = 1000
    learning_rate = 0.00001  # trial.suggest_loguniform("learning_rate", 1e-5, 1e-3)
    batch_size = 64  # trial.suggest_int("batch_size", 8, 64, 8)
    size1 = trial.suggest_categorical("size1", [1024, 512, 256])
    size2 = trial.suggest_categorical("size2", [32, 64, 128])
    print("Learning rate: " + str(learning_rate))
    print("batch size: " + str(batch_size))
    print("Size1:" + str(size1))
    print("Size2:" + str(size2))

    set1 = np.load("../ae/vae_training_encodings_simple_v3.npy")
    ids1 = np.load("../ae/vae_training_ids_simple_v3.npy")
    tr_idx = cleanDataset(ids1)
    set1 = set1[tr_idx]
    ids1 = ids1[tr_idx]

    # hold out the first third of the cleaned training data for validation
    validation_split = 1 / 3
    val_set_size = int(len(ids1) * validation_split)
    val_set = set1[:val_set_size]
    val_ids = ids1[:val_set_size]
    set1 = set1[val_set_size:]
    ids1 = ids1[val_set_size:]

    inputsize = len(set1[0])
    model = SiameseNetwork(inputsize, size1, size2).cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    bce = nn.BCELoss()
    es = EarlyStopper(patience=20, delta=0.1, save_path="siamese.pth", save=False)

    validation_losses = []
    validation_accuracies = []
    for epoch in range(epochs):
        total_train_loss = 0
        labels = []
        predictions = []
        model.train()
        for i in range(int(np.ceil(len(set1) / batch_size))):
            b1, b2, l = createBatch(batch_size, set1, ids1)
            optimizer.zero_grad()  # reset gradients for every batch
            o = model(b1, b2)
            loss = bce(o, l)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            labels.extend(l[:, 0].tolist())
            predictions.extend(o[:, 0].tolist())
        a = accuracy_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))
        r = recall_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))

        stop_epoch = epoch
        if epoch % 5 == 0:
            val_loss = 0
            vpredictions = []
            vlabels = []
            model.eval()
            with torch.no_grad():
                for i in range(int(np.ceil(len(val_set) / batch_size))):
                    b1, b2, l = createBatch(batch_size, val_set, val_ids)
                    o = model(b1, b2)
                    loss = bce(o, l)
                    val_loss += loss.item()
                    vlabels.extend(l[:, 0].tolist())
                    vpredictions.extend(o[:, 0].tolist())
            va = accuracy_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            vr = recall_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            print("EPOCH " + str(epoch) + " with loss " + str(val_loss) +
                  ", accuracy " + str(va) + " and recall " + str(vr))
            stop = es.earlyStopping(val_loss, model)
            trial.report(val_loss, epoch)
            validation_losses.append(val_loss)
            validation_accuracies.append(va)
            if stop:
                print("TRAINING FINISHED AFTER " + str(epoch) + " EPOCHS. K BYE.")
                break

    # validation runs every 5 epochs; with a patience of 20 the best model lies
    # 20 validation checks before the stopping epoch
    best_idx = int(stop_epoch / 5) - 20
    if 0 <= best_idx < len(validation_losses):
        final_loss = validation_losses[best_idx]
        final_accuracy = validation_accuracies[best_idx]
    else:
        final_loss = validation_losses[-1]
        final_accuracy = validation_accuracies[-1]

    # append the result of this trial to the optimisation log
    filename = "siamese_optim_vae_sum_v3.txt"
    with open(filename, 'a') as file:
        file.write("size1:" + str(size1))
        file.write("size2:" + str(size2))
        file.write("final_loss:" + str(final_loss))
        file.write("final_accuracy:" + str(final_accuracy))
        file.write('\n')
    return final_loss
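# The objective above is meant to be driven by an Optuna study. The helper below is
# a minimal sketch assuming the standard Optuna API; the function name and the
# default trial count are illustrative, not taken from the original code.
def runSiameseOptimization(n_trials=20):
    import optuna
    study = optuna.create_study(direction="minimize")  # minimise the validation loss
    study.optimize(objective, n_trials=n_trials)
    print("Best parameters: " + str(study.best_params))
    print("Best validation loss: " + str(study.best_value))
    return study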
def train(epochs, learning_rate, batch_size, tr_enc_path, tr_ids_path,
          save_path="siamese_network_vae_correct_v2_2.pth",
          size1=128, size2=32, validation_split=1 / 3):
    set1 = np.load(tr_enc_path)
    ids1 = np.load(tr_ids_path)
    tr_idx = cleanDataset(ids1)
    set1 = set1[tr_idx]
    ids1 = ids1[tr_idx]

    # hold out the first part of the cleaned training data for validation
    val_set_size = int(len(ids1) * validation_split)
    val_set = set1[:val_set_size]
    val_ids = ids1[:val_set_size]
    set1 = set1[val_set_size:]
    ids1 = ids1[val_set_size:]

    inputsize = len(set1[0])
    model = SiameseNetwork(inputsize, size1, size2).cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    bce = nn.BCELoss()
    es = EarlyStopper(patience=20, delta=0.1, save_path=save_path, save=True)

    training_losses = []
    training_accuracies = []
    validation_losses = []
    validation_accuracies = []
    for epoch in range(epochs):
        total_train_loss = 0
        labels = []
        predictions = []
        model.train()
        for i in range(int(np.ceil(len(set1) / batch_size))):
            b1, b2, l = createBatch(batch_size, set1, ids1)
            optimizer.zero_grad()  # reset gradients for every batch
            o = model(b1, b2)
            loss = bce(o, l)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            labels.extend(l[:, 0].tolist())
            predictions.extend(o[:, 0].tolist())
        training_losses.append(loss.item())  # loss of the last batch of this epoch
        a = accuracy_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))
        training_accuracies.append(a)
        r = recall_score(labels, np.where(np.array(predictions) < 0.5, 0.0, 1.0))

        stop_epoch = epoch
        if epoch % 10 == 0:
            val_loss = 0
            vpredictions = []
            vlabels = []
            model.eval()
            with torch.no_grad():
                for i in range(int(np.ceil(len(val_set) / batch_size))):
                    b1, b2, l = createBatch(batch_size, val_set, val_ids)
                    o = model(b1, b2)
                    loss = bce(o, l)
                    val_loss += loss.item()
                    vlabels.extend(l[:, 0].tolist())
                    vpredictions.extend(o[:, 0].tolist())
            va = accuracy_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            vr = recall_score(vlabels, np.where(np.array(vpredictions) < 0.5, 0.0, 1.0))
            print("EPOCH " + str(epoch) + " with loss " + str(val_loss) +
                  ", accuracy " + str(va) + " and recall " + str(vr))
            stop = es.earlyStopping(val_loss, model)
            validation_losses.append(val_loss)
            validation_accuracies.append(va)
            if stop:
                print("TRAINING FINISHED AFTER " + str(epoch) + " EPOCHS. K BYE.")
                break

    # validation runs every 10 epochs; with a patience of 20 the best model lies
    # 20 validation checks before the stopping epoch
    best_idx = int(stop_epoch / 10) - 20
    if 0 <= best_idx < len(validation_losses):
        final_loss = validation_losses[best_idx]
        final_accuracy = validation_accuracies[best_idx]
    else:
        final_loss = validation_losses[-1]
        final_accuracy = validation_accuracies[-1]

    # write the loss/accuracy curves and the final values to the log file
    filename = "siamese_optim_losses_v3.txt"
    with open(filename, 'a') as file:
        file.write("Training loss:")
        file.write('\n')
        for l in training_losses:
            file.write(str(l))
            file.write('\n')
        file.write("Training accuracy:")
        file.write('\n')
        for l in training_accuracies:
            file.write(str(l))
            file.write('\n')
        file.write("Validation loss:")
        file.write('\n')
        for l in validation_losses:
            file.write(str(l))
            file.write('\n')
        file.write("Validation accuracy:")
        file.write('\n')
        for l in validation_accuracies:
            file.write(str(l))
            file.write('\n')
        file.write("final_loss:" + str(final_loss))
        file.write("final_accuracy:" + str(final_accuracy))
        file.write('\n')
def top10Siamese(net_path, tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=None):
    tr_enc = np.load(tr_enc_path)
    tr_ids = np.load(tr_ids_path)
    te_enc = np.load(te_enc_path)
    te_ids = np.load(te_ids_path)

    tr_idx = cleanDataset(tr_ids)
    tr_enc = tr_enc[tr_idx]
    tr_ids = tr_ids[tr_idx]
    te_idx = cleanDataset(te_ids)
    te_enc = te_enc[te_idx]
    te_ids = te_ids[te_idx]

    if reduced:
        te_idx = randomSubset(reduced, len(te_ids))
        te_enc = te_enc[te_idx]
        te_ids = te_ids[te_idx]

    pred = []
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    model = torch.load(net_path).cuda()
    model.eval()
    for i in range(len(te_ids)):
        if i % 1000 == 0:
            print(i)
        te = torch.from_numpy(np.array([te_enc[i], te_enc[i]])).float().cuda()
        matches = []
        for j in range(0, len(tr_ids)):
            tr = torch.from_numpy(np.array([tr_enc[j], te_enc[i]])).float().cuda()
            r = model(te, tr)
            r = r[0]
            matches.append([r.item(), tr_ids[j, -1]])
            if r < 0.5:  # classified as a match
                if te_ids[i, -1] == tr_ids[j, -1]:  # and is a match
                    tp += 1
                else:  # but is no match
                    fp += 1
            else:  # classified as no match
                if te_ids[i, -1] == tr_ids[j, -1]:  # but is a match
                    fn += 1
                else:  # and is no match
                    tn += 1
        matches = np.array(matches)
        if len(matches) > 0:
            # sort ascending by network output (low output = predicted match)
            matches = matches[matches[:, 0].argsort()]
            match = False
            whales = []
            counter = 0
            # collect the 10 closest distinct ids; guard against running out of matches
            while len(whales) < 10 and counter < len(matches):
                m_id = matches[counter, -1]
                if not (m_id in whales):
                    whales.append(m_id)
                    if te_ids[i, -1] == m_id:
                        match = True
                        break
                counter += 1
            if match:
                pred.append(1)
            else:
                pred.append(0)
    a = np.mean(np.array(pred))
    print("test accuracy: " + str(a))
    return a
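# Example driver tying the pieces together: train the siamese network, then run the
# three top-10 evaluations on the same encodings. This is a hedged sketch: the test
# file names and the hyperparameter values are illustrative placeholders, not values
# taken from the original code.
if __name__ == "__main__":
    tr_enc_path = "../ae/vae_training_encodings_simple_v3.npy"
    tr_ids_path = "../ae/vae_training_ids_simple_v3.npy"
    te_enc_path = "../ae/vae_test_encodings_simple_v3.npy"  # assumed test encodings
    te_ids_path = "../ae/vae_test_ids_simple_v3.npy"        # assumed test ids

    # train the siamese network and save the best checkpoint via the EarlyStopper
    train(epochs=1000, learning_rate=1e-5, batch_size=64,
          tr_enc_path=tr_enc_path, tr_ids_path=tr_ids_path,
          save_path="siamese_network_vae_correct_v2_2.pth")

    # top-10 accuracy of the three retrieval approaches
    top10NN(tr_enc_path, tr_ids_path, te_enc_path, te_ids_path)
    top10MLP((512, 64), tr_enc_path, tr_ids_path, te_enc_path, te_ids_path)
    top10Siamese("siamese_network_vae_correct_v2_2.pth",
                 tr_enc_path, tr_ids_path, te_enc_path, te_ids_path, reduced=1000)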