Example #1
    def predict(self, x, batch_size=512):
        '''Model prediction

        Args:
            x (ndarray): input image of shape (m, hi, wi, ci).
                Where m: number of records. hi, wi: height and width of input image.
                ci: channels of input image.
        Returns:
            yhat (ndarray): predicted output with shape (m, k). k is the number of output units,
                m is the number of records.
        '''

        n = len(x)
        x = force4D(x)
        # do prediction in batches as the vectorized convolution inflates memory
        # usage considerably.
        yhat = []
        batches = getBatch(n, batch_size)
        for idxii in batches:
            xii = np.take(x, idxii, axis=0)
            _, activations = self.feedForward(xii)
            yhatii = activations[self.n_layers]
            yhat.append(yhatii)

        return np.concatenate(yhat, axis=0)
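
Note: this example (and examples #3, #5 and #6 below) relies on a getBatch(n, batch_size, randomize=...) helper that is not included in the listing. A minimal sketch, assuming it simply yields arrays of record indices:

import numpy as np

def getBatch(n, batch_size, randomize=False):
    # Sketch of the index generator assumed by examples #1, #3, #5 and #6:
    # yields arrays of record indices covering range(n) in chunks of batch_size.
    idx = np.arange(n)
    if randomize:
        np.random.shuffle(idx)
    for start in range(0, n, batch_size):
        yield idx[start:start + batch_size]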
Example #2
def evaluate(model, loss_func, dictionary, data):
    """evaluate the model while training"""
    model.eval()  # turn on the eval() switch to disable dropout
    total_loss = 0
    total_correct = 0
    total_prediction = []
    total_labels = []

    for texts, labels, masks, bsz in utils.getBatch(data=data,
                                                    dictionary=dictionary,
                                                    maxlen=MAX_LEN,
                                                    batch_size=BATCH_SIZE):
        hidden = model.init_hidden(texts.size(0))
        fc, outh, pred, attention = model.forward(texts, masks, hidden)
        output_flat = pred.view(texts.size(0), -1)
        total_loss += loss_func(output_flat, labels).item()
        prediction = torch.max(output_flat, 1)[1]
        total_correct += (prediction == labels).float().sum().item()

        total_prediction += prediction.tolist()
        total_labels += labels.tolist()
    res = classification_report(total_labels, total_prediction)
    return res, total_loss / (len(data) // BATCH_SIZE), total_correct / len(data)
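
Note: examples #2 and #4 iterate over utils.getBatch(data, dictionary, maxlen, batch_size), which is not shown here. A rough, hypothetical sketch, assuming data is a list of (token_list, label) pairs and dictionary maps tokens to integer ids with 0 reserved for padding:

import torch

def getBatch(data, dictionary, maxlen, batch_size):
    # Hypothetical sketch only; the real utils.getBatch may differ.
    # Pads each token-id sequence to maxlen and yields
    # (texts, labels, masks, bsz) batches as examples #2 and #4 expect.
    for start in range(0, len(data), batch_size):
        chunk = data[start:start + batch_size]
        bsz = len(chunk)
        texts = torch.zeros(bsz, maxlen, dtype=torch.long)
        masks = torch.zeros(bsz, maxlen)
        labels = torch.zeros(bsz, dtype=torch.long)
        for i, (tokens, label) in enumerate(chunk):
            ids = [dictionary.get(tok, 0) for tok in tokens][:maxlen]
            texts[i, :len(ids)] = torch.tensor(ids, dtype=torch.long)
            masks[i, :len(ids)] = 1.0
            labels[i] = label
        yield texts, labels, masks, bsz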
Example #3
    def batchTrain(self, x, y, epochs, batch_size):
        '''Training using mini batches

        Args:
            x (ndarray): input image of shape (m, hi, wi, ci).
                Where m: number of records. hi, wi: height and width of input image.
                ci: channels of input image.
            y (ndarray): target with shape (m, k). k is the number of output units,
                m is the number of records.
            epochs (int): number of epochs to train.
            batch_size (int): mini-batch size.
        Returns:
            costs (ndarray): overall cost at each epoch.
        '''
        costs = []
        m = len(x)
        x = force4D(x)
        for ee in range(epochs):
            batches = getBatch(m, batch_size, randomize=True)
            for idxii in batches:
                xii = np.take(x, idxii, axis=0)
                yii = np.take(y, idxii, axis=0)
                weight_sums, activations = self.feedForward(xii)
                gradsii, grads_biasii = self.feedBackward(weight_sums, activations, yii)
                self.gradientDescent(gradsii, grads_biasii, batch_size)

            je = self.evaluateCost(x, y)
            print('# <batchTrain>: cost at epoch %d, j = %f' % (ee, je))
            costs.append(je)

        return np.array(costs)
Example #4
def train(model, loss_func, dictionary, epoch, train_data, dev_data,
          identity_mat, stop_counter):
    global best_dev_loss, best_acc
    model.train()
    total_loss = 0
    for texts, labels, masks, bsz in utils.getBatch(data=train_data,
                                                    dictionary=dictionary,
                                                    maxlen=MAX_LEN,
                                                    batch_size=BATCH_SIZE):
        init_state = model.init_hidden(bsz)
        fc, outh, pred, attention = model.forward(sents=texts,
                                                  mask=masks,
                                                  init_hc=init_state)

        loss = loss_func(pred.view(texts.size(0), -1), labels)
        if USE_ATTENTION:
            attentionT = torch.transpose(attention, 1, 2).contiguous()
            extra_loss = Frobenius(
                torch.bmm(attention, attentionT) -
                identity_mat[:attention.size(0)])
            loss += PENALIZATION_COEFF * extra_loss

        optimizer.zero_grad()
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), CLIP)
        optimizer.step()

        total_loss += loss.data

    res, dev_loss, acc = evaluate(model, loss_func, dictionary, dev_data)
    print(res)
    utils.saveLog(LOG_PATH, res)

    total_res = 'epoch: %d, dev loss: %f, acc: %f' % (epoch + 1, dev_loss, acc)
    print(total_res)
    utils.saveLog(LOG_PATH, total_res)
    utils.div('-')

    if not best_dev_loss or dev_loss < best_dev_loss:
        with open(MODEL_PATH % (dev_loss, acc), 'wb') as f:
            torch.save(model, f)
        best_dev_loss = dev_loss
        stop_counter = 0
    else:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.2
        if EARLY_STOP != 0:
            stop_counter += 1

    return stop_counter
Example #5
    def batchTrain(self, x, y, epochs, batch_size):
        '''Training using mini batches

        Args:
            x (ndarray): input with shape (n, h, w) or (n, h, w, c).
                n is the number of records.
                h the image height, w the image width and c the number of channels.
            y (ndarray): target with shape (n, m). m is the number of output units,
                n is the number of records.
            epochs (int): number of epochs to train.
            batch_size (int): mini-batch size.
        Returns:
            costs (ndarray): overall cost at each epoch.
        '''
        costs = []
        m = len(x)
        for ee in range(epochs):
            batches = getBatch(m, batch_size, randomize=True)
            for idxjj in batches:
                for idxii in idxjj:
                    xii = np.atleast_3d(x[idxii])
                    yii = y[idxii]
                    weight_sums, activations = self.feedForward(xii)
                    gradsii, grads_biasii = self.feedBackward(
                        weight_sums, activations, yii)

                    if idxii == idxjj[0]:
                        gradsjj = gradsii
                        grads_biasjj = grads_biasii
                    else:
                        gradsjj = self.sumGradients(gradsjj, gradsii)
                        grads_biasjj = self.sumGradients(
                            grads_biasjj, grads_biasii)

                self.gradientDescent(gradsjj, grads_biasjj, batch_size)

            je = self.evaluateCost(x, y)
            print('# <batchTrain>: cost at epoch %d, j = %f' % (ee, je))
            costs.append(je)

        return np.array(costs)
Example #6
    def evaluateCost(self, x, y, batch_size=512):
        '''Compute mean cost on a dataset

        Args:
            x (ndarray): input image of shape (m, hi, wi, ci).
                Where m: number of records. hi, wi: height and width of input image.
                ci: channels of input image.
            y (ndarray): target with shape (m, k). k is the number of output units,
                m is the number of records.
        Returns:
            j (float): mean cost over dataset <x, y>.
        '''
        j = 0
        n = len(x)
        batches = getBatch(n, batch_size)
        for idxii in batches:
            xii = np.take(x, idxii, axis=0)
            yii = np.take(y, idxii, axis=0)
            yhatii = self.predict(xii, batch_size)
            jii = self.sampleCost(yhatii, yii)
            j += jii
        j2 = self.regCost()
        j += j2
        return j/n
Example #7
    if use_gpu:
        model.cuda()

    parameters = model.parameters()
    optimizer = torch.optim.Adamax(parameters)
    loss_function = torch.nn.MSELoss()
    total_len = len(train_dataset)
    print("start training...")

    for epoch in range(epochs):
        start_time = time.time()
        total_loss = 0

        for i in tqdm(range(0, len(train_dataset) - batch_size, batch_size)):
            features, labels = getBatch(train_dataset, i, batch_size)

            if use_gpu:
                features, labels = features.cuda(), labels.cuda()

            model.zero_grad()

            output = model(features)

            loss = loss_function(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * len(features)

        logger.info(f'epoch:{epoch},train_loss:{total_loss/total_len}')
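
Note: the getBatch(dataset, i, batch_size) helper used here and in example #9 is not part of the listing. A sketch under the assumption that the dataset is a pandas DataFrame whose 'feature' column holds fixed-length arrays and whose 'label' column holds scalar targets:

import numpy as np
import torch

def getBatch(dataset, start, batch_size):
    # Sketch only; the project's real helper is not shown.
    # Slices rows [start, start + batch_size) and returns float tensors,
    # which is what examples #7 and #9 appear to expect.
    rows = dataset.iloc[start:start + batch_size]
    features = torch.tensor(np.stack(rows['feature'].values), dtype=torch.float32)
    labels = torch.tensor(np.asarray(rows['label'].values, dtype=np.float32))
    return features, labels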
Example #8
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
optimizer = tf.train.AdamOptimizer().minimize(loss)

sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

tf.summary.scalar('Loss', loss)
tf.summary.scalar('Accuracy', accuracy)
merged = tf.summary.merge_all()
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)

for i in range(NUMBER_OF_ITERATIONS):
    nextBatch, nextBatchLabels = getBatch(train, BATCH_SIZE,
                                          MAX_SEQUENCE_LENGTH)
    sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

    # Write summary to Tensorboard
    if i % 50 == 0:
        summary = sess.run(merged, {
            input_data: nextBatch,
            labels: nextBatchLabels
        })
        writer.add_summary(summary, i)

    # Save the network every 10,000 training iterations
    if i % 10000 == 0 and i != 0:
        save_path = saver.save(sess,
                               "models/pretrained_lstm.ckpt",
                               global_step=i)
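
Note: getBatch(train, BATCH_SIZE, MAX_SEQUENCE_LENGTH) is not shown in this example. A hypothetical sketch, assuming the training set is a list of (token_id_sequence, one_hot_label) pairs sampled at random each iteration:

import random
import numpy as np

def getBatch(dataset, batch_size, max_seq_length):
    # Hypothetical sketch of the sampling helper used in example #8;
    # the actual project may store and sample its data differently.
    batch = np.zeros((batch_size, max_seq_length), dtype=np.int32)
    labels = []
    for i in range(batch_size):
        ids, label = random.choice(dataset)
        ids = ids[:max_seq_length]
        batch[i, :len(ids)] = ids
        labels.append(label)
    return batch, np.array(labels)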
Example #9
from utils import getBatch, calMetric, drawPicture
import torch

if __name__ == "__main__":
    test_dataset = pd.read_pickle('dataset/test.pkl')
    model_path = 'log/birnn.pt'

    model = rnnModel(hidden_dim, feature_dim, batch_size)
    model.load_state_dict(torch.load(model_path))
    if use_gpu:
        model.cuda()
    model.eval()  # put dropout/batch-norm layers into inference mode

    predict = []
    ground_truth = []
    for i in tqdm(range(0, len(test_dataset) - batch_size, batch_size)):
        features, labels = getBatch(test_dataset, i, batch_size)

        if use_gpu:
            features, labels = features.cuda(), labels.cuda()

        model.zero_grad()

        output = model(features)

        predict.extend(output.data.cpu().numpy())
        ground_truth.extend(labels.data.cpu().numpy())

    df = pd.DataFrame()
    df['predict'] = predict
    df['ground_truth'] = ground_truth
    df.to_csv("log/result.csv")
Example #10
def train(model,
          x_train,
          y_train,
          x_validation,
          y_validation,
          loss_object,
          optimizer,
          ckpt,
          manager,
          batch_size=32,
          n_epochs=10):
    # trying to restore a previous checkpoint. If it fails, starts from scratch
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    iterations = x_train.shape[0] // batch_size
    if x_train.shape[0] % batch_size != 0:
        iterations += 1

    best_step = -1
    best_epoch_val_loss = 100.0
    for e in range(n_epochs):
        loss_iteration = 0
        x_train, y_train = shuffle(x_train, y_train, random_state=0)
        total_loss = 0.0
        for ibatch in range(int(iterations)):
            batch_x = getBatch(x_train, ibatch, batch_size)
            batch_y = getBatch(y_train, ibatch, batch_size)

            with tf.GradientTape() as tape:
                predictions = model(batch_x, training=True)
                loss = loss_object(batch_y, predictions)
                loss_iteration += loss.numpy()
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))

        total_loss = loss_iteration / int(iterations)

        pred = model(x_train, training=False)
        y_argmax = np.argmax(y_train, axis=1)
        pred_argmax = np.argmax(pred, axis=1)
        ac = accuracy_score(y_argmax, pred_argmax)

        # increment of checkpoint step
        ckpt.step.assign_add(1)
        if total_loss <= best_epoch_val_loss:
            # new best model found, so save the checkpoint into a file
            best_epoch_val_loss = total_loss
            best_step = int(ckpt.step)
            save_path = manager.save()
            print("Saved checkpoint for step {}: {}".format(
                best_step, save_path))
            print("loss {:1.2f}".format(best_epoch_val_loss))

        print("epoch %d loss %f Train Accuracy %f" %
              (e, total_loss, np.round(ac, 4)))
        pred = model(x_validation, training=False)
        print("vs. Validation Accuracy %f" % accuracy_score(
            np.argmax(y_validation, axis=1), np.argmax(pred, axis=1)))
        print("===============")
Example #11
File: main.py Project: bmaneesh/NER
HIDDEN_SIZE = 300
EPOCH = 3
LEARNING_RATE = 0.001

model = WindowClassifier(len(word2index), EMBEDDING_SIZE, WINDOW_SIZE,
                         HIDDEN_SIZE, len(tag2index))
print(model)
if cuda:
    model = model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for ep in range(EPOCH):
    losses = []
    acc = []
    for bid, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
        x, y = list(zip(*batch))
        inputs = torch.cat(
            [prepare_sequence(sent, word2index).view(1, -1) for sent in x])
        targets = torch.cat([prepare_tag(tag, tag2index) for tag in y])
        model.zero_grad()
        # print inputs.size()
        pred = model(inputs, istraining=True)
        # acc.append(np.where(pred==targets).shape[0])
        # print pred
        loss = loss_function(pred, targets)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()

        if bid % 1000 == 0:
Example #12

base = "data/"
readList = open("remaining.txt").read().split()[:50]
batch_size = 64
NUM_EPOCH = 80  # 1200
learning_rate = 0.0003

enc = fontEncoder().cuda()
cla = classifier().cuda()

loss_function = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(list(enc.parameters()) + list(cla.parameters()),
                             lr=learning_rate)

train_set = getBatch(base, readList, BATCH_SIZE=batch_size)

for epoch in range(NUM_EPOCH):

    print("current epoch: ", epoch)

    for index, (image, label) in enumerate(train_set):
        optimizer.zero_grad()

        image = image.cuda()
        label = label.cuda()

        embedding = enc(image)
        output = cla(embedding)

        loss = loss_function(output, label)
Example #13
model.add(Dense(150, input_dim=INPUT_SHAPE))
model.add(Activation("relu"))
model.add(Dropout(0.50))
model.add(Dense(130))
model.add(Activation("relu"))
model.add(Dropout(0.50))
model.add(Dense(128))
model.compile("nadam", "mae")

print('Training...')
i = 0
for epoch in range(EPOCHS):
    random.seed(42)
    random.shuffle(text)
    print('    EPOCH:', epoch)
    for text_descriptors, img_descriptors in utils.getBatch(
            text, images, BATCH_SIZE):

        print(vstack(text_descriptors).shape)
        """    tmp = list(zip(text_descriptors, img_descriptors))
            random.seed(42)
            random.shuffle(tmp)
            text_descriptors, img_descriptors = zip(*tmp)"""
        t0 = time.time()
        mlp.partial_fit(vstack(text_descriptors), img_descriptors)
        print('        Partial fit {} took: {} min, Score {}'.format(
            i, round((time.time() - t0) / 60, 2), mlp.loss_))
        """    
            mlp.fit(vstack(text_descriptors), img_descriptors)
            """
        """    i = i+1
            if i == 2: