Exemplo n.º 1
0
    def add(self, lab, data, name='main'):
        """Create or refresh the text label ``lab`` in the group ``name``.

        The first time ``lab`` is seen in ``self.data[name]`` it is given a
        position whose y coordinate is 25 less than ``self.init_pos`` (and
        ``self.init_pos`` is moved to that position, so successive new labels
        are stacked), and a ``pyglet.text.Label`` is created in ``self.batch``.
        On later calls only the stored value and the label text are updated.

        Args:
            lab: Label key. It is also used as the text prefix, so it must be
                a ``str``.
            data: Value to display. Floats are passed through
                ``utils.truncate(data, 3)`` before being stored; any other
                value is stored as is.
            name: Key of the group in ``self.data``, ``self.pos`` and
                ``self.labels`` the label belongs to.
        """
        # Only a missing key means "new label". The previous bare ``except``
        # also swallowed unrelated errors (even KeyboardInterrupt).
        try:
            self.data[name][lab]
        except KeyError:
            is_new = True
            position = [self.init_pos[0], self.init_pos[1] - 25]
            self.pos[name][lab] = position
            # Same list object on purpose: the next new label is placed
            # relative to this one.
            self.init_pos = position
        else:
            is_new = False

        # isinstance also covers float subclasses, unlike an exact type match.
        if isinstance(data, float):
            self.data[name][lab] = utils.truncate(data, 3)
        else:
            self.data[name][lab] = data

        text = lab + ' ' + str(self.data[name][lab])

        if is_new:
            self.labels[name][lab] = pyglet.text.Label(
                text,
                font_name=self.font,
                font_size=self.size_ft,
                x=self.pos[name][lab][0],
                y=self.pos[name][lab][1],
                batch=self.batch)
        else:
            self.labels[name][lab].text = text
Exemplo n.º 2
0
    def eval(self):
        """Compute the classification accuracy on the validation split.

        Switches ``self.model`` to eval mode, runs every batch of
        ``self.dataloader['valid']`` through it and counts how many predicted
        classes (arg-max over dimension 1 of the model output) equal the
        target ``y``.

        This method neither disables gradient tracking nor switches the model
        back to train mode; both are left to the caller.

        Returns:
            dict: ``{'acc': accuracy}`` where ``accuracy`` is a percentage.
            It is ``0.0`` when the validation loader yields no example
            (previously this raised ``ZeroDivisionError``).
        """
        params = self.params
        self.model.eval()

        lang_id1 = params.lang2id[params.src_lang]
        lang_id2 = params.lang2id[params.trg_lang]

        valid = 0  # correctly classified examples
        total = 0  # examples seen

        for sent1, len1, sent2, len2, y, _, _ in tqdm(
                self.dataloader['valid']):
            sent1, len1 = truncate(sent1, len1, params.max_len,
                                   params.eos_index)
            sent2, len2 = truncate(sent2, len2, params.max_len,
                                   params.eos_index)
            x, lengths, positions, langs = concat_batches(
                sent1,
                len1,
                lang_id1,
                sent2,
                len2,
                lang_id2,
                params.pad_index,
                params.eos_index,
                reset_positions=True)

            # cuda
            x, y, lengths, positions, langs = to_cuda(x,
                                                      y,
                                                      lengths,
                                                      positions,
                                                      langs,
                                                      gpu=self.gpu)

            # forward
            output = self.model(x, lengths, positions, langs)
            # max(1) returns (values, indices); the indices are the classes
            predictions = output.data.max(1)[1]

            # update statistics
            valid += predictions.eq(y).sum().item()
            total += len(len1)

        # compute accuracy; guard against an empty validation set
        acc = 100.0 * valid / total if total else 0.0
        return {'acc': acc}
Exemplo n.º 3
0
def get_diff_noised_list(w, h, nb, f=frequency, oct=4, style=1):
    """Build an ``h`` x ``w`` grid of quantised 2-D noise values.

    Each cell ``(i, j)`` samples the noise generator at
    ``(i / f[0], j / f[1])`` and converts the sample according to ``style``.

    Args:
        w: Number of columns (length of every row).
        h: Number of rows.
        nb: Quantisation parameter. For styles 1 and 3 the bucket width is
            ``1 / nb``; for style 2 the zero band is ``abs(noise) < 0.01 * nb``.
        f: Pair of divisors applied to the column and row index before
            sampling.
        oct: Second argument given to ``p.PerlinNoiseFactory`` (presumably
            the number of octaves -- confirm against that class).
        style: Conversion applied to each sample:
            1 -- ``int(noise // (1 / nb))``;
            2 -- ``0`` inside the zero band, ``-1`` elsewhere;
            3 -- ``int(((noise + 1) / 2) // (1 / nb))``.
            Any other value appends nothing, so every row is empty.

    Returns:
        list[list[int]]: ``h`` rows of converted samples.
    """
    manager = p.PerlinNoiseFactory(2, oct)

    tab = []
    for j in range(h):
        tabj = []
        for i in range(w):
            noise = manager(float(i / f[0]), float(j / f[1]))

            if style == 1:
                tabj.append(int(noise // (1 / nb)))
            elif style == 2:
                tabj.append(0 if abs(noise) < 0.01 * nb else -1)
            elif style == 3:
                tabj.append(int(((noise + 1) / 2) // (1 / nb)))

        tab.append(tabj)
    return tab
Exemplo n.º 4
0
def main(args):
    """Score every parallel dataset with a reloaded model and dump the scores.

    Steps:
      1. Create ``args.dump_path``, or refuse to run if it already exists and
         is not empty.
      2. Reload ``params.pkl`` and ``checkpoint.pth`` from the directory that
         contains ``args.load_model`` and rebuild the dictionary.
      3. Load the data from ``args.data_path`` and build the model (only the
         encoder is kept when the model is an encoder/decoder pair).
      4. For every ``(src, tgt)`` entry of ``data['para']`` write one score
         per sentence pair to ``<dump_path>/<src>-<tgt>.pred``. Pairs whose
         two lengths are both zero get the score ``0.00000000``.

    When the module-level flag ``TEST_REVERSE`` is true, every batch is also
    scored with the two sentences swapped and the mean absolute difference
    between both scores is printed per dataset.

    Args:
        args: Namespace providing ``dump_path``, ``log_file``, ``load_model``,
            ``batch_size`` and ``data_path``.

    Raises:
        ValueError: If ``args.dump_path`` exists and is not empty.
        OSError: If ``args.dump_path`` cannot be created.
    """
    # Make dump path. os.makedirs replaces the former `mkdir -p` run through
    # a shell, which broke (or executed arbitrary commands) on paths holding
    # spaces or shell metacharacters.
    if not os.path.exists(args.dump_path):
        os.makedirs(args.dump_path, exist_ok=True)
    elif os.listdir(args.dump_path):
        m = "Directory {} is not empty.".format(args.dump_path)
        raise ValueError(m)
    # bool() also accepts None, unlike len().
    write_log = bool(args.log_file)

    # load model parameters
    # SECURITY NOTE: pickle.load and torch.load execute arbitrary code from
    # the file; only point --load_model at trusted checkpoints.
    model_dir = os.path.dirname(args.load_model)
    params_path = os.path.join(model_dir, 'params.pkl')
    with open(params_path, "rb") as f:
        params = pickle.load(f)

    # load data parameters and model parameters from checkpoint
    checkpoint_path = os.path.join(model_dir, 'checkpoint.pth')
    assert os.path.isfile(checkpoint_path)
    data = torch.load(
        checkpoint_path,
        map_location=lambda storage, loc: storage.cuda(params.local_rank))
    # Values stored in the checkpoint override those from params.pkl.
    for k, v in data["params"].items():
        params.__dict__[k] = v
    dico = Dictionary(data["dico_id2word"], data["dico_word2id"],
                      data["dico_counts"])

    # Print score
    for k, v in data["best_metrics"].items():
        print("- {}: {}".format(k, v))

    # Fix some of the params we pass to load_data
    params.debug_train = False
    params.max_vocab = -1
    params.min_count = 0
    params.tokens_per_batch = -1
    params.max_batch_size = args.batch_size
    params.batch_size = args.batch_size

    # load data (from here on `data` is the dataset dict, not the checkpoint)
    data = load_data(args.data_path, params)

    # Print data summary
    for (src, tgt), dataset in data['para'].items():
        datatype = "Para data (%s)" % (
            "WITHOUT labels" if dataset.labels is None else "WITH labels")
        m = '{: <27} - {: >12}:{: >10}'.format(datatype, '%s-%s' % (src, tgt),
                                               len(dataset))
        print(m)

    # Fix some of the params we pass to the model builder
    params.reload_model = args.load_model

    # build model
    if params.encoder_only:
        model = build_model(params, dico)
    else:
        encoder, decoder = build_model(params, dico)
        model = encoder

    # Predict
    model = model.module if params.multi_gpu else model
    model.eval()
    start = time.time()
    for (src, tgt), dataset in data['para'].items():
        path = os.path.join(args.dump_path, "{}-{}.pred".format(src, tgt))
        lang1_id = params.lang2id[src]
        lang2_id = params.lang2id[tgt]
        diffs = []
        nb_written = 0
        # `with` guarantees the score file is closed even if scoring fails.
        with open(path, "w") as scores_file:
            for batch in dataset.get_iterator(False,
                                              group_by_size=False,
                                              n_sentences=-1,
                                              return_indices=False):
                (sent1, len1), (sent2, len2), labels = batch
                sent1, len1 = truncate(sent1, len1, params.max_len,
                                       params.eos_index)
                sent2, len2 = truncate(sent2, len2, params.max_len,
                                       params.eos_index)
                pred = _score_pairs(model, params, sent1, len1, lang1_id,
                                    sent2, len2, lang2_id)
                for p, l1, l2 in zip(pred, len1, len2):
                    if l1.item() == 0 and l2.item() == 0:
                        scores_file.write("0.00000000\n")
                    else:
                        scores_file.write("{:.8f}\n".format(p))
                nb_written += len(pred)
                # NOTE: progress is only reported when the running total is
                # an exact multiple of 1000, which depends on the batch size.
                if nb_written % 1000 == 0:
                    elapsed = int(time.time() - start)
                    lpsm, lpss = divmod(elapsed, 60)
                    lpsh, lpsm = divmod(lpsm, 60)
                    msg = "[{:02d}:{:02d}:{:02d} {}-{}]".format(
                        lpsh, lpsm, lpss, src, tgt)
                    msg += " {}/{} ({:.2f}%) sentences processed".format(
                        nb_written, len(dataset),
                        100 * nb_written / len(dataset))
                    print(msg)
                    if write_log:
                        with open(args.log_file, "a") as fout:
                            fout.write(msg + "\n")
                # Try reversing order
                if TEST_REVERSE:
                    pred_rev = _score_pairs(model, params, sent2, len2,
                                            lang2_id, sent1, len1, lang1_id)
                    diffs.extend(p - pp for p, pp in zip(pred, pred_rev))

        if TEST_REVERSE:
            print(
                "Average absolute diff between score(l1,l2) and score(l2,l1): {}"
                .format(np.mean(np.abs(diffs))))


def _score_pairs(model, params, sent1, len1, lang1_id, sent2, len2, lang2_id):
    """Return the sigmoid scores of a batch of sentence pairs as a flat list.

    The two sentences are concatenated in the order given, so swapping the
    ``1`` and ``2`` arguments scores the reversed pair.
    """
    x, lengths, positions, langs = concat_batches(sent1,
                                                  len1,
                                                  lang1_id,
                                                  sent2,
                                                  len2,
                                                  lang2_id,
                                                  params.pad_index,
                                                  params.eos_index,
                                                  reset_positions=True)
    x, lengths, positions, langs = to_cuda(x, lengths, positions, langs)
    with torch.no_grad():
        # Get sentence pair embedding
        h = model('fwd',
                  x=x,
                  lengths=lengths,
                  positions=positions,
                  langs=langs,
                  causal=False)[0]
        # very hacky, use embeddings to make weights for the classifier:
        # row CLF_ID1 is the weight vector, element [CLF_ID2, 0] the bias.
        CLF_ID1, CLF_ID2 = 8, 9
        # `model` was already unwrapped by the caller, so it is used directly
        # (unwrapping `.module` a second time would fail).
        emb = model.embeddings.weight
        pred = F.linear(h, emb[CLF_ID1].unsqueeze(0), emb[CLF_ID2, 0])
        pred = torch.sigmoid(pred)
        return pred.view(-1).cpu().numpy().tolist()
Exemplo n.º 5
0
    def train(self):
        """Run one pass over ``self.dataloader['train']``.

        For every batch the two sentences are truncated to ``params.max_len``,
        concatenated, moved to ``self.gpu`` and fed to ``self.model``; the
        loss from ``self.criterion`` is back-propagated and both optimizers
        (``self.optimizer_e`` and ``self.optimizer_p``) are stepped.
        ``self.global_step`` is incremented once per batch.

        Every ``params.report_interval`` steps the mean loss since the last
        report is logged. Every ``params.eval_interval`` steps the process
        with ``self.gpu == 0`` runs ``self.eval()`` under ``torch.no_grad()``,
        appends the accuracy to ``acc.note`` in ``params.save_model`` and,
        when it beats ``self.best_acc``, saves ``self.model.module`` to
        ``best_acc_model.pkl`` and appends the new best to ``best_acc.note``.
        """
        params = self.params
        self.model.train()

        # losses accumulated since the last report
        losses = []

        lang_id1 = params.lang2id[params.src_lang]
        lang_id2 = params.lang2id[params.trg_lang]

        for sent1, len1, sent2, len2, y, _, _ in self.dataloader['train']:
            self.global_step += 1
            sent1, len1 = truncate(sent1, len1, params.max_len,
                                   params.eos_index)
            sent2, len2 = truncate(sent2, len2, params.max_len,
                                   params.eos_index)
            x, lengths, positions, langs = concat_batches(
                sent1,
                len1,
                lang_id1,
                sent2,
                len2,
                lang_id2,
                params.pad_index,
                params.eos_index,
                reset_positions=True)

            # cuda
            x, y, lengths, positions, langs = to_cuda(x,
                                                      y,
                                                      lengths,
                                                      positions,
                                                      langs,
                                                      gpu=self.gpu)

            # loss
            output = self.model(x, lengths, positions, langs)
            loss = self.criterion(output, y)

            # backward / optimization
            self.optimizer_e.zero_grad()
            self.optimizer_p.zero_grad()
            loss.backward()
            self.optimizer_e.step()
            self.optimizer_p.step()
            losses.append(loss.item())

            # log
            if self.global_step % params.report_interval == 0:
                logger.info("GPU %i - Epoch %i - Global_step %i - Loss: %.4f" %
                            (self.gpu, self.epoch, self.global_step,
                             sum(losses) / len(losses)))
                losses = []

            # evaluation is done by the process on GPU 0 only
            if self.global_step % params.eval_interval == 0 and self.gpu == 0:
                logger.info("XLM - Evaluating")
                with torch.no_grad():
                    scores = self.eval()
                acc = scores['acc']
                if acc > self.best_acc:
                    self.best_acc = acc
                    torch.save(
                        self.model.module,
                        os.path.join(params.save_model, 'best_acc_model.pkl'))
                    with open(
                            os.path.join(params.save_model, 'best_acc.note'),
                            'a') as f:
                        f.write(str(self.best_acc) + '\n')
                with open(os.path.join(params.save_model, 'acc.note'),
                          'a') as f:
                    f.write(str(acc) + '\n')
                # %.2f instead of %i: the accuracy is a float percentage and
                # %i silently dropped its fractional part.
                logger.info("acc - %.2f " % acc)
                # eval() left the model in eval mode
                self.model.train()
Exemplo n.º 6
0
    def run_test(self):
        """Score the test split and append the results to a per-GPU file.

        Every batch of ``self.dataloader['test']`` is run through
        ``self.model`` (in eval mode, without gradient tracking) and the
        softmax probability of class 1 is kept for each sentence pair.
        Results are buffered and appended to
        ``params.test_result_path + '_<gpu>'`` whenever more than
        ``params.flush_frequency`` rows are pending, and once more at the end.

        Each output line is ``<src_text><delim><trg_text><delim><proba>``
        with ``delim = params.delimeter``. The file is opened in append mode,
        so rows from earlier runs are kept.
        """
        params = self.params
        result_path = params.test_result_path + '_{}'.format(self.gpu)
        self.model.eval()

        lang_id1 = params.lang2id[params.src_lang]
        lang_id2 = params.lang2id[params.trg_lang]

        # buffers of rows not yet written; always kept at the same length
        proba_result = []
        src_text_list = []
        trg_text_list = []

        def flush():
            """Append the buffered rows to the result file, then empty the buffers."""
            logger.info(" GPU %i - write out score..." % self.gpu)
            with open(result_path, 'a') as f:
                for src, trg, score in zip(src_text_list, trg_text_list,
                                           proba_result):
                    # '\n', not os.linesep: the file is in text mode, which
                    # already translates newlines (os.linesep would produce
                    # '\r\r\n' on Windows).
                    f.write('{}{}{}{}{}'.format(src, params.delimeter, trg,
                                                params.delimeter,
                                                str(score)) + '\n')
            del proba_result[:]
            del src_text_list[:]
            del trg_text_list[:]

        with torch.no_grad():

            for sent1, len1, sent2, len2, _, src_text, trg_text in tqdm(
                    self.dataloader['test']):
                sent1, len1 = truncate(sent1, len1, params.max_len,
                                       params.eos_index)
                sent2, len2 = truncate(sent2, len2, params.max_len,
                                       params.eos_index)
                x, lengths, positions, langs = concat_batches(
                    sent1,
                    len1,
                    lang_id1,
                    sent2,
                    len2,
                    lang_id2,
                    params.pad_index,
                    params.eos_index,
                    reset_positions=True)

                # cuda
                x, lengths, positions, langs = to_cuda(x,
                                                       lengths,
                                                       positions,
                                                       langs,
                                                       gpu=self.gpu)

                # forward
                output = self.model(x, lengths, positions, langs)
                # probability of class 1 for every example of the batch
                proba = F.softmax(output, 1)[:, 1]

                proba_result.extend(proba.cpu().numpy())
                src_text_list.extend(src_text)
                trg_text_list.extend(trg_text)
                assert len(proba_result) == len(src_text_list)
                assert len(proba_result) == len(trg_text_list)

                if len(proba_result) > params.flush_frequency:
                    flush()

            # write out the remainings
            flush()