Example #1
def split_routine():
    while True:
        chunk = ""
        with chunk_q_cv:
            while chunk_q.empty():
                chunk_q_cv.wait()

            # Get a chunk from the chunk queue.
            chunk = chunk_q.get()
            chunk_q_cv.notify_all()

        # Split the chunk into sentences; pass the EOF sentinel through unchanged.
        if chunk == "EOF":
            sents = "EOF"
        else:
            sents = data.split_to_sentences(chunk)

        del chunk
        with sent_q_cv:
            while sent_q.full():
                sent_q_cv.wait()

            sent_q.put(sents)
            sent_q_cv.notify_all()

        if sents == "EOF":
            break

    print("Finished splitting file.")
    with log_lock:
        with open(logfile, 'a') as f_log:
            f_log.write('Finished splitting file on %s\n' %
                        (data.format_date(time.time())))
            f_log.flush()
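
These routines communicate through module-level queues and condition variables that the excerpts do not show. A minimal sketch of that shared state, assuming bounded queue.Queue instances; the names match the globals used above, while the queue sizes and log path are illustrative guesses:

import queue
import threading

logfile = 'train.log'  # hypothetical path; the excerpts only use the name

# Bounded queues so a fast producer blocks (via the condition variables
# below) when its consumer falls behind; maxsize values are assumptions.
chunk_q = queue.Queue(maxsize=4)
sent_q = queue.Queue(maxsize=4)
batch_q = queue.Queue(maxsize=4)

# One condition variable per queue, used for both not-empty and not-full waits.
chunk_q_cv = threading.Condition()
sent_q_cv = threading.Condition()
batch_q_cv = threading.Condition()

end_training_event = threading.Event()  # set when batching finishes
log_lock = threading.Lock()             # serializes writes to logfile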
Example #2
def batch_routine(word_to_ix):
    while True:
        sents = ""
        with sent_q_cv:
            while sent_q.empty():
                sent_q_cv.wait()

            # Get a sentence chunk from the sentence queue.
            sents = sent_q.get()
            sent_q_cv.notify_all()

        if sents == "EOF":
            break

        # Bucket sentences by token count so each batch holds same-length sentences.
        sents_by_len = {}
        for s in sents:
            n = len(s.split())
            if n in sents_by_len:
                sents_by_len[n].append(s)
            else:
                sents_by_len[n] = [s]

        del sents

        for same_len_sents in sents_by_len.values():
            batches = create_batches(same_len_sents)

            with batch_q_cv:
                while batch_q.full():
                    batch_q_cv.wait()

                batch_q.put(batches)
                batch_q_cv.notify_all()

    # Batching is done; once the training thread finds batch_q empty, training ends.
    end_training_event.set()
    print("Finished batching.")
    with log_lock:
        with open(logfile, 'a') as f_log:
            f_log.write('Finished batching on %s\n' %
                        (data.format_date(time.time())))
            f_log.flush()
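
The bucketing loop above groups sentences by whitespace token count so that every batch contains sentences of a single length. The same grouping can be written more compactly with collections.defaultdict; a behavior-equivalent sketch (the function name is illustrative):

from collections import defaultdict

def bucket_by_length(sents):
    # Group sentences by token count, as batch_routine does inline.
    sents_by_len = defaultdict(list)
    for s in sents:
        sents_by_len[len(s.split())].append(s)
    return sents_by_len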
Example #3
def read_routine(filename, chunk_dim):
    base = os.path.basename(filename)
    sb_filename = 'batches/%s.b0' % (base)
    if os.path.exists(sb_filename):
        # Stored batches already exist from a previous run: skip reading
        # and enqueue only the EOF sentinel.
        chunk = "EOF"
        with chunk_q_cv:
            while chunk_q.full():
                chunk_q_cv.wait()

            chunk_q.put(chunk)
            chunk_q_cv.notify_all()
    else:
        print('Reading %s' % (filename))
        # Read the file in chunks of whole lines totalling roughly chunk_dim bytes.
        with open(filename) as f_in:
            while True:
                chunk = f_in.readlines(chunk_dim)
                if not chunk:
                    chunk = "EOF"

                with chunk_q_cv:
                    while chunk_q.full():
                        chunk_q_cv.wait()

                    chunk_q.put(chunk)
                    chunk_q_cv.notify_all()

                if chunk == "EOF":
                    break

    print("Finished reading file.")
    with log_lock:
        with open(logfile, 'a') as f_log:
            f_log.write('Finished reading file on %s\n' %
                        (data.format_date(time.time())))
            f_log.flush()
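
f_in.readlines(chunk_dim) passes chunk_dim as the hint argument of readlines: the call returns whole lines until their combined size reaches roughly chunk_dim, so a chunk never splits a line. A quick way to inspect the chunking, using a hypothetical corpus.txt:

# Hypothetical file; prints line count and character total per chunk.
with open('corpus.txt') as f_in:
    while True:
        chunk = f_in.readlines(64 * 1024)  # ~64 KiB of whole lines
        if not chunk:
            break
        print(len(chunk), sum(len(line) for line in chunk))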
Example #4
def train_routine(filename, print_every, save_every, num_epoch, max_sb_len):
    base = os.path.basename(filename)
    first = True
    stored_batches = list()
    sb_count = 0
    batch_count = 0
    epoch = 0
    while epoch < num_epoch:
        epoch += 1
        loss_acc = 0
        # The first epoch consumes batches as the batching thread produces them.
        if first:
            while True:
                # If batching is still running, wait for a batch to appear.
                if not end_training_event.is_set():
                    with batch_q_cv:
                        while batch_q.empty():
                            batch_q_cv.wait()

                # Try to take a list of batches from batch_q without blocking.
                try:
                    batches = batch_q.get_nowait()
                    if first:
                        print("Starting training.")
                        with log_lock:
                            with open(logfile, 'a') as f_log:
                                f_log.write('Starting training on %s\n' %
                                            (data.format_date(time.time())))
                                f_log.flush()

                    batch_count, loss_acc = train(batches, batch_count,
                                                  loss_acc, epoch, print_every,
                                                  save_every)
                    stored_batches.append(batches)

                    if len(stored_batches) == max_sb_len:
                        sb_filename = 'batches/%s.b%d' % (base, sb_count)
                        with open(sb_filename, 'wb') as f_sb:
                            pkl.dump(stored_batches, f_sb)
                        sb_count += 1
                        stored_batches = list()

                except queue.Empty:  # assumes `import queue` at module level
                    # The producer has finished and batch_q is drained.
                    break
                finally:
                    first = False
                    with batch_q_cv:
                        batch_q_cv.notify_all()

            # Flush any remaining batches that did not fill a full shard.
            if len(stored_batches) > 0:
                sb_filename = 'batches/%s.b%d' % (base, sb_count)
                with open(sb_filename, 'wb') as f_sb:
                    pkl.dump(stored_batches, f_sb)
                sb_count += 1
                stored_batches = list()

        # Later epochs replay the batches already pickled to disk.
        else:
            if sb_count == 0:
                # No shards were written this run (they already existed on disk),
                # so the first epoch trained on nothing; grant one extra epoch.
                num_epoch += 1
                sb_count = 1

            i = 0
            while True:
                sb_filename = 'batches/%s.b%d' % (base, i)
                i += 1
                if os.path.exists(sb_filename):
                    with open(sb_filename, 'rb') as f_sb:
                        stored_batches = pkl.load(f_sb)

                    for batches in stored_batches:
                        batch_count, loss_acc = train(batches, batch_count,
                                                      loss_acc, epoch,
                                                      print_every, save_every)

                    stored_batches = list()

                else:
                    break

    # Training is over: report final stats and persist the model.
    msg = ('%s - Epochs: %d Batches: %d Loss: %f' %
           (data.elapsed(start), num_epoch, batch_count, loss_acc /
            (batch_count + 1)))
    msg = ('%s\nTraining ended on %s.\n' %
           (msg, data.format_date(time.time())))
    print(msg)
    with log_lock:
        with open(logfile, 'a') as f_log:
            f_log.write('%s\n' % (msg))
            f_log.flush()

    # Back up the previous weights before overwriting the model file.
    bakfile = modelfile + ".bak"
    copyfile(modelfile, bakfile)
    torch.save(model.state_dict(), modelfile)
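
The train() function itself is not part of these excerpts. A hypothetical sketch of what one call could look like, given the globals initialized in Example #5 (model, loss_fn, optimizer) and assuming each batch is an (inputs, targets) pair of tensors; the real function also implements the save_every checkpointing:

def train(batches, batch_count, loss_acc, epoch, print_every, save_every):
    # Hypothetical sketch; the (inputs, targets) batch layout is an assumption.
    for inputs, targets in batches:
        model.zero_grad()
        model.hidden = model.init_hidden()  # reset the recurrent state
        scores = model(inputs.cuda())
        loss = loss_fn(scores, targets.cuda())
        loss.backward()
        optimizer.step()

        batch_count += 1
        loss_acc += loss.item()
        if batch_count % print_every == 0:
            print('epoch %d  batch %d  avg loss %f' %
                  (epoch, batch_count, loss_acc / batch_count))
        # save_every checkpointing (torch.save of model.state_dict()) omitted.
    return batch_count, loss_acc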
Example #5

print("Initializing...")
start = time.time()
with open(logfile, 'w') as f_log:
    f_log.write('Initialization on %s\n' % (data.format_date(start)))
    f_log.flush()

word_to_ix, ix_to_word = data.init_dictionary(dictionary_dim)
model = WordGuesser(hidden_units, context_dim, embedding_dim, len(word_to_ix),
                    batch_dim)
# Optionally resume from a model file passed as the second CLI argument.
if len(sys.argv) == 3:
    modelfile = sys.argv[2]
    model.load_state_dict(torch.load(modelfile))

model.train()
model = model.cuda()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)
model.hidden = model.init_hidden()
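
With the model initialized, the four routines would typically run as one thread each, chained through the queues; a sketch with a hypothetical file name and illustrative tuning values:

import os
import threading

os.makedirs('batches', exist_ok=True)  # train_routine pickles shards here

threads = [
    threading.Thread(target=read_routine, args=('corpus.txt', 64 * 1024)),
    threading.Thread(target=split_routine),
    threading.Thread(target=batch_routine, args=(word_to_ix,)),
]
for t in threads:
    t.start()

# Run training on the main thread; all parameter values are assumptions.
train_routine('corpus.txt', print_every=100, save_every=1000,
              num_epoch=5, max_sb_len=50)

for t in threads:
    t.join()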