def _get_job_words(self, alpha, work, job, neu1):
    """Train on every sentence of `job` and return the summed per-sentence results.

    Uses `train_sentence_sg` when `self.sg` is truthy, otherwise
    `train_sentence_cbow` (which additionally receives `neu1`).

    NOTE(review): the per-sentence return value is presumably the number of
    words trained on -- confirm against the train_sentence_* helpers.
    """
    total = 0
    if self.sg:
        for sentence in job:
            total = total + train_sentence_sg(self, sentence, alpha, work)
    else:
        for sentence in job:
            total = total + train_sentence_cbow(self, sentence, alpha, work, neu1)
    return total
def worker_train():
    """Train the model, lifting lists of sentences from the jobs queue.

    Loops until a `None` job is pulled from `jobs`. For every job the learning
    rate is recomputed from the shared progress counter, each sentence in the
    job is trained on, and the shared `word_count` is updated under `lock`.
    A progress line is logged whenever the elapsed time has reached
    `next_report[0]`.

    NOTE(review): `self`, `jobs`, `lock`, `word_count`, `total_words`, `start`
    and `next_report` are free variables here -- presumably provided by the
    enclosing training method; confirm against the caller.
    """
    work = zeros(self.layer1_size, dtype=REAL)  # each thread must have its own work memory
    neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)

    while True:
        job = jobs.get()
        if job is None:  # data finished, exit
            break

        # update the learning rate before every job
        alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))

        # how many words did we train on? out-of-vocabulary (unknown) words do not count
        if self.sg:
            job_words = sum(train_sentence_sg(self, sentence, alpha, work) for sentence in job)
        else:
            job_words = sum(train_sentence_cbow(self, sentence, alpha, work, neu1) for sentence in job)

        with lock:
            word_count[0] += job_words
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                # Pass the values as logging arguments so the message is only
                # formatted when the record is actually emitted.
                logger.info(
                    "PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s",
                    100.0 * word_count[0] / total_words,
                    alpha,
                    word_count[0] / elapsed if elapsed else 0.0,
                )
                # don't flood the log, wait at least a second between progress reports
                next_report[0] = elapsed + 1.0
def _get_job_words(self, alpha, work, job, neu1):
    """Run one training pass over `job` and return the sum of the per-sentence results.

    Skip-gram training (`train_sentence_sg`) is used when `self.sg` is truthy;
    otherwise CBOW training (`train_sentence_cbow`) is used, which also takes
    the `neu1` buffer.

    NOTE(review): the summed value is presumably a trained-word count --
    confirm against the train_sentence_* helpers.
    """
    if not self.sg:
        cbow_results = (
            train_sentence_cbow(self, sentence, alpha, work, neu1)
            for sentence in job
        )
        return sum(cbow_results)

    sg_results = (
        train_sentence_sg(self, sentence, alpha, work)
        for sentence in job
    )
    return sum(sg_results)