def worker_train():
    """Train the model, lifting lists of sentences from the jobs queue.

    Loops until a ``None`` sentinel is pulled from ``jobs``. For every job the
    learning rate is recomputed from the shared word counter, each sentence in
    the job is trained on, and the shared counter and the (rate-limited)
    progress log are updated while holding ``lock``.

    When ``self.sampler`` is truthy, each sentence is passed through
    ``self.sampler`` and trained with ``train_sentence_sampler``, which is
    additionally given the number of truthy entries in the original sentence;
    otherwise ``train_sentence`` is used directly.
    """
    # each thread must have its own work memory
    work = matutils.zeros_aligned(self.layer1_size, dtype=REAL)
    while True:
        job = jobs.get()
        if job is None:  # data finished, exit
            break
        # update the learning rate before every job
        alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
        # how many words did we train on? out-of-vocabulary (unknown) words do not count
        if self.sampler:
            # Counting words is a separate step here. The count is taken with
            # a generator rather than len(filter(...)), because on Python 3
            # filter() returns an iterator that has no len().
            job_words = sum(
                train_sentence_sampler(
                    self,
                    self.sampler(sentence),
                    sum(1 for word in sentence if word),
                    alpha,
                    work,
                )
                for sentence in job
            )
        else:
            job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
        with lock:
            word_count[0] += job_words
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                # lazy %-args: the message is only rendered if INFO is enabled
                logger.info(
                    "PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s",
                    100.0 * word_count[0] / total_words,
                    alpha,
                    word_count[0] / elapsed if elapsed else 0.0,
                )
                # don't flood the log, wait at least a second between progress reports
                next_report[0] = elapsed + 1.0
def worker_train():
    """Consume batches of sentences from the jobs queue and train on them.

    Keeps pulling jobs until a ``None`` sentinel arrives. The learning rate is
    refreshed before each job from the shared word counter; after each job the
    counter is advanced and, at most about once per second, a progress line is
    logged. Counter and report bookkeeping happen under ``lock``.
    """
    # work memory private to this thread
    scratch = matutils.zeros_aligned(self.layer1_size, dtype=REAL)

    job = jobs.get()
    while job is not None:  # None means the data is finished
        # refresh the learning rate ahead of every job
        fraction_done = 1.0 * word_count[0] / total_words
        alpha = max(self.min_alpha, self.alpha * (1 - fraction_done))

        # tally the words trained on; out-of-vocabulary (unknown) words do not count
        job_words = 0
        for sentence in job:
            job_words += train_sentence(self, sentence, alpha, scratch)

        with lock:
            word_count[0] += job_words
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                percent = 100.0 * word_count[0] / total_words
                rate = word_count[0] / elapsed if elapsed else 0.0
                logger.info(
                    "PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" % (percent, alpha, rate)
                )
                # throttle: leave at least a second between progress reports
                next_report[0] = elapsed + 1.0

        job = jobs.get()
def worker_train():
    """Train the model, lifting lists of sentences from the jobs queue.

    Loops until a ``None`` sentinel is pulled from ``jobs``. For every job the
    learning rate is recomputed from the shared word counter and each sentence
    is trained with ``train_sentence``. Under ``lock`` the shared counter is
    advanced and, at most about once per second, an in-place (``\\r``)
    progress line is written to stderr. A final newline-terminated progress
    line is written once the worker has seen the sentinel.
    """
    # each thread must have its own work memory
    work = zeros_aligned(self.layer1_size, dtype=REAL)
    prog_msg = "\rPROGRESS: {:5.2%} {:.5f}α {:6.0f}w/s"
    # Bind the values used by the final report up front: a worker whose very
    # first queue item is the None sentinel never enters the loop body, and
    # would otherwise hit an UnboundLocalError in the closing print below.
    alpha = self.alpha
    elapsed = 0.0
    while True:
        job = jobs.get()
        if job is None:  # data finished, exit
            break
        # update the learning rate before every job
        alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
        # how many words did we train on? out-of-vocabulary (unknown) words do not count
        job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
        with lock:
            word_count[0] += job_words
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                print(prog_msg.format(word_count[0] / total_words, alpha,
                                      word_count[0] / elapsed if elapsed else 0.0),
                      end='', file=sys.stderr)
                # don't flood the log,
                # wait at least a second between progress reports
                next_report[0] = elapsed + 1.0
    # final report, terminated with a newline so later output starts on a fresh line
    print(prog_msg.format(word_count[0] / total_words, alpha,
                          word_count[0] / elapsed if elapsed else 0.0),
          file=sys.stderr)