# NOTE(review): the physical line below is whitespace-collapsed -- many
# statements share one line, and everything after its first `#` is parsed as
# a comment.  The original line breaks and indentation must be restored before
# it can run; the nesting cannot be determined from this text alone.
#
# Visible content, in order:
#   1. Tail of an evaluation routine (its header is not visible here):
#      sets `printHere` on every 50th `counter`, takes the `partition`-th
#      window of width `batchSize` from `batch`, calls
#      `doForwardPass(current, train=False)`, adds the returned
#      newLoss / newWords / lossWords / lossPOS to the running totals
#      devLoss / devWords / devLossWords / devLossPOS, and finally returns the
#      three loss totals, each divided by `devWords`.
#      (No guard against devWords == 0 is visible in this fragment.)
#   2. Start of a training loop: builds `CorpusIteratorFuncHead(language)`,
#      permutes it, and iterates with `rejectShortSentences=True`.  It pulls
#      `10 * batchSize` items via `next(corpus)` and breaks out on
#      StopIteration, sorts them by `len`, shuffles the partition indices
#      0..9, and for each partition bumps `counter`, sets `printHere` on every
#      100th step and slices out the `batchSize`-wide window `current`.
#      The loop body continues past the end of this fragment.
#
# `10 * range(batchSize)` and the in-place `shuffle(partitions)` require
# `range` to return a list, i.e. Python 2 semantics.
# Presumably items gathered before a StopIteration are discarded, since
# `batch` is never assigned in that case -- TODO confirm once the formatting
# is restored.
printHere = (counter % 50 == 0) current = batch[partition * batchSize:(partition + 1) * batchSize] _, _, _, newLoss, newWords, lossWords, lossPOS = doForwardPass( current, train=False) devLoss += newLoss devWords += newWords devLossWords += lossWords devLossPOS += lossPOS return devLoss / devWords, devLossWords / devWords, devLossPOS / devWords while True: # corpus = getNextSentence("train") corpus = CorpusIteratorFuncHead(language) corpus.permute() corpus = corpus.iterator(rejectShortSentences=True) while True: try: batch = map(lambda x: next(corpus), 10 * range(batchSize)) except StopIteration: break batch = sorted(batch, key=len) partitions = range(10) shuffle(partitions) for partition in partitions: counter += 1 printHere = (counter % 100 == 0) current = batch[partition * batchSize:(partition + 1) * batchSize]
# NOTE(review): the physical line below is whitespace-collapsed -- many
# statements share one line, and everything after its first `#` is parsed as
# a comment.  The original line breaks and indentation must be restored before
# it can run; in particular the extent of the `if counter > 5:` body cannot be
# determined from this text alone.
#
# Visible content, in order:
#   1. Tail of a generator (its header is not visible here): yields the triple
#      (input_indices, wordStartIndices + [len(input_indices)],
#      relevant_logprob_sum), then resets `input_indices` to `[2]` and
#      `wordStartIndices` to `[]`.  Per the original inline comment, index 2
#      is the Start-of-Segment token, which lets the first word be predicted.
#   2. Module-level setup: `DEV_PERIOD = 5000`, `epochCount = 0`, and
#      `corpusBase = CorpusIterator(args.language, storeMorph=True)`.
#   3. Epoch loop, running while `failedDevRuns < args.stopAfterFailures`:
#      increments `epochCount`, writes "Epoch N" to stderr, permutes
#      `corpusBase`, builds an iterator with `rejectShortSentences=False` and
#      wraps it with `createStream`.  When `counter > 5` it calls
#      `computeDevLoss()`, keeps the first of the two returned values as
#      `newDevLoss`, appends it to `devLosses`, and prints it next to
#      `lastDevLoss`.  The loop body continues past the end of this fragment.
#
# The `print >> sys.stderr, ...` and bare `print "..."` statements are
# Python 2 syntax.
# `DEV_PERIOD` is referenced only by a commented-out condition in the visible
# text -- presumably unused at present; verify against the rest of the file.
yield input_indices, wordStartIndices + [len(input_indices) ], relevant_logprob_sum input_indices = [ 2 ] # Start of Segment (makes sure that first word can be predicted from this token) wordStartIndices = [] DEV_PERIOD = 5000 epochCount = 0 corpusBase = CorpusIterator(args.language, storeMorph=True) while failedDevRuns < args.stopAfterFailures: epochCount += 1 print >> sys.stderr, "Epoch " + str(epochCount) print "Starting new epoch, permuting corpus" corpusBase.permute() # corpus = getNextSentence("train") corpus = corpusBase.iterator(rejectShortSentences=False) stream = createStream(corpus) if counter > 5: # if counter % DEV_PERIOD == 0: newDevLoss, _ = computeDevLoss() # devLosses.append( devLosses.append(newDevLoss) # newDevLoss = devLosses[-1]-1 # print("DON'T STOP don't stop") print "New dev loss " + str(newDevLoss) + ". previous was: " + str( lastDevLoss)