# NOTE(review): orphaned fragment — the enclosing function (a dev-set
# evaluation pass, judging by the dev* accumulators) begins before this
# excerpt. The indentation of the next line is inconsistent with the loop
# body below it; this looks like a paste/extraction artifact.
current = batch[partition * batchSize:(partition + 1) * batchSize]

            # Forward pass in evaluation mode; accumulate dev-set totals.
            _, _, _, newLoss, newWords, lossWords, lossPOS = doForwardPass(
                current, train=False)
            devLoss += newLoss
            devWords += newWords
            devLossWords += lossWords
            devLossPOS += lossPOS
    # Per-word averages: total loss, word-prediction loss, POS loss.
    # NOTE(review): divides by devWords — ZeroDivisionError on an empty dev
    # set; presumably callers guarantee at least one word. Confirm.
    return devLoss / devWords, devLossWords / devWords, devLossPOS / devWords


# Outer epoch loop: re-create and re-shuffle the training corpus each pass.
while True:
    #  corpus = getNextSentence("train")
    corpus = CorpusIteratorFuncHead(language)
    corpus.permute()
    corpus = corpus.iterator(rejectShortSentences=True)

    while True:
        try:
            # Pull 10 * batchSize sentences at once.
            # NOTE(review): Python 2 idiom — `10 * range(batchSize)` repeats a
            # list and eager `map` lets next()'s StopIteration escape to the
            # except below; under Python 3 this silently yields a lazy map and
            # the loop never breaks. This file appears to target Python 2.
            batch = map(lambda x: next(corpus), 10 * range(batchSize))
        except StopIteration:
            break
        # Sort by sentence length so each slice below holds similar lengths
        # (reduces padding inside a partition).
        batch = sorted(batch, key=len)
        partitions = range(10)
        # NOTE(review): shuffle() of a range object fails on Python 3;
        # works on Python 2 where range() returns a list.
        shuffle(partitions)
        for partition in partitions:
            counter += 1
            printHere = (counter % 100 == 0)
            # Take this partition's batchSize-sized slice of the sorted batch.
            current = batch[partition * batchSize:(partition + 1) * batchSize]

            # NOTE(review): the excerpt is truncated here — this call has no
            # closing parenthesis/arguments; the remainder of the loop body is
            # missing from the visible source.
            loss, baselineLoss, policy_related_loss, _, wordNumInPass, lossWords, lossPOS = doForwardPass(
       # NOTE(review): fragment — the enclosing function (dependency-length
       # accounting, judging by the return value) begins before this excerpt;
       # wordNum, totalDepLength and byType are defined in the missing part.
       if wordNum > 0:
          # Exponential moving average (decay 0.99) of the mean dependency
          # length per word; crossEntropy is a module-level running estimate.
          crossEntropy = 0.99 * crossEntropy + 0.01 * (totalDepLength/wordNum)
       else:
          # No words processed implies no dependency length was accumulated.
          assert totalDepLength == 0
       numberOfWords = wordNum
       return (totalDepLength, numberOfWords, byType)



# This code path only supports one sentence per batch.
assert batchSize == 1

depLengths = []
if True:
  corpus = CorpusIteratorFuncHead(args.language,"train")
  corpusIterator = corpus.iterator()
  # Nothing to do on an empty corpus.
  if corpus.length() == 0:
     quit()
  while True:
    try:
       # One sentence per "batch" (consistent with the assert above).
       batch = [next(corpusIterator)]
    except StopIteration:
       break
    # Single partition — kept for structural symmetry with the multi-partition
    # training loops elsewhere in this file.
    partitions = range(1)
    
    for partition in partitions:
       # NOTE(review): counter is not initialized before this fragment in the
       # visible source — presumably set to 0 earlier; confirm.
       counter += 1
       printHere = (counter % 200 == 0)
       current = batch[partition*batchSize:(partition+1)*batchSize]
       if len(current) == 0:
          continue
# NOTE(review): excerpt truncated — the loop body continues past this point
# in the original file.
# Learning rate for the language-model parameters.
lr_lm = 0.1

# Running cross-entropy estimate; initialized high and refined as an
# exponential moving average during training (see the 0.99/0.01 update
# elsewhere in this file).
crossEntropy = 10.0


def encodeWord(w):
    """Return the integer id for word *w*.

    In-vocabulary ids are shifted by 3 to reserve ids 0-2 for special
    tokens; any word whose raw id is >= vocab_size collapses to 1 (the
    out-of-vocabulary marker).

    NOTE(review): raises KeyError if *w* is absent from `stoi` —
    presumably every incoming word is present; confirm against callers.
    """
    idx = stoi[w]  # hoist: original looked the word up twice
    return idx + 3 if idx < vocab_size else 1


import torch.nn.functional

# Number of sentences processed so far (also drives periodic logging).
counter = 0
# Endless epoch loop: rebuild and reshuffle the corpus each pass; the
# hard cap on `counter` below is the only exit.
while True:
    corpus = CorpusIterator(args.language, partition="together")
    corpus.permute()
    corpus = corpus.iterator(rejectShortSentences=False)

    for current in corpus:
        # Hard stop after 50M sentences.
        if counter > 50000000:
            print("Quitting at counter " + str(counter))
            quit()
        counter += 1
        printHere = (counter % 50 == 0)
        # Wrap the single sentence in a list: downstream code expects a batch.
        current = [current]
        batchOrdered, logits = orderSentence(current[0], dhLogits, printHere)

        # NOTE(review): assumes each corpus item is a (sentence, metadata)
        # pair — confirm against CorpusIterator.
        metadata = current[0][1]

        maxLength = len(batchOrdered)
        batchOrdered = [batchOrdered]
        # NOTE(review): excerpt truncated here — the body of this `if`
        # (presumably skipping very short sentences) is missing.
        if maxLength <= 2:
# (extraction artifact: "示例#4" — i.e. "Example #4" — and a stray "0" are
#  snippet-boundary markers from the scraping source, not executable code)

# NOTE(review): fragment — the enclosing function (a chart/CKY-style parser
# forward pass, judging by `chart` and the probabilities) begins before this
# excerpt.
#       print(chart[0][-1])
    # Marginal log-probability of the sentence: sum the (exp'd) cell scores
    # spanning the whole sentence, treating missing (None) entries as 0.
    # NOTE(review): plain exp/log, not a log-sum-exp — may underflow for very
    # negative scores; confirm the score range makes this safe.
    fullProb = log(sum([exp(x) if x is not None else 0 for x in chart[0][-1]]))
    goldProb = goldProbability
    # Conditional log-probability gap between the marginal and the gold parse.
    conditional = (fullProb - goldProb)
    return conditional, len(batchOrdered[0]), fullProb, goldProb

# Evaluate on the dev partition, accumulating per-sentence statistics.
corpusDev = CorpusIteratorFuncHead(language, "dev")

conditionalTotal = 0
marginalTotal = 0
goldTotal = 0
lengthTotal = 0

for i, sentence in enumerate(corpusDev.iterator(rejectShortSentences=True)):
    # forward() returns (conditional gap, sentence length, marginal log-prob,
    # gold log-prob) — see the fragment above.
    conditional, length, marginal, gold = forward([sentence])
    conditionalTotal += conditional
    marginalTotal += marginal
    goldTotal += gold
    lengthTotal += length
    # NOTE(review): under Python 2 (which the `print >>` below implies) this
    # prints a single tuple, not space-separated values — possibly unintended.
    print(language, i, conditionalTotal / lengthTotal,
          marginalTotal / lengthTotal, goldTotal / lengthTotal)
    # Cap the evaluation at ~500 dev sentences.
    if i > 500:
        break
# Persist the three per-word averages, one per line.
# NOTE(review): Python 2 print-to-file syntax; a SyntaxError under Python 3.
with open(
        "/u/scr/mhahn/cky/" + __file__ + "_" + language + "_" + model + "_" +
        BASE_DIR + ".txt", "w") as outFile:
    print >> outFile, conditionalTotal / lengthTotal
    print >> outFile, marginalTotal / lengthTotal
    print >> outFile, goldTotal / lengthTotal