Example #1
        try:
            numeric = [next(training_chars) for _ in range(args.batchSize)]
        except StopIteration:
            break
        printHere = (counter % 50 == 0)
        loss, charCounts = forward(numeric, printHere=printHere, train=True)
        backward(loss, printHere)
        trainChars += charCounts
        if printHere:
            print("Dev losses")
            print(devLosses)
            print("Chars per sec " + str(trainChars /
                                         (time.time() - startTime)))
    rnn_drop.train(False)  # eval mode: disables dropout for the dev pass

    dev_data = corpusIteratorWiki.dev("italian")
    print("Got data")
    dev_chars = prepareDataset(dev_data, train=True)

    dev_loss = 0
    dev_char_count = 0
    counter = 0

    while True:
        counter += 1
        try:
            numeric = [next(dev_chars) for _ in range(args.batchSize)]
        except StopIteration:
            break
        printHere = (counter % 50 == 0)
        loss, numberOfCharacters = forward(numeric,
                                           printHere=printHere, train=False)
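The listing cuts this example off here. As a rough sketch of how such a dev loop is typically closed out (the lines below are an assumption, not code from the original file, and they presume that forward returns the batch-mean per-character loss together with the character count), the held-out loss would be accumulated and its average appended to devLosses:

        # hypothetical continuation of the loop body above
        dev_loss += numberOfCharacters * float(loss)
        dev_char_count += numberOfCharacters
    # after the while loop ends, record the average per-character dev loss
    devLosses.append(dev_loss / dev_char_count)
    print("Dev loss per character:", devLosses[-1])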
Example #2
        if printHere:
            print((epoch, counter))
            print("Dev losses")
            print(devLosses)
            print("Chars per sec " + str(trainChars / (time.time() - startTime)))
            print(learning_rate)
            print(args)
        if counter % 20000 == 0 and epoch == 0:
            if args.save_to is not None:
                torch.save({name: module.state_dict()
                            for name, module in named_modules.items()},
                           MODELS_HOME + "/" + args.save_to + ".pth.tar")

    rnn_drop.train(False)

    dev_data = corpusIteratorWiki.dev(args.language)
    print("Got data")
    dev_chars = prepareDatasetChunks(dev_data, train=False)

    dev_loss = 0
    dev_char_count = 0
    counter = 0
    hidden, beginning = None, None
    while True:
        counter += 1
        try:
            numeric = next(dev_chars)
        except StopIteration:
            break
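The torch.save call above writes a plain dict mapping each name in named_modules to that module's state_dict, so restoring a checkpoint is symmetric. A minimal loading sketch, assuming the same named_modules, MODELS_HOME, args.save_to and rnn_drop objects are in scope (map_location="cpu" is only an assumption so the sketch also runs without a GPU):

import torch

checkpoint = torch.load(MODELS_HOME + "/" + args.save_to + ".pth.tar",
                        map_location="cpu")
for name, module in named_modules.items():
    module.load_state_dict(checkpoint[name])
rnn_drop.train(False)  # evaluation mode before scoring held-out data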
Example #3
            ):  # this is actually not a hard assertion, it should just be quite unlikely in languages such as English
                print("WARNING",
                      list(zip(boundaries[j][i:], boundariesAll[j][i:])))


#                     if len(relevantWords[-1]) > 1:
#                       assert False
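The condition that triggers the WARNING above is cut off by the listing. Purely as an illustration of the kind of alignment check involved (the function below is hypothetical; only the names boundaries, boundariesAll, and the WARNING print come from the fragment), predicted boundaries could be compared position by position against the full reference boundaries:

def warnOnBoundaryMismatch(boundaries, boundariesAll):
    # Hypothetical sketch: both arguments are lists of per-example boundary
    # sequences. Warn from the first position where the two lists diverge.
    for j in range(len(boundaries)):
        for i in range(len(boundaries[j])):
            if i >= len(boundariesAll[j]) or boundaries[j][i] != boundariesAll[j][i]:
                # Not a hard assertion: such mismatches should merely be rare
                # in languages such as English.
                print("WARNING",
                      list(zip(boundaries[j][i:], boundariesAll[j][i:])))
                break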

import time

devLosses = []
# for epoch in range(10000):
if True:  # the epoch loop is commented out, so this block runs exactly once
    training_data = corpusIteratorWikiWords.dev(args.language,
                                                removeMarkup=False)
    training_data_c = corpusIteratorWiki.dev(args.language, doShuffling=False)

    print("Got data")
    training_chars = prepareDatasetChunks(training_data,
                                          training_data_c,
                                          train=False)

    rnn_drop.train(False)
    startTime = time.time()
    trainChars = 0
    counter = 0
    while True:
        counter += 1
        try:
            numeric = [next(training_chars) for _ in range(args.batchSize)]
        except StopIteration: