vocsize = len(words2idx) nclasses = len(labels2idx) nsentences = len(test_lex) numpy.random.seed(s['seed']) random.seed(s['seed']) rnn = model( nh = s['nhidden'], nc = nclasses, ne = vocsize, de = s['emb_dimension'], cs = s['win'] ) rnn.load(folder) #select a few popular words show, me, movies, what, in #w_idx = word2idx['show']; #print w_idx #w_emb = rnn.emb[w_idx] #print w_emb #word->index->emb #find n nearest embeddings #emb->index->word predictions_test = [ map(lambda x: idx2label[x], \ rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\ for x in test_lex ] groundtruth_test = [ map(lambda x: idx2label[x], y) for y in test_y ] words_test = [ map(lambda x: idx2word[x], w) for w in test_lex] res_test = conlleval(predictions_test, groundtruth_test, words_test, sys.argv[6]) #print 'Test set performance -- F1: ', res_test['f1'], ' '*20 print res_test['a'],' ',res_test['p'],' ',res_test['r'],' ',res_test['f1'],' '
# NOTE(review): the next line is the tail of a model(...) call whose opening
# line lies outside this chunk — kept verbatim; confirm against the full file.
ne = vocsize, de = s['emb_dimension'], cs = s['win'] )

# train with early stopping on validation set
best_f1 = -numpy.inf
s['clr'] = s['lr']  # current learning rate; 'lr' is the initial value
for e in xrange(s['nepochs']):
    # shuffle
    # Re-shuffle sentences and labels in lockstep each epoch.
    shuffle([train_lex, train_y], s['seed'])
    s['ce'] = e  # current epoch, recorded in the settings dict
    tic = time.time()
    total_cost = 0
    count = 0
    for i in xrange(nsentences):
        # Expand the i-th sentence into per-token context windows, then cut
        # the window sequence into minibatches of int32 arrays.
        cwords = contextwin(train_lex[i], s['win'])
        words = map(lambda x: numpy.asarray(x).astype('int32'),\
             minibatch(cwords, s['bs']))
        labels = train_y[i]
        # One SGD step per (batch, label) pair at the current learning rate;
        # normalize() rescales the embeddings after each update.
        for word_batch , label_last_word in zip(words, labels):
            total_cost += rnn.train(word_batch, label_last_word, s['clr'])
            count +=1
            rnn.normalize()
        if s['verbose']:
            # Trailing comma + '\r' keep the progress line on one row.
            print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
            sys.stdout.flush()
    print ''
    print 'Learning rate: %2.4f'%(s['clr'])
    print 'Average Training Cost: %2.4f'%(total_cost/count)
    # NOTE(review): this statement continues past the end of this chunk
    # (backslash continuation) — kept verbatim.
    predictions_valid = [ map(lambda x: idx2label[x], \