def getSentiment():
    """Run the sentiment pipeline on the test sentences and print the results.

    Steps, in order:
      1. Load the dataset tokens and the saved word vectors.
      2. Build one feature row and one integer label per test sentence.
      3. Run ``sgd`` on ``softmax_wrapper`` starting from random weights.
      4. Print the categorified predictions, then the ``softmax`` output and
         the squashed prediction values.

    Takes no arguments and returns nothing; all output goes to stdout.
    """
    corpus = StanfordSentiment()
    vocab = corpus.tokens()

    # Word vectors saved by an earlier training run.
    _, wordVectors, _ = load_saved_params()
    vectorDim = wordVectors.shape[1]

    # Test sentences; each entry unpacks into (words, label).
    testSet = corpus.getTestSentences()
    print(testSet)
    count = len(testSet)

    features = np.zeros((count, vectorDim))
    labels = np.zeros((count,), dtype=np.int32)
    for row, (words, label) in enumerate(testSet):
        labels[row] = label
        features[row, :] = getSentenceFeature(vocab, wordVectors, words)

    # Seed both RNGs before drawing the initial weights.
    random.seed(3141)
    np.random.seed(59265)
    # NOTE(review): the weight matrix gets one column per test sentence
    # (count), not a fixed number of classes -- confirm this is intended.
    weights = np.random.randn(vectorDim, count)
    regularization = 0.00001

    def objective(candidate):
        # Cost callback handed to sgd; closes over the features and labels.
        return softmax_wrapper(features, labels, candidate, regularization)

    weights = sgd(objective, weights, 3.0, 10, PRINT_EVERY=10)

    prob = softmax(features.dot(weights))
    _, _, pred = softmaxRegression(features, labels, weights)
    # NOTE(review): logistic squashing of the third value returned by
    # softmaxRegression -- verify that value is meant to be squashed.
    pred = 1 / (1 + np.exp(-pred))

    for value in pred:
        print(categorify(value))
    print(prob)
    print(pred)
tokens = dataset.tokens()
nWords = len(tokens)

# Saved word vectors from the earlier training run.  The first nWords rows
# are added to the remaining rows to give one combined vector per token
# (assumes the saved matrix stacks two nWords-row halves -- TODO confirm).
_, wordVectors0, _ = load_saved_params()
wordVectors = wordVectors0[:nWords, :] + wordVectors0[nWords:, :]
dimVectors = wordVectors.shape[1]

# Training set: one feature row and one int32 label per sentence.
trainset = dataset.getTrainSentences()
nTrain = len(trainset)
trainFeatures = np.zeros((nTrain, dimVectors))
trainLabels = np.zeros((nTrain,), dtype=np.int32)
for i, (words, label) in enumerate(trainset):
    trainLabels[i] = label
    trainFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Dev set, prepared the same way as the training set.
devset = dataset.getDevSentences()
nDev = len(devset)
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i, (words, label) in enumerate(devset):
    devLabels[i] = label
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Sweep over the candidate regularization values; both RNGs are re-seeded
# at the top of every iteration.
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
tokens = dataset.tokens()
nWords = len(tokens)

# Load the saved word vectors and sum the two row blocks split at nWords
# (presumably two vector sets per token -- verify against the saver).
_, wordVectors0, _ = load_saved_params()
wordVectors = wordVectors0[:nWords, :] + wordVectors0[nWords:, :]
dimVectors = wordVectors.shape[1]

# Build the training feature matrix and label vector.
trainset = dataset.getTrainSentences()
nTrain = len(trainset)
trainFeatures = np.zeros((nTrain, dimVectors))
trainLabels = np.zeros(nTrain, dtype=np.int32)
for i, pair in enumerate(trainset):
    # Each entry is a two-item (words, label) pair.
    words, trainLabels[i] = pair
    trainFeatures[i] = getSentenceFeature(tokens, wordVectors, words)

# Build the dev feature matrix and label vector.
devset = dataset.getDevSentences()
nDev = len(devset)
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros(nDev, dtype=np.int32)
for i, pair in enumerate(devset):
    words, devLabels[i] = pair
    devFeatures[i] = getSentenceFeature(tokens, wordVectors, words)

# Try each regularization value, resetting both random seeds first.
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)