def load_caches():
    """Load every on-disk cache used by the pipeline.

    Runs, in order, the cached-polarity loader on ``polarity`` and the
    cached-parse and cached-concept loaders on ``Sentence``.
    """
    cache_loaders = (
        polarity.load_cached_polarity,
        Sentence.load_cached_parses,
        Sentence.load_cached_concepts,
    )
    for load_cache in cache_loaders:
        load_cache()
def _build_svm_param_grid():
    """Return the list of SVC hyper-parameter grids searched by ``main``.

    The grid covers three kernels:

    * ``rbf``    -- ``gamma`` in 10**-5 .. 10**-1, ``C`` in 2**-15 .. 2**14
    * ``poly``   -- same ``gamma`` and ``C`` ranges, plus ``coef0`` in
      10**-5 .. 10**-1
    * ``linear`` -- ``C`` only
    """
    c_values = [math.pow(2, exponent) for exponent in range(-15, 15)]
    gamma_values = [math.pow(10, exponent) for exponent in range(-5, 0)]
    coef0_values = [math.pow(10, exponent) for exponent in range(-5, 0)]
    return [
        {'kernel': ['rbf'], 'gamma': gamma_values, 'C': c_values},
        {'kernel': ['poly'], 'gamma': gamma_values, 'C': c_values,
         'coef0': coef0_values},
        {'kernel': ['linear'], 'C': c_values},
    ]


def main():
    """Tune an SVC on the manually labelled samples and print a report.

    Steps:

    1. Load the cached polarity data and run ``original_read_input()``.
    2. Load the results stored at ``SAMPlE_DATA_PATH`` and keep the entries
       whose ``"type"`` is ``"manual_label"`` and whose ``"food"`` equals the
       module-level ``foodName`` (at most the first 280 of them).
    3. Call ``setup(sample, index)`` for each kept sample.
    4. Split the module-level ``x`` / ``y`` 50/50 into a development and an
       evaluation set, grid-search an ``SVC`` with 5-fold cross-validation
       on the development half, and print the best estimator together with
       a classification report computed on the evaluation half.

    Reads the module-level names ``x``, ``y``, ``foodName`` and
    ``SAMPlE_DATA_PATH``; mutates ``x`` in place. Returns ``None``.
    """
    # ``x`` is only mutated in place below. The declaration is kept (and
    # hoisted to the top of the function) so that re-enabling the scaling
    # line, which rebinds ``x``, still targets the module-level array.
    global x

    polarity.load_cached_polarity()
    original_read_input()

    results = load_results(SAMPlE_DATA_PATH)
    # A list comprehension (rather than ``filter``) guarantees a real list,
    # so the slice below works regardless of what ``filter`` returns.
    gold_samples = [
        sample for sample in results
        if sample["type"] == "manual_label" and sample["food"] == foodName
    ]

    # For now, only use the first 280 gold samples.
    gold_samples = gold_samples[0:280]
    total = len(gold_samples)
    for index, gold_sample in enumerate(gold_samples):
        # Progress indicator: "<index>/<total>".
        print("%d/%d" % (index, total))
        # presumably fills the module-level x / y rows -- verify in setup()
        setup(gold_sample, index)

    # Set the parameters by cross-validation.
    tuned_parameters = _build_svm_param_grid()

    # x = preprocessing.scale(x)
    # Overwrite the first feature column with the constant 1
    # (presumably a bias/intercept column -- TODO confirm).
    x[:, 0] = 1

    # Balanced class weights, currently disabled in favour of uniform ones:
    # negativeWeight = len(y) / float(len(filter(lambda x : x == -1, y)))
    # zeroWeight = len(y) / float(len(filter(lambda x : x == 0, y)))
    # positiveWeight = len(y) / float(len(filter(lambda x : x == 1, y)))
    negative_weight = 1
    zero_weight = 1
    positive_weight = 1

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        x, y, test_size=0.5, random_state=0)

    # NOTE(review): the class_weight keys imply three classes (-1, 0, 1).
    # Newer scikit-learn releases may require an averaged scorer such as
    # 'f1_weighted' for multiclass targets -- confirm against the installed
    # version before upgrading.
    clf = GridSearchCV(
        SVC(class_weight={-1: negative_weight,
                          0: zero_weight,
                          1: positive_weight}),
        tuned_parameters,
        cv=5,
        scoring='f1',
        n_jobs=16,
    )
    clf.fit(X_train, y_train)

    # ``print("")`` is used for blank lines: under the Python 2 print
    # statement a bare ``print()`` would output "()" instead.
    print("Best parameters set found on development set:")
    print("")
    print(clf.best_estimator_)
    print("")

    # print("Grid scores on development set:")
    # print()
    # for params, mean_score, scores in clf.grid_scores_:
    #     print("%0.3f (+/-%0.03f) for %r"
    #           % (mean_score, scores.std() / 2, params))
    # print()

    print("Detailed classification report:")
    print("")
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print("")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print("")