# Select up to 500 parses whose "analyzed" marker differs from the current
# analyzer version.
confessions = db.parses.find(
    {"analyzed": {"$ne": ANALYZER_VERSION}},
    limit=500,
)
# A category is kept only when its probability is strictly above this value.
threshold = 0.2

for confession in confessions:
    for tree_id, raw_tree in enumerate(confession["trees"]):
        # Trees stored as the literal string "None" are skipped.
        if raw_tree == "None":
            continue

        # get sentence categories
        tree = Tree.fromstring(raw_tree)
        categories = [
            (category, prob)
            for category, prob in classifier.classify(tree).items()
            if prob > threshold
        ]

        # add to queue to get sentence sentiment; tree_id is the tree's index
        # within the confession's "trees" list
        predictor.add_tree({
            "raw_tree": raw_tree,
            "tree_id": tree_id,
            "categories": categories,
            "confession_id": confession["_id"],
        })

# run sentiment predictor
predictor.run()

# aggregate by confession: confession_id -> list of that confession's entries
# from predictor.trees
confession_results = {}
for datum in predictor.trees:
    confession_results.setdefault(datum["confession_id"], []).append(datum)

for confession_id, sentences in confession_results.items():
    # fresh dicts for each confession
    categories = {}
    probabilities = {}
# NOTE(review): this line is whitespace-collapsed and begins in the middle of a
# function body. The enclosing header and the initialisation of `accurate` /
# `inaccurate` are not visible here, so the code below is left untouched.
#
# Function tail: for `datum`, the prediction counts as accurate when
# `sentiment` is < 0 for a negative `gold_sentiment`, or > 0 otherwise. Every
# other case (including a `sentiment` of exactly 0) counts as inaccurate. The
# value returned is accurate / (accurate + inaccurate), forced to float
# division by the `*1.0`.
#
# Script, "predict sentence sentiments": for each ((parse_id, tree_id),
# sentiments) entry of `sentences`, the parse document is fetched by
# ObjectId(parse_id), trees[tree_id] is taken as the raw tree, and the mean of
# `sentiments` is used as its gold sentiment. Each tree is added to a new
# SentimentPredictor, the predictor is run, and
# score_accuracy(predictor.trees) is printed.
#
# Script, "predict confession sentiments": for each (parse_id, sentiments)
# entry of `confessions`, the mean of `sentiments` is stored on the fetched
# document under "gold_sentiment" and the document is passed to
# add_confession. After run(), score_accuracy(predictor.confessions) is
# printed.
#
# NOTE(review): `print score_accuracy(...)` is Python 2 statement syntax.
# NOTE(review): accurate + inaccurate == 0 or an empty `sentiments` list would
# raise ZeroDivisionError, and a find_one() that yields no document would fail
# on the subscript -- confirm the inputs rule these cases out.
s = datum["sentiment"] if datum["gold_sentiment"] < 0: if s < 0: accurate += 1 else: inaccurate += 1 else: if s > 0: accurate += 1 else: inaccurate += 1 return accurate*1.0/(accurate+inaccurate) # predict sentence sentiments predictor = SentimentPredictor() for (parse_id, tree_id), sentiments in sentences.items(): confession = db.parses.find_one({ "_id": ObjectId(parse_id) }) tree = confession["trees"][tree_id] sentiment = sum(sentiments)*1.0/len(sentiments) predictor.add_tree({ "raw_tree": tree, "gold_sentiment": sentiment }) predictor.run() print score_accuracy(predictor.trees) # predict confession sentiments predictor = SentimentPredictor() for parse_id, sentiments in confessions.items(): confession = db.parses.find_one({ "_id": ObjectId(parse_id) }) confession["gold_sentiment"] = sum(sentiments)*1.0/len(sentiments) predictor.add_confession(confession) predictor.run() print score_accuracy(predictor.confessions)