Python Features.upsert示例

编程语言: Python

命名空间/包名称: rdt.data.mongo.features

类/类型: Features

方法/功能: upsert

hotexamples.com的示例: 2

Python Features.upsert - 已找到2个示例。这些是从开源项目中提取的、评价最高的 rdt.data.mongo.features.Features.upsert 真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

find(4)

upsert(2)

常用方法

find (4)

upsert (2)

示例#1

显示文件

文件： classifier_edit.py 项目： juchiyama/bigdata_fall2015

from rdt.data.mongo.features import Features
import sys

def remove_bigram(bgrams, word1, word2):
    """Return a new list without the entries whose bigram is (word1, word2).

    Each entry is read as ``entry[0][0]`` (first word) and ``entry[0][1]``
    (second word); anything else in the entry is carried through untouched.
    Only an exact match on BOTH words is removed. The previous filter
    (``first != word1 and second != word2``) also discarded every bigram
    that merely shared one of the two words.
    """
    return [
        entry
        for entry in bgrams
        if (entry[0][0], entry[0][1]) != (word1, word2)
    ]


if __name__ == "__main__":
    # Interactive editor: repeatedly remove bigrams from the stored list of
    # one subreddit and optionally write the result back.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <subreddit>".format(sys.argv[0]))
    subreddit = sys.argv[1]

    fts = Features(host="localhost", port=27017, database="reddit_stream", collection="features")
    docs = list(fts.find({"subreddit": subreddit}))
    if not docs:
        # Fail with a readable message instead of an IndexError on docs[0].
        sys.exit("no features document found for subreddit {!r}".format(subreddit))
    bgrams = docs[0]["bigrams"]

    while True:
        print(bgrams)
        print("what do you want remove?")
        word1 = input("enter the first word: ")
        word2 = input("enter the second word: ")
        bgrams = remove_bigram(bgrams, word1, word2)

        action = input("(w)rite, (q)uit, (c)ontinue")
        if action == "w":
            fts.upsert({"subreddit": subreddit}, {"bigrams": bgrams})
        if action == "q":
            break
        # Any other answer (including "c") simply loops again.

示例#2

显示文件

文件： allsubreddits_bigram.py 项目： juchiyama/bigdata_fall2015

from rdt.data.mongo.features import Features
import rdt.job as job, nltk, sys
from nltk.corpus import stopwords

def rank_bigrams(freq_items):
    """Return the (bigram, count) pairs sorted for storage.

    Pairs are ordered by descending second element (the count); ties are
    broken by ascending first element (the bigram itself).
    """
    return sorted(freq_items, key=lambda t: (-t[1], t[0]))


if __name__ == "__main__":
    # Compute bigram frequencies over the combined collection and store them
    # under the pseudo-subreddit "all".
    #
    # Local names are kept distinct from the imported `stopwords` and `job`
    # modules so the modules are not rebound at module scope.
    # A set gives constant-time membership tests in the word filter below.
    stop_tokens = set(stopwords.words('english')) | {
        '-', 'https', '%', '[', ']', "''", "``", '--', "'s",
        ",", ".", "(", ")", ":", "n't", "?", "!",
    }

    ft_db = Features(host="localhost", port=27017, database="reddit_stream", collection="features")
    source = job.AnnotatedSource(host="localhost", port=27017, database="reddit_stream", collection="big_combined")

    words = source.to_words({}, remove_stopwords=True, limit=6000)
    finder = source.bigram_collocation_finder(words)
    finder.apply_freq_filter(4)
    finder.apply_word_filter(lambda w: w in stop_tokens)

    # The raw_freq scoring pass was computed but never used, so it is dropped;
    # the stored data comes straight from the finder's frequency distribution.
    ft_db.upsert({"subreddit": "all"}, {"bigrams": rank_bigrams(finder.ngram_fd.items())})