Exemplo n.º 1
0
class NiceTestCase(unittest.TestCase):
	"""Smoke test that prints document text reduced to printable ASCII characters."""

	def setUp(self):
		"""Bind short assertion aliases and build the Source the test reads from."""
		self.t = self.assertTrue
		self.inst = self.assertIsInstance
		# Points at localhost:27017, database 'reddit_stream_test', collection 'combined'.
		self.source = Source(host="localhost",port=27017,database='reddit_stream_test',collection='combined')

	def tearDown(self):
		"""Nothing to clean up after a test."""

	def test_no_bad_characters(self):
		"""Print each document's "cleansed_text" with non-printable characters removed.

		Documents come from html.doc_iter applied to self.source.find() limited
		to 1000 results. The test makes no assertions; it only fails if reading
		or iterating the documents raises.
		"""
		print("\n")
		# Built once: set membership is cheaper than scanning string.printable
		# for every character of every document.
		printable = set(string.printable)
		for doc in html.doc_iter(self.source.find().limit(1000)):
			print("".join(ch for ch in doc["cleansed_text"] if ch in printable))
Exemplo n.º 2
0
	def setUp(self):
		"""Prepare per-test fixtures: assertion shorthands and the Source handle."""
		# Short aliases for the two assertion helpers.
		self.t, self.inst = self.assertTrue, self.assertIsInstance
		# Source for collection 'combined' in database 'reddit_stream_test'
		# on localhost:27017.
		self.source = Source(
			host="localhost",
			port=27017,
			database='reddit_stream_test',
			collection='combined',
		)
Exemplo n.º 3
0
from rdt.data.mongo.source import Source
from rdt.data.mongo.bulkinserter import BulkInserter
if __name__ == "__main__":
	# Reads from reddit_stream.combined and bulk-inserts into
	# reddit_stream_test.load, both on localhost:27017.
	source = Source(
		host="localhost",
		port=27017,
		database="reddit_stream",
		collection="combined",
	)
	destination = Source(
		host="localhost",
		port=27017,
		database='reddit_stream_test',
		collection='load',
	)
	with BulkInserter(source=destination) as bulk:
		cleaned_docs = source.find_clean(batch_size=1000,limit=2000)
		for doc in cleaned_docs:
			# Strip the original "_id" before inserting
			# (presumably so the target collection assigns its own ids; confirm).
			del doc["_id"]
			bulk.insert(doc)
Exemplo n.º 4
0
import apple_label as label
from rdt.data.mongo.source import Source

if __name__ == "__main__":
	# Interactive entry point: ask for a starting unix timestamp, then pass it
	# and the result Sources to label.apple_finder.
	is_good = Source(host="localhost", port=27017, database='reddit_stream_test',collection='is_good')
	is_bad = Source(host="localhost", port=27017, database='reddit_stream_test', collection='is_bad')

	print( "")
	print( "START!\n")

	try:
		# Only the integer parse is guarded here, so "Give me an int" is never
		# shown for a ValueError raised by unrelated code further down.
		utc = int(input("Enter a unix time stamp\nx > 0 to skip to time\nx < 1 to start at beginning\nx = "))
	except ValueError:
		print("Give me an int")
	else:
		try:
			if utc < 1:
				# NOTE(review): the messages say "dropping", but nothing in the
				# visible code drops a collection; only the counts are printed.
				print("dropping " + repr(is_good))
				print("is_good.count(): " + str(is_good.count()))
				print("dropping " + repr(is_bad))
				print("is_bad.count(): " + str(is_bad.count()))
				print("")

			print("Starting at utc " + str(utc))
			print("Hello, determine if they comments relate to Apple Inc!")
			print("---------------------")
			# NOTE(review): maybe_apple is not defined anywhere in the visible
			# code, and its collection cannot be determined from here. It must
			# be bound to the intended Source before this call; until then the
			# handler below reports the resulting NameError.
			label.apple_finder(utc, maybe_apple, is_good, is_bad)
		except Exception as exc:
			# Top-level boundary of the script: report the failure instead of
			# discarding it. KeyboardInterrupt and SystemExit still propagate.
			print("you pooped up")
			print(repr(exc))
Exemplo n.º 5
0
def subreddit(subreddit=None,batch_size=100):
	"""Return the result of Source.find_clean filtered to one subreddit.

	Args:
		subreddit: Value matched against the "subreddit" field. When it is
			None, no Source is created and None is returned.
		batch_size: Forwarded unchanged to find_clean.

	Returns:
		Whatever find_clean returns for the query, or None when no subreddit
		was given.
	"""
	if subreddit is not None:
		# Reads from reddit_stream.combined on localhost:27017.
		connection = Source(
			host="localhost",
			port=27017,
			database="reddit_stream",
			collection="combined",
		)
		query = {"subreddit" : subreddit}
		return connection.find_clean(query,batch_size=batch_size)
	return None