예제 #1
0
def import_date1st(fp_date1st, db):
	"""Import the first date found in each text into MongoDB.

	Every non-blank line of the input file must contain exactly two
	tab-separated fields: ``doc_id`` and ``date``. For each such line the
	document in ``db.date`` whose ``_id`` equals ``doc_id`` gets two fields
	set: ``firstraw`` (the date string exactly as read) and ``firstrange``
	(the value returned by ``date2daterange(int(date))``).

	Blank or whitespace-only lines are skipped.

	Args:
		fp_date1st: Path of the tab-separated input file.
		db: Database handle exposing a ``date`` collection that provides
			an ``update(spec, document)`` method.

	Raises:
		ValueError: If a non-blank line does not have exactly two
			tab-separated fields, or if its date field is not an integer.
	"""
	with open(fp_date1st) as fin:
		for line in fin:
			record = line.strip('\n')
			# A line read from a file still carries its newline, so the raw
			# line is never falsy; test the stripped text to really skip
			# blank lines instead of failing on the unpack below.
			if not record.strip():
				continue
			doc_id, date = record.split('\t')
			# The _id is matched as a unicode string (presumably the type
			# the ids were stored with -- confirm against the loader).
			db.date.update(
				{u"_id": unicode(doc_id)},
				{'$set': {
					"firstraw": date,
					"firstrange": date2daterange(int(date)),
				}})
	# Single-argument parenthesised form prints the same text on Python 2
	# and also parses on Python 3.
	print("Finish importing 1st-date-in-texts.")
예제 #2
0
def import_date(fp_date, db):
	"""Import per-document date distributions into MongoDB.

	Every non-blank line of the input file must contain exactly three
	tab-separated fields: ``doc_id``, ``date`` and ``tf``. Term frequencies
	are summed per ``date2daterange(int(date))`` key for each run of
	consecutive lines sharing the same ``doc_id``. When the ``doc_id``
	changes (and once more at end of file) the accumulated frequencies are
	passed through ``freq2prob`` and stored in the ``distribution`` field of
	the matching ``db.date`` document.

	Lines belonging to one document must therefore be adjacent in the file;
	a ``doc_id`` that reappears later overwrites its earlier distribution.
	Only one document's frequencies are held in memory at a time.

	Blank or whitespace-only lines are skipped, and an input without any
	data lines performs no database update at all.

	Args:
		fp_date: Path of the tab-separated input file.
		db: Database handle exposing a ``date`` collection that provides
			an ``update(spec, document)`` method.

	Raises:
		ValueError: If a non-blank line does not have exactly three
			tab-separated fields, or if ``date`` / ``tf`` are not numeric.
	"""
	def _flush(doc_id, frequencies):
		# Store the finished distribution of one document.
		# To successfully update, the _id has to be given as unicode.
		db.date.update(
			{u"_id": unicode(doc_id)},
			{'$set': {"distribution": freq2prob(frequencies)}})

	with open(fp_date) as fin:
		old_key = None  # doc_id of the group currently being accumulated
		tfdict = defaultdict(float)  # date-range -> summed tf for that doc
		for line in fin:
			record = line.strip('\n')
			# A line read from a file still carries its newline, so the raw
			# line is never falsy; test the stripped text to really skip
			# blank lines instead of failing on the unpack below.
			if not record.strip():
				continue
			this_key, date, tf = record.split('\t')
			# Compare against None rather than truthiness so that a group
			# with an empty doc_id is flushed on its own instead of leaking
			# its frequencies into the next document.
			if old_key is not None and this_key != old_key:
				_flush(old_key, tfdict)
				tfdict = defaultdict(float)
			old_key = this_key
			# Accumulate this line's term frequency under its date range.
			tfdict[date2daterange(int(date))] += float(tf)
		# Don't forget the last document -- but only if any line was read,
		# otherwise there is nothing to store.
		if old_key is not None:
			_flush(old_key, tfdict)
	# Single-argument parenthesised form prints the same text on Python 2
	# and also parses on Python 3.
	print("Finish importing date distributions.")