def import_date1st(fp_date1st, db):
    """Import 1st-date-in-text into MongoDB.

    Reads ``fp_date1st`` line by line. Every non-empty line must hold two
    tab-separated fields: ``doc_id`` and ``date``. For each such line the
    document in ``db.date`` whose ``_id`` equals the doc id gets

    * ``firstraw``   set to the raw date string, and
    * ``firstrange`` set to ``date2daterange(int(date))``.

    Empty lines are skipped.

    Args:
        fp_date1st: path of the tab-separated input file.
        db: database handle exposing a ``date`` collection with ``update``.

    Raises:
        ValueError: if a non-empty line does not contain exactly two
            tab-separated fields, or if the date field is not an integer.
    """
    with open(fp_date1st) as fin:
        for line in fin:
            # Lines yielded by file iteration keep their trailing newline, so
            # a blank line can only be detected after stripping it.
            line = line.strip('\n')
            if not line:
                continue
            doc_id, date = line.split('\t')
            # The _id is queried as unicode so that it matches the stored key.
            db.date.update(
                {u"_id": unicode(doc_id)},
                {'$set': {"firstraw": date,
                          "firstrange": date2daterange(int(date))}})
    # Single-argument parenthesised form prints the same under Python 2.
    print("Finish importing 1st-date-in-texts.")
def import_date(fp_date, db):
    """Import date distribution into MongoDB.

    Reads ``fp_date`` line by line. Every non-empty line must hold three
    tab-separated fields: ``doc_id``, ``date`` and ``tf`` (term frequency).
    Frequencies are summed per ``date2daterange(int(date))`` bucket for each
    document. When the doc id changes, the accumulated counts are passed
    through ``freq2prob`` and stored in the ``distribution`` field of the
    matching document in ``db.date``.

    Because a document is flushed as soon as a different doc id is seen, all
    lines belonging to one document must be contiguous in the input.

    Empty lines are skipped, and an input without any data lines performs no
    update at all.

    Args:
        fp_date: path of the tab-separated input file.
        db: database handle exposing a ``date`` collection with ``update``.

    Raises:
        ValueError: if a non-empty line does not contain exactly three
            tab-separated fields, or if ``date`` / ``tf`` are not numeric.
    """
    # assume that 'date' collection is small enough to put in memory
    with open(fp_date) as fin:
        old_key = None  # doc_id currently being accumulated
        tfdict = defaultdict(float)  # date-range -> summed tf for that doc
        for line in fin:
            # Lines yielded by file iteration keep their trailing newline, so
            # a blank line can only be detected after stripping it.
            line = line.strip('\n')
            if not line:
                continue
            this_key, date, tf = line.split('\t')
            # Compare against None explicitly so that an empty-string doc id
            # is still flushed instead of leaking into the next document.
            if old_key is not None and this_key != old_key:
                # to successfully update, use unicode
                db.date.update(
                    {u"_id": unicode(old_key)},
                    {'$set': {"distribution": freq2prob(tfdict)}})
                tfdict = defaultdict(float)
            old_key = this_key
            # update date tf
            tfdict[date2daterange(int(date))] += float(tf)
        # Don't forget the last doc -- but only if at least one line was read,
        # otherwise there is nothing to store.
        if old_key is not None:
            db.date.update(
                {u"_id": unicode(old_key)},
                {'$set': {"distribution": freq2prob(tfdict)}})
    # Single-argument parenthesised form prints the same under Python 2.
    print("Finish importing date distributions.")