def _load_items(self):
    """Load up to 10000 parsed records from a hard-coded CSV file.

    Every line of the file is passed to ``itemLine2Dict``; lines for which
    it returns a falsy value are skipped.

    Returns:
        list: the truthy ``itemLine2Dict`` results in file order, at most
        10000 entries.
    """
    weibos = []
    # ``with`` closes the file on the early ``break`` and also if
    # ``itemLine2Dict`` raises; the previous bare ``open`` never closed it.
    with open('/media/data/original_data/csv/20130922_cut/1.csv') as f:
        for line in f:
            weibo = itemLine2Dict(line)
            if not weibo:
                continue
            weibos.append(weibo)
            # NOTE(review): the limit is applied to accepted records, i.e.
            # skipped lines do not count towards it - confirm this matches
            # the intended nesting of the original counter.
            if len(weibos) == 10000:
                break
    return weibos
def csv_input_pre_func(item):
    """Run ``item`` through ``itemLine2Dict`` and hand back its result.

    Args:
        item: the value to convert; forwarded to ``itemLine2Dict`` untouched.

    Returns:
        Whatever ``itemLine2Dict(item)`` returns.
    """
    return itemLine2Dict(item)
# Open the three per-date LevelDB databases (keywords, interact, counts) under
# LEVELDBPATH. Each is opened with block_cache_size and write_buffer_size set
# to 8 * (2 << 25) == 536870912.
# NOTE(review): presumably these handles are used by profile_person_cal -
# confirm against its definition.
daily_profile_keywords_db = leveldb.LevelDB(
    os.path.join(LEVELDBPATH, 'linhao_profile_person_keywords_%s' % now_datestr),
    block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))
daily_profile_interact_db = leveldb.LevelDB(
    os.path.join(LEVELDBPATH, 'linhao_profile_person_interact_%s' % now_datestr),
    block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))
daily_profile_counts_db = leveldb.LevelDB(
    os.path.join(LEVELDBPATH, 'linhao_profile_person_counts_%s' % now_datestr),
    block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))

# Directory holding the input files for the date being processed.
csv_dir_path = '/media/sdc/original_data/csv/'
source_path = csv_dir_path + '%s/' % now_datestr
source_files = os.listdir(source_path)

count = 0
ts = te = time.time()
for source_name in source_files:
    print(source_name)
    # ``with`` closes each input file once it has been consumed; previously
    # one handle per source file was left open for the rest of the run.
    with open(source_path + source_name, 'r') as f:
        for line in f:
            # Best-effort processing: a failure on one line is reported and
            # the loop moves on to the next line.
            try:
                itemdict = itemLine2Dict(line)
                if itemdict:
                    profile_person_cal(itemdict)

                # NOTE(review): progress reporting and the counter are assumed
                # to run for every line that did not raise, not only for lines
                # that produced an item - confirm against the intended nesting.
                if count % 10000 == 0:
                    te = time.time()
                    # Built as one string so the output is the same whether
                    # ``print`` is the statement or the function.
                    progress = (count, '%s sec' % (te - ts),
                                'profile_person_cal', now_datestr)
                    print(' '.join('%s' % part for part in progress))
                    ts = te
                count += 1
            except Exception as e:
                print(e)