예제 #1
0
 def _load_items(self):
     """Load up to 10000 parsed items from the hard-coded sample CSV file.

     Every line of the file is passed to ``itemLine2Dict``; lines for which
     it returns a falsy value are skipped.

     Returns:
         list: the truthy ``itemLine2Dict`` results in file order, at most
         10000 entries.
     """
     max_items = 10000
     weibos = []
     # The context manager guarantees the file handle is closed, including
     # on the early break below and when itemLine2Dict raises.
     with open('/media/data/original_data/csv/20130922_cut/1.csv') as f:
         for line in f:
             weibo = itemLine2Dict(line)
             if weibo:
                 weibos.append(weibo)
                 # Stop reading once the cap is reached.
                 if len(weibos) == max_items:
                     break
     return weibos
예제 #2
0
def csv_input_pre_func(item):
    """Parse one raw input line with ``itemLine2Dict`` and return the result."""
    return itemLine2Dict(item)
예제 #3
0
    # Open three per-day LevelDB databases under LEVELDBPATH; each database
    # name is suffixed with now_datestr. Both size options are set to
    # 8 * (2 << 25) == 2**29 == 536870912 (presumably bytes, i.e. 512 MiB --
    # TODO confirm against the leveldb binding in use).
    # NOTE(review): none of the three handles is referenced again in the
    # visible code; presumably profile_person_cal uses them -- verify.
    daily_profile_keywords_db = leveldb.LevelDB(os.path.join(LEVELDBPATH, 'linhao_profile_person_keywords_%s' % now_datestr),
                                                block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))
    daily_profile_interact_db = leveldb.LevelDB(os.path.join(LEVELDBPATH, 'linhao_profile_person_interact_%s' % now_datestr),
                                                block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))
    daily_profile_counts_db =  leveldb.LevelDB(os.path.join(LEVELDBPATH, 'linhao_profile_person_counts_%s' % now_datestr),
                                               block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))

    # Input files are read from the sub-directory of csv_dir_path that is
    # named after now_datestr.
    csv_dir_path = '/media/sdc/original_data/csv/'
    source_path = csv_dir_path + '%s/' % now_datestr
    source_files = os.listdir(source_path)
    
    # count: number of items handled so far (see increment below).
    # ts / te: start and end timestamps of the current progress interval.
    count = 0
    ts = te = time.time()
    for f in source_files:
        print f
        # NOTE(review): `f` is rebound from the file name to the file object,
        # and the file is never closed in the visible code.
        # NOTE(review): the next two lines and the itemLine2Dict line are
        # indented with a tab mixed into the leading spaces.
    	f = open(source_path + f, 'r')
    	for line in f:
            # Any exception raised while handling a line is printed and the
            # loop moves on to the next line.
            try:
    	        itemdict = itemLine2Dict(line)
                # Lines for which itemLine2Dict returns a falsy value are skipped.
                if itemdict:
                    profile_person_cal(itemdict)
            
                    # Progress report each time count is a multiple of 10000
                    # (including 0): prints the count and the seconds elapsed
                    # since the previous report, then restarts the interval.
                    if count % 10000 == 0:
                        te = time.time()
                        print count, '%s sec' % (te - ts), 'profile_person_cal', now_datestr
                        ts = te
                    # Only reached for truthy items whose profile_person_cal
                    # call did not raise.
                    count += 1
            except Exception, e:
                print e