create index site_index on filtered_data_index(site_type); create index keyword_index on filtered_data_index(keyword); create index timestamp_index on filtered_data_index(timestamp); create index site_sum_index on sumdata_byday(site_type); create index keyword_sum_index on sumdata_byday(keyword); create index timestamp_sum_index on sumdata_byday(timestamp); """ import os, sys from datetime import datetime from dbutils import DBUtils from strutils import get_timestamp print datetime.now() if len(sys.argv) < 2: timestamp = get_timestamp(day_delta=1) else: timestamp = sys.argv[1] db = DBUtils() c = db.execute_sql( "select count(*),site_type,keyword,timestamp where timestamp=%s group by keyword,site_type" % timestamp ) for one in c.fetchall(): count, site_type, timestamp = one values = {"timestamp": timestamp, "count": count, "keyword": keyword, "site_type": site_type} db.insert("sumdata_byday", values) db.close()
# Patterns are loop-invariant, so compile them once instead of per line.
# Raw strings keep the regex escapes (\d, \w, \.) explicit.
filename_repx = re.compile(r".+_(?P<date>\d{8})_(?P<time>\d{4})\.txt$")
field_repx = re.compile(r"^@(?P<name>\w+):(?P<value>.+)")
quote_repx = re.compile("'|\"")

# Fields collected for the record currently being read.
column = {}
try:
    while True:
        line = f.readline()
        if not line:
            # End of file.
            # NOTE(review): fields collected after the last "@" line are
            # dropped here, exactly as in the original -- confirm intended.
            break
        line = str(line).strip()

        # A bare "@" line terminates the current record: store it.
        if line == "@" and column:
            # The date and time columns are taken from the file name
            # (..._<8 digits>_<4 digits>.txt). A file name that does not
            # match raises AttributeError here, as before.
            date_dict = filename_repx.match(filename).groupdict()
            column["date"] = date_dict["date"]
            column["time"] = date_dict["time"]
            # Records that carry "udid" are stored under "uid" instead.
            if "udid" in column:
                column["uid"] = column["udid"]
                del column["udid"]
            db.insert(table_name, column)
            column = {}
            continue

        # Otherwise expect a "@name:value" field line; anything else is ignored.
        match_str = field_repx.match(line)
        if match_str:
            tmp_dict = match_str.groupdict()
            name = tmp_dict["name"].lower()
            value = tmp_dict["value"].strip()
            try:
                value = filter_tags(value).strip()
                # Strip single and double quotes from the value.
                value = replace(value, quote_repx, "")
                value = smart_utf8(value)
                column[name] = value
            except Exception as e:
                # Best effort: report the problem and skip this field only.
                print(e)
finally:
    # Close the input even if an insert or the file-name match fails.
    f.close()