def process(self):
    """Write a labelled click sample file for the FLAGS time window.

    Selects every ``conversion`` row whose ``click_time`` lies in
    ``[FLAGS.start, FLAGS.end)``.  For each click the item joined to
    its shop is looked up by the click's creative id; clicks without
    such an item are counted in ``self.notfound`` and skipped.  For the
    remaining clicks ``taobao_report`` is searched for a row whose
    ``outer_code`` is ``'jn' + click_hash``: when one exists the sample
    is labelled 1 and ``self.matched`` is incremented, otherwise it is
    labelled 0.

    The samples are written to ``FLAGS.out_file`` under the header
    ``price, volume, score, lctr, createts, y``, where the columns are
    the item's price, volume, votescore, votescore_s2 and
    ``date2ts(created)`` followed by the label.

    Side effects: increments ``self.total`` once per click and
    overwrites ``FLAGS.out_file``.
    """
    where = "click_time >= %s and click_time < %s" % (
        time.mktime(FLAGS.start.timetuple()),
        time.mktime(FLAGS.end.timetuple()))
    click_sql = ("select click_hash, creative_id, media_id, click_ip, click_time "
                 "from conversion where %s" % where)
    logger.debug("Executing %s", click_sql)
    click_items = list(self.statdb.execute(click_sql))

    # NOTE(review): the two per-click statements below are built by string
    # interpolation from values read out of the conversion table. If the
    # database handles support bound parameters, switch to them - confirm
    # against the execute() API of self.guangdb before changing.
    pay_sql = ("select num_iid, pay_time, trade_id, item_title, seller_nick, shop_title "
               "from taobao_report where outer_code='%s'")
    item_sql = ("select item.id, num_id, price, volume, votescore, votescore_s2, created, "
                "title, category, shop.name, shop.nick from item,shop "
                "where uctrac_creative_id=%s and item.shop_id=shop.id;")

    samples = []
    for click_item in click_items:
        self.total += 1
        creative_id = click_item[1]

        # Look the item up first: without an item no sample is produced,
        # so the taobao_report query would be wasted work.
        item = list(self.guangdb.execute(item_sql % creative_id))
        if not item:
            self.notfound += 1
            logger.warning("Item not matched creativeid %s %s-%s-%s/%s",
                           creative_id, self.notfound, self.matched,
                           self.total, len(click_items))
            continue

        outer_code = 'jn%s' % click_item[0]
        pay_item = list(self.guangdb.execute(pay_sql % outer_code))

        # A report row for this click marks a positive sample.
        label = 1 if pay_item else 0
        if label:
            self.matched += 1

        # Only the first matching item row is used:
        # price, volume, votescore, votescore_s2, created
        row = item[0]
        samples.append((row[2], row[3], row[4], row[5], date2ts(row[6]), label))

    # The context manager closes the file even if a write fails.
    with open(FLAGS.out_file, "w") as out:
        out.write("price, volume, score, lctr, createts, y\n")
        for sample in samples:
            out.write("%s,%s,%s,%s,%s,%s\n" % sample)
def file_validate():
    """Hand the "big" image files of both image roots to validate_mtime.

    Both glob patterns are expanded first; the current time is read
    afterwards and converted, together with the time 32 minutes before
    it, through ``dateutils.date2ts``.  Each file list is then passed to
    ``validate_mtime`` with those two timestamps.
    """
    patterns = (
        "/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*",
        "/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*",
    )
    # Expand every pattern before the clock is read, as the original does.
    batches = [glob.glob(pattern) for pattern in patterns]

    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(minutes=32)
    ts_now = dateutils.date2ts(window_end)
    ts_front = dateutils.date2ts(window_start)

    for batch in batches:
        validate_mtime(batch, ts_now, ts_front)
def file_validate():
    """Run validate_mtime over the big images under images/ and images_1/.

    The files are listed with ``glob`` before the clock is read.  The
    upper bound passed to ``validate_mtime`` is the current time and the
    lower bound is 32 minutes earlier, both converted with
    ``dateutils.date2ts``.
    """
    big_images = glob.glob(
        "/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*")
    big_images_1 = glob.glob(
        "/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*")

    upper = datetime.datetime.now()
    ts_now = dateutils.date2ts(upper)
    ts_front = dateutils.date2ts(upper - datetime.timedelta(minutes=32))

    # Same order as the listing above: images/ first, then images_1/.
    for found in (big_images, big_images_1):
        validate_mtime(found, ts_now, ts_front)
else: #get file's mofidy time mt = os.path.getmtime(f) # 以后每隔30分钟执行 if mt >= ts_front and mt <= ts_now: i += 1 image = Image.open(f) width, height = image.size f100 = f.replace("/big/", "/small4/") convert_img(f, f100, width, height) logger.info("%s:%s", i, f100) except IOError, e: logger.error("Open image failed %s:%s %s", i, f, e.message) continue logger.info("convert image total: %s", i) if __name__ == "__main__": log_init("CrawlLogger", "sqlalchemy.*") images = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*") images_1 = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*") now_time = datetime.datetime.now() ft = now_time - datetime.timedelta(minutes=32) ts_now = dateutils.date2ts(now_time) ts_front = dateutils.date2ts(ft) validate_mtime(images, ts_now, ts_front) validate_mtime(images_1, ts_now, ts_front)
mt = os.path.getmtime(f) # 以后每隔30分钟执行 if mt >= ts_front and mt <= ts_now: i += 1 image = Image.open(f) width, height = image.size f100 = f.replace("/big/", "/small4/") convert_img(f, f100, width, height) logger.info("%s:%s", i, f100) except IOError, e: logger.error("Open image failed %s:%s %s", i, f, e.message) continue logger.info("convert image total: %s", i) if __name__ == "__main__": log_init("CrawlLogger", "sqlalchemy.*") images = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*") images_1 = glob.glob( "/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*") now_time = datetime.datetime.now() ft = now_time - datetime.timedelta(minutes=32) ts_now = dateutils.date2ts(now_time) ts_front = dateutils.date2ts(ft) validate_mtime(images, ts_now, ts_front) validate_mtime(images_1, ts_now, ts_front)