Пример #1
0
 def process(self):
     """Build click samples for the FLAGS.start..FLAGS.end window and write them as CSV.

     Every row of ``conversion`` whose ``click_time`` falls in
     [FLAGS.start, FLAGS.end) is looked up in ``item``/``shop`` by its
     creative id.  Clicks without a matching item are counted in
     ``self.notfound`` and skipped.  For the others one sample
     ``(price, volume, votescore, votescore_s2, created-timestamp, y)`` is
     recorded, where ``y`` is 1 if ``taobao_report`` holds a row whose
     ``outer_code`` is ``'jn' + click_hash`` and 0 otherwise.

     Side effects: increments ``self.total``, ``self.notfound`` and
     ``self.matched``; overwrites ``FLAGS.out_file``.
     """
     # NOTE(review): every statement below is assembled with string
     # interpolation.  The interpolated values come from flags and from rows
     # read back from the database, but a quote inside click_hash would still
     # break pay_sql -- consider bound parameters once the execute() API of
     # statdb/guangdb is confirmed.
     where = "click_time >= %s and click_time < %s" % (
         time.mktime(FLAGS.start.timetuple()),
         time.mktime(FLAGS.end.timetuple()))
     click_sql = "select click_hash, creative_id, media_id, click_ip, click_time from conversion where %s" % where
     logger.debug("Executing %s", click_sql)
     click_items = list(self.statdb.execute(click_sql))

     samples = []
     for click_item in click_items:
         self.total += 1
         creative_id = click_item[1]

         item_sql = "select item.id, num_id, price, volume, votescore, votescore_s2, created, title, category, shop.name, shop.nick from item,shop where uctrac_creative_id=%s and item.shop_id=shop.id;" % creative_id
         item = list(self.guangdb.execute(item_sql))
         if not item:
             self.notfound += 1
             logger.warn("Item not matched creativeid %s %s-%s-%s/%s", creative_id, self.notfound, self.matched, self.total, len(click_items))
             continue

         # Only look for a payment once the item is known: the result is
         # unused for unmatched clicks, so querying earlier is wasted work.
         outer_code = 'jn%s' % click_item[0]
         pay_sql = "select num_iid, pay_time, trade_id, item_title, seller_nick, shop_title from taobao_report where outer_code='%s'" % outer_code
         pay_item = list(self.guangdb.execute(pay_sql))

         if pay_item:  # positive sample
             self.matched += 1
             label = 1
         else:  # negative sample
             label = 0

         # Columns 2..6 of item_sql: price, volume, votescore, votescore_s2, created
         row = item[0]
         samples.append((row[2], row[3], row[4], row[5], date2ts(row[6]), label))

     # write to files; `with` closes the handle even if a write fails
     with open(FLAGS.out_file, "w") as f:
         f.write("price, volume, score, lctr, createts, y\n")
         for sample in samples:
             f.write("%s,%s,%s,%s,%s,%s\n" % sample)
def file_validate():
    """Pass the "big" image listings of both image roots to validate_mtime.

    The time window handed over ends at the current time and starts
    32 minutes earlier; both bounds are converted with dateutils.date2ts.
    """
    # Collect both listings before the clock is read, as the original did.
    listings = [
        glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*"),
        glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*"),
    ]

    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(minutes=32)
    ts_now = dateutils.date2ts(window_end)
    ts_front = dateutils.date2ts(window_start)

    for listing in listings:
        validate_mtime(listing, ts_now, ts_front)
Пример #3
0
def file_validate():
    """Run validate_mtime over the "big" images under images/ and images_1/.

    validate_mtime receives the upper bound (now) and the lower bound
    (now minus 32 minutes) as values produced by dateutils.date2ts.
    """
    primary_files = glob.glob(
        "/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*")
    secondary_files = glob.glob(
        "/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*")

    current = datetime.datetime.now()
    lookback = datetime.timedelta(minutes=32)
    upper_ts = dateutils.date2ts(current)
    lower_ts = dateutils.date2ts(current - lookback)

    for file_list in (primary_files, secondary_files):
        validate_mtime(file_list, upper_ts, lower_ts)
            else:
                # get the file's modification time
                mt = os.path.getmtime(f)
                # from then on this runs every 30 minutes
                if mt >= ts_front and mt <= ts_now:
                    i += 1
                    image = Image.open(f)
                    width, height = image.size

                    f100 = f.replace("/big/", "/small4/")
                    convert_img(f, f100, width, height)

                    logger.info("%s:%s", i, f100)
        except IOError, e:
            logger.error("Open image failed %s:%s %s", i, f, e.message)
            continue
    logger.info("convert image total: %s", i)

if __name__ == "__main__":
    # Script entry point: set up logging, list the "big" images under both
    # image roots, and hand each listing to validate_mtime together with a
    # window that ends now and starts 32 minutes earlier.
    log_init("CrawlLogger", "sqlalchemy.*")

    images = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*")
    images_1 = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*")

    now_time = datetime.datetime.now()
    ft = now_time - datetime.timedelta(minutes=32)
    # Convert the upper bound first, then the lower bound.
    ts_now, ts_front = (dateutils.date2ts(moment) for moment in (now_time, ft))

    for image_list in (images, images_1):
        validate_mtime(image_list, ts_now, ts_front)
Пример #5
0
                mt = os.path.getmtime(f)
                # from then on this runs every 30 minutes
                if mt >= ts_front and mt <= ts_now:
                    i += 1
                    image = Image.open(f)
                    width, height = image.size

                    f100 = f.replace("/big/", "/small4/")
                    convert_img(f, f100, width, height)

                    logger.info("%s:%s", i, f100)
        except IOError, e:
            logger.error("Open image failed %s:%s %s", i, f, e.message)
            continue
    logger.info("convert image total: %s", i)


if __name__ == "__main__":
    # Script entry point: initialise logging, glob the "big" images of both
    # image roots, then call validate_mtime on each listing with the bounds
    # (now, now - 32 minutes) converted by dateutils.date2ts.
    log_init("CrawlLogger", "sqlalchemy.*")

    images = glob.glob("/space/wwwroot/image.guang.j.cn/ROOT/images/*/big/*.*")
    images_1 = glob.glob(
        "/space/wwwroot/image.guang.j.cn/ROOT/images_1/*/big/*.*")

    now_time = datetime.datetime.now()
    ft = now_time - datetime.timedelta(minutes=32)
    # Upper bound is converted before the lower bound.
    ts_now, ts_front = (dateutils.date2ts(moment) for moment in (now_time, ft))

    for image_list in (images, images_1):
        validate_mtime(image_list, ts_now, ts_front)