Пример #1
0
def add_vendor_product(name, price, currency, sku_id, unknown=False):
    """
    add vendor product information to DB(haystack/vendor_product)
    :return: 'success'
    """
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    if not unknown:
        is_exist = True if get_product_id(sku_id) else False
        if not is_exist:
            new_product = VendorProduct(product_name=name,
                                        product_price=price,
                                        currency=currency,
                                        sku_id=sku_id)
            session.add(new_product)
            session.commit()
    else:
        new_product = VendorProduct(product_name='unknown',
                                    product_price=0.00,
                                    is_new=-1,
                                    sku_id=sku_id)
        session.add(new_product)
        session.flush()
        new_product_id = new_product.product_id
        session.commit()

    session.close()
    return new_product_id
Пример #2
0
def get_sites(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret = session.query(VendorProduct.product_name).filter(
        VendorProduct.product_id == product_id).first()
    if not ret:
        return
    product_name = ret.product_name
    ret = session.query(ProductSite).filter(
        ProductSite.product_id == product_id).all()
    sites = []
    for r in ret:
        product_site_id = r.product_site_id
        site_id = r.site_id
        ret_site = session.query(
            Site.full_url).filter(Site.site_id == site_id).first()
        if not ret_site:
            continue
        url = ret_site.full_url
        site_info = {
            'site_id': site_id,
            'product_site_id': product_site_id,
            'url': url,
            'product_name': product_name
        }
        sites.append(site_info)
    session.close()
    return sites
Пример #3
0
def check_site_by_id(site_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret = session.query(Site.site_id).filter(Site.site_id == site_id).first()
    session.close()
    return ret is not None
Пример #4
0
def reset_site_status():
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(Site).update({Site.status: 0})
    session.commit()
    session.close()
Пример #5
0
def add_site(url, product_id):
    """
    parse domain, pattern_url, full_url from url
    :param url: url
    :param product_id: vendor product id
    :return: site_id
    """
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    # db/haystack/site
    ret = session.query(Site).filter(Site.full_url == url).first()
    if not ret:
        pattern = re.compile(url_regex)
        url_match = pattern.match(url)
        url_domain = url_match.group(3)
        new_site = Site(domain=url_domain, full_url=url)
        session.add(new_site)
        session.commit()
        ret = session.query(Site.site_id).filter(Site.full_url == url).first()
        site_id = ret.site_id
        # db/haystack/product_site
        new_product_site = ProductSite(product_id=product_id, site_id=site_id)
        session.add(new_product_site)
        session.commit()

    session.close()
Пример #6
0
def crawl_data(url, site_id, max_retry_times):
    """
    crawl and save url raw data in local
    :param url: url
    :param site_id: site_id as file name
    :param max_retry_times: max retry times
    :return: True if success, else False
    """
    success_flag = False
    if not check_site_by_id(site_id):
        return success_flag

    current_retry_count = 0
    while current_retry_count < max_retry_times:
        try:
            req_headers = {
                'User-Agent':
                'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)'
            }
            req = urllib.request.Request(url, headers=req_headers)
            response = urllib.request.urlopen(req, timeout=60)
            content = response.read()
            # Save to file
            data_file_path = config.URL_CRAWLED_DATA_DIR + str(
                site_id) + '_new'
            with open(data_file_path, 'wb') as f:
                f.write(content)
            # update last indexed time in DB(haystack/site)
            engine = mysql_engine()
            db_session = sessionmaker(bind=engine)
            session = db_session()
            session.query(Site).filter(Site.site_id == site_id).update({
                Site.last_indexed:
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            })
            session.commit()
            session.close()
            success_flag = True
            break
        except urllib.error.HTTPError as e:
            print(e.code, e.reason)
            current_retry_count += 1
            print('Retry:', current_retry_count, '/', max_retry_times)
            continue
        except urllib.error.URLError as e:
            print(e.reason)
            current_retry_count += 1
            print('Retry:', current_retry_count, '/', max_retry_times)
            continue
        except ConnectionResetError as e:
            print('ConnectionResetError')
            time.sleep(random.uniform(0, 2))
            current_retry_count += 1
            print('Retry:', current_retry_count, '/', max_retry_times)
            continue
        except Exception as e:
            print('Unexpected exception:', str(e))
            break
    return success_flag
Пример #7
0
def finish_product_searching(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(Product).filter(Product.product_id == product_id).update(
        {Product.is_searching: 0})
    session.commit()
    session.close()
Пример #8
0
def add_product(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    new_product = Product(product_id=product_id)
    session.add(new_product)
    session.commit()
    session.close()
Пример #9
0
def clear_db():
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(Site).delete()
    session.commit()
    session.query(VendorProduct).delete()
    session.commit()
    session.close()
Пример #10
0
def update_product_status(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(VendorProduct).filter(
        VendorProduct.product_id == product_id).update(
            {VendorProduct.is_new: 0})
    session.commit()
    session.close()
Пример #11
0
def update_site_status(product_site_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret = session.query(ProductSite.site_id).filter(
        ProductSite.product_site_id == product_site_id).first()
    session.query(Site).filter(Site.site_id == ret.site_id).update(
        {Site.status: 1})
    session.commit()
    session.close()
Пример #12
0
def get_site_domain(site_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret = session.query(Site.domain).filter(Site.site_id == site_id).first()
    if not ret:
        return None
    domain = ret.domain
    session.close()
    return domain
Пример #13
0
def format_currency():
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret = session.query(ProductEvent)
    for r in ret:
        formatted_currency = currency_formatter(r.currency)
        session.query(ProductEvent).filter(ProductEvent.event_id == r.event_id)\
            .update({ProductEvent.currency: formatted_currency})
    session.commit()
    session.close()
Пример #14
0
def get_status(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret1 = session.query(func.count(Site.site_id).label('count'))\
        .select_from(Site, ProductSite).filter(ProductSite.site_id == Site.site_id)\
        .filter(ProductSite.product_id == product_id).first()
    ret2 = session.query(func.count(Site.site_id).label('count'))\
        .select_from(Site, ProductSite).filter(ProductSite.site_id == Site.site_id) \
        .filter(ProductSite.product_id == product_id).filter(Site.status == 1).first()
    return ret1.count, ret2.count
Пример #15
0
def get_image_urls(product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret = session.query(ProductImageUrl).filter(
        ProductImageUrl.product_id == product_id).all()
    image_urls = []
    for r in ret:
        image_urls.append(r.image_url)
    session.close()
    return image_urls
Пример #16
0
def update_site(site_id, site_type, country):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(Site).filter(Site.site_id == site_id).update({
        Site.site_type:
        site_type,
        Site.country:
        country
    })
    session.commit()
    session.close()
Пример #17
0
def set_site_finished(site_id, debug_info):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    session.query(Site).filter(Site.site_id == site_id).update({
        Site.is_finished:
        1,
        Site.debug_info:
        json.dumps(debug_info)
    })
    session.commit()
    session.close()
Пример #18
0
def add_image_url(url, product_id):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    # db/haystack/product_image_url
    ret = session.query(ProductImageUrl.image_id).filter(
        ProductImageUrl.image_url == url).first()
    if not ret:
        new_image_url = ProductImageUrl(image_url=url, product_id=product_id)
        session.add(new_image_url)
        session.commit()

    session.close()
Пример #19
0
def get_product_id(sku_id, new_only=False):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    if not new_only:
        ret = session.query(VendorProduct).filter(
            VendorProduct.sku_id == sku_id).first()
    else:
        ret = session.query(VendorProduct).filter(VendorProduct.sku_id == sku_id)\
            .filter(VendorProduct.is_new == 1).first()
    session.close()
    if not ret:
        return None
    else:
        return ret.product_id
Пример #20
0
def remove_from_favourites(product_id):
    """
    :return: 'success' / 'fail'
    """
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    try:
        session.query(VendorProduct).filter(
            VendorProduct.product_id == product_id).update(
                {VendorProduct.is_favourite: 0})
        session.commit()
        session.close()
        status = "success"
    except Exception as e:
        print(e)
        session.close()
        status = "fail"

    return status
Пример #21
0
def add_site(product_id, url):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret = session.query(Site).filter(Site.full_url == url).filter(
        Site.product_id == product_id).first()
    if ret:
        site_id = ret.site_id
    else:
        pattern = re.compile(url_regex)
        url_match = pattern.match(url)
        domain = url_match.group(3)
        new_site = Site(domain=domain, full_url=url, product_id=product_id)
        session.add(new_site)
        session.flush()
        site_id = new_site.site_id
        session.commit()

    session.close()
    return site_id
Пример #22
0
def add_event(site_id,
              price,
              currency,
              event_time,
              status=None,
              event_type=None):
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    price = float('%.2f' % float(price))
    if len(str(currency)) > 20:
        currency = ''
    currency = currency_formatter(currency)
    new_event = ProductEvent(site_id=site_id,
                             product_price=price,
                             product_currency=currency,
                             product_status=status,
                             event_time=event_time,
                             event_type=event_type)
    session.add(new_event)
    session.commit()
    session.close()
Пример #23
0
def get_crawled_sites():
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()
    ret = session.query(Site).filter(Site.last_indexed.isnot(None))

    crawled_sites = []
    for r in ret:
        temp = session.query(ProductSite.product_id).filter(
            ProductSite.site_id == r.site_id).first()
        temp = session.query(VendorProduct.is_new).filter(
            VendorProduct.product_id == temp.product_id).first()
        if temp.is_new == -1:
            continue
        else:
            crawled_sites.append({
                'site_id': r.site_id,
                'domain': r.domain,
                'full_url': r.full_url,
                'last_indexed': r.last_indexed
            })
    session.close()
    return crawled_sites
Пример #24
0
def init_db():
    # Connect to mysql
    engine = create_engine('mysql+pymysql://%s:%s@%s:%s'
                           % (mysql_conf['user'],
                              mysql_conf['passwd'],
                              mysql_conf['host'],
                              mysql_conf['port']))

    database_name = mysql_conf['db']
    # Check database
    existing_databases = engine.execute("SHOW DATABASES;")
    existing_databases = [d[0] for d in existing_databases]
    # Create database if not exists
    if database_name not in existing_databases:
        sql = 'CREATE DATABASE IF NOT EXISTS %s' % database_name
        engine.execute(sql)
        print('Database created')
    else:
        print('Database already exist')

    # Create schema
    db_engine = mysql_engine()
    Base.metadata.create_all(db_engine)
Пример #25
0
def api_get_vendor_product(product_id):
    with open(os.path.join(config.RULE_DIR, 'currency_symbols.json')) as f:
        currency_mapper = json.load(f)
    if not currency_mapper:
        print('[Error]: missing currency_symbols.json')
        return

    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret_products = session.query(VendorProduct, ProductSite)\
        .filter(VendorProduct.product_id == product_id)\
        .filter(ProductSite.product_id == VendorProduct.product_id)\
        .order_by(ProductSite.product_id).all()

    vendor_product = None
    if not ret_products:
        return vendor_product

    i = 0
    while i < len(ret_products):
        c_product_id = ret_products[i].VendorProduct.product_id
        c_product_name = ret_products[i].VendorProduct.product_name
        c_vendor_price = ret_products[i].VendorProduct.product_price
        c_currency = ret_products[i].VendorProduct.currency
        c_is_favourite = ret_products[i].VendorProduct.is_favourite
        c_sku_id = ret_products[i].VendorProduct.sku_id

        price_list = []
        event_list = []
        while i < len(ret_products) and ret_products[
                i].VendorProduct.product_id == c_product_id:
            c_product_site_id = ret_products[i].ProductSite.product_site_id
            ret_events = session.query(ProductEvent, ProductSite, Site)\
                .filter(ProductEvent.product_site_id == ProductSite.product_site_id)\
                .filter(ProductSite.site_id == Site.site_id)\
                .filter(ProductEvent.product_site_id == c_product_site_id).all()

            for e in ret_events:
                if e.ProductEvent.currency not in currency_mapper.values():
                    continue
                temp = {
                    'price': e.ProductEvent.product_price,
                    'currency': e.ProductEvent.currency
                }
                price_list.append(temp)
                event_list.append(e)
            i += 1

        event_list.sort(key=lambda obj: obj.ProductEvent.event_time,
                        reverse=True)
        events = []
        for e in event_list:
            event = {
                'event_id': e.ProductEvent.event_id,
                'product_site_id': e.ProductEvent.product_site_id,
                'event_type': e.ProductEvent.event_type,
                'event_time': e.ProductEvent.event_time,
                'price_change': e.ProductEvent.price_change,
                'product_price': e.ProductEvent.product_price,
                'currency': e.ProductEvent.currency,
                'domain': e.Site.domain,
                'full_url': e.Site.full_url
            }
            events.append(event)

        sites_count, finished_sites_count = get_status(c_product_id)
        vendor_product = {
            'product_id': c_product_id,
            'product_name': c_product_name,
            'currency': c_currency,
            'vendor_price': c_vendor_price,
            'sku_id': c_sku_id,
            'is_favourite': c_is_favourite,
            'events': events,
            'sites_count': sites_count,
            'finished_sites_count': finished_sites_count
        }

    session.close()
    return vendor_product
Пример #26
0
def get_events():
    """
    get events
    :return: events ordered by time
             [{'event_id', 'product_site_id', 'event_time', 'product_price', 'product_price_compare',
               'currency', 'event_type', 'domain', 'full_url', 'product_id', 'product_name'}, ...]
    """
    with open(os.path.join(config.RULE_DIR, 'currency_symbols.json')) as f:
        currency_mapper = json.load(f)
    if not currency_mapper:
        print('[Error]: missing currency_symbols.json')
        return

    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret_events = session.query(ProductEvent, ProductSite).filter(
        ProductEvent.product_site_id == ProductSite.product_site_id).order_by(
            ProductEvent.event_time.desc()).all()

    events = []
    if not ret_events:
        return events
    for e in ret_events:
        if e.ProductEvent.currency not in currency_mapper.values():
            continue

        c_site_id = e.ProductSite.site_id
        c_product_id = e.ProductSite.product_id
        c_site = session.query(Site).filter(Site.site_id == c_site_id).first()
        c_vendor_product = session.query(VendorProduct).filter(
            VendorProduct.product_id == c_product_id).first()
        if c_vendor_product.is_new == -1:
            continue

        temp_price_list = [{
            'price': e.ProductEvent.product_price,
            'currency': e.ProductEvent.currency
        }]
        exchanged_price = exchange_price_list(temp_price_list,
                                              c_vendor_product.currency)
        exchanged_price = exchanged_price[0]
        product_price_compare = check_price(c_vendor_product.product_price,
                                            exchanged_price)
        if product_price_compare == 'abnormal':
            continue
        event = {
            'event_id': e.ProductEvent.event_id,
            'product_site_id': e.ProductEvent.product_site_id,
            'event_time': e.ProductEvent.event_time,
            'product_price': e.ProductEvent.product_price,
            'product_price_compare': product_price_compare,
            'currency': e.ProductEvent.currency,
            'event_type': e.ProductEvent.event_type,
            'price_change': e.ProductEvent.price_change,
            'domain': c_site.domain,
            'full_url': c_site.full_url,
            'product_id': c_product_id,
            'product_name': c_vendor_product.product_name
        }
        events.append(event)

    session.close()
    return events
Пример #27
0
def get_vendor_product(product_id=-1):
    """
    get vendor products
    :param product_id: get product according to product_id. if default -1, get all products
    :return: vendor products information
             [{'product_id', 'product_name', 'currency', 'vendor_price',
               'max_price', 'min_price', 'avg_price', 'is_favourite',
               'events': {
                    'event_id', 'product_site_id', 'event_type', 'event_time',
                    'product_price', 'product_price_compare', 'currency', 'domain', 'full_url'
               }
             }]
    """
    with open(os.path.join(config.RULE_DIR, 'currency_symbols.json')) as f:
        currency_mapper = json.load(f)
    if not currency_mapper:
        print('[Error]: missing currency_symbols.json')
        return

    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    if product_id == -1:
        ret_products = session.query(VendorProduct, ProductSite).filter(
            ProductSite.product_id == VendorProduct.product_id).order_by(
                ProductSite.product_id).all()
    else:
        ret_products = session.query(
            VendorProduct,
            ProductSite).filter(VendorProduct.product_id == product_id).filter(
                ProductSite.product_id == VendorProduct.product_id).order_by(
                    ProductSite.product_id).all()

    vendor_products = []
    if not ret_products:
        return vendor_products

    i = 0
    while i < len(ret_products):
        if ret_products[i].VendorProduct.is_new == -1:
            i += 1
            continue
        c_product_id = ret_products[i].VendorProduct.product_id
        c_product_name = ret_products[i].VendorProduct.product_name
        c_vendor_price = ret_products[i].VendorProduct.product_price
        c_currency = ret_products[i].VendorProduct.currency
        c_is_favourite = ret_products[i].VendorProduct.is_favourite
        c_sku_id = ret_products[i].VendorProduct.sku_id

        price_list = []
        event_list = []
        while (i < len(ret_products)
               and ret_products[i].VendorProduct.product_id == c_product_id):
            c_product_site_id = ret_products[i].ProductSite.product_site_id
            ret_events = session.query(ProductEvent, ProductSite, Site).filter(
                ProductEvent.product_site_id == ProductSite.product_site_id
            ).filter(ProductSite.site_id == Site.site_id).filter(
                ProductEvent.product_site_id == c_product_site_id).all()

            for e in ret_events:
                if e.ProductEvent.currency not in currency_mapper.values():
                    continue

                temp_price_list = [{
                    'price': e.ProductEvent.product_price,
                    'currency': e.ProductEvent.currency
                }]
                exchanged_price = exchange_price_list(temp_price_list,
                                                      c_currency)
                exchanged_price = exchanged_price[0]
                product_price_compare = check_price(c_vendor_price,
                                                    exchanged_price)
                if product_price_compare == 'abnormal':
                    continue

                temp = {
                    'price': e.ProductEvent.product_price,
                    'currency': e.ProductEvent.currency
                }
                price_list.append(temp)
                event_list.append(e)
            i += 1

        if len(price_list) > 0:
            exchanged_price_list = exchange_price_list(price_list, c_currency)
            max_price = min_price = exchanged_price_list[0]
            sum_price = 0
            for p in exchanged_price_list:
                if p > max_price:
                    max_price = p
                if p < min_price:
                    min_price = p
                sum_price += p
            avg_price = round(sum_price / len(price_list), 2)
        else:
            max_price = min_price = avg_price = 0

        event_list.sort(key=lambda obj: obj.ProductEvent.event_time,
                        reverse=True)
        events = []
        for e in event_list:
            temp_price_list = [{
                'price': e.ProductEvent.product_price,
                'currency': e.ProductEvent.currency
            }]
            exchanged_price = exchange_price_list(temp_price_list, c_currency)
            exchanged_price = exchanged_price[0]
            product_price_compare = check_price(c_vendor_price,
                                                exchanged_price)
            event = {
                'event_id': e.ProductEvent.event_id,
                'product_site_id': e.ProductEvent.product_site_id,
                'event_type': e.ProductEvent.event_type,
                'event_time': e.ProductEvent.event_time,
                'price_change': e.ProductEvent.price_change,
                'product_price': e.ProductEvent.product_price,
                'exchanged_price': exchanged_price,
                'product_price_compare': product_price_compare,
                'currency': e.ProductEvent.currency,
                'domain': e.Site.domain,
                'full_url': e.Site.full_url
            }
            events.append(event)

        avg_price_compare = check_price(c_vendor_price, avg_price)
        sites_count, finished_sites_count = get_status(c_product_id)
        vendor_product = {
            'product_id': c_product_id,
            'product_name': c_product_name,
            'currency': c_currency,
            'vendor_price': c_vendor_price,
            'sku_id': c_sku_id,
            'max_price': max_price,
            'min_price': min_price,
            'avg_price': avg_price,
            'avg_price_compare': avg_price_compare,
            'is_favourite': c_is_favourite,
            'events': events,
            'sites_count': sites_count,
            'finished_sites_count': finished_sites_count
        }
        vendor_products.append(vendor_product)

    session.close()
    return vendor_products
Пример #28
0
def update_events(event):
    """
    add or update in DB(haystack/product_event)
    :param event: dict, {'product_site_id', 'event_time', 'product_price', 'currency'}
    """
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    product_price = float('%.2f' % float(event.get('product_price')))
    currency = event.get('currency')
    if len(str(currency)) > 20:
        currency = ''
    currency = currency_formatter(currency)
    product_site_id = event.get('product_site_id')
    event_time = event.get('event_time')

    exist_events = session.query(ProductEvent).filter(
        ProductEvent.product_site_id == product_site_id).order_by(
            ProductEvent.event_time.asc()).all()

    need_to_save = True
    event_type = 'update'
    price_change = 1
    if not exist_events:
        event_type = 'add'
    else:
        for i in range(len(exist_events)):
            if exist_events[i].product_price == product_price:
                price_change = 0
                if exist_events[i].event_time.strftime(
                        '%Y-%m-%d %H:%M:%S') == event_time:
                    need_to_save = False
                    break
                else:
                    if event_time < exist_events[i].event_time.strftime(
                            '%Y-%m-%d %H:%M:%S') and i == 0:
                        event_type = 'add'
                        session.query(ProductEvent).filter(
                            ProductEvent.event_id ==
                            exist_events[i].event_id).update({
                                ProductEvent.event_type:
                                'update',
                                ProductEvent.price_change:
                                0
                            })
                        session.commit()
                        break
            else:
                price_change = 1
                if exist_events[i].event_time.strftime(
                        '%Y-%m-%d %H:%M:%S') == event_time:
                    need_to_save = False
                    session.query(ProductEvent).filter(
                        ProductEvent.event_id ==
                        exist_events[i].event_id).update(
                            {ProductEvent.product_price: product_price})
                    if i > 0:
                        previous_price = session.query(ProductEvent.product_price)\
                            .filter(ProductEvent.event_id == exist_events[i - 1].event_id)
                        session.query(ProductEvent).filter(ProductEvent.event_id == exist_events[i - 1].event_id)\
                            .update({ProductEvent.price_change: (1 if product_price != previous_price else 0)})
                    if i < len(exist_events) - 1:
                        next_price = session.query(ProductEvent.product_price)\
                            .filter(ProductEvent.event_id == exist_events[i + 1].event_id)
                        session.query(ProductEvent).filter(ProductEvent.event_id == exist_events[i + 1].event_id)\
                            .update({ProductEvent.price_change: (1 if product_price != next_price else 0)})
                    session.commit()
                    break
                else:
                    if event_time < exist_events[i].event_time.strftime(
                            '%Y-%m-%d %H:%M:%S') and i == 0:
                        event_type = 'add'
                        session.query(ProductEvent).filter(ProductEvent.event_id == exist_events[i].event_id)\
                            .update({ProductEvent.event_type: 'update'})
                        session.commit()
                        break

    if need_to_save:
        # Save to DB
        new_product_event = ProductEvent(product_site_id=product_site_id,
                                         event_time=event_time,
                                         event_type=event_type,
                                         product_price=product_price,
                                         currency=currency,
                                         price_change=price_change)
        session.add(new_product_event)
        session.commit()

    session.close()
Пример #29
0
def get_product_site(product_site_id, event_id=-1):
    """
    get product sites
    :param product_site_id: product_site_id
    :param event_id: get event according to event_id. if default -1, get all events
    """
    with open(os.path.join(config.RULE_DIR, 'currency_symbols.json')) as f:
        currency_mapper = json.load(f)
    if not currency_mapper:
        print('[Error]: missing currency_symbols.json')
        return

    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    ret = session.query(
        ProductSite, Site,
        VendorProduct).filter(ProductSite.site_id == Site.site_id).filter(
            ProductSite.product_site_id == ProductEvent.product_site_id
        ).filter(ProductSite.product_id == VendorProduct.product_id).filter(
            ProductSite.product_site_id == product_site_id).first()

    c_domain = ret.Site.domain
    c_full_url = ret.Site.full_url
    c_country = ret.Site.country
    c_site_type = ret.Site.site_type
    c_last_indexed = ret.Site.last_indexed
    c_product_id = ret.VendorProduct.product_id
    c_product_name = ret.VendorProduct.product_name
    c_vendor_price = ret.VendorProduct.product_price
    c_currency = ret.VendorProduct.currency

    ret_events = session.query(ProductSite, ProductEvent).filter(
        ProductEvent.product_site_id == ProductSite.product_site_id).filter(
            ProductSite.product_site_id == product_site_id).order_by(
                ProductEvent.event_time.desc()).all()

    events = []

    c_event_time = None
    c_product_status = None
    get_current_price = False
    c_price = -1
    c_price_compare = 'normal'
    for ret_event in ret_events:
        if ret_event.ProductEvent.currency not in currency_mapper.values():
            continue
        temp_price_list = [{
            'price': ret_event.ProductEvent.product_price,
            'currency': ret_event.ProductEvent.currency
        }]
        exchanged_price = exchange_price_list(temp_price_list, c_currency)
        exchanged_price = exchanged_price[0]
        product_price_compare = check_price(c_vendor_price, exchanged_price)
        if product_price_compare == 'abnormal':
            continue
        event = {
            'event_id': ret_event.ProductEvent.event_id,
            'event_type': ret_event.ProductEvent.event_type,
            'event_time': ret_event.ProductEvent.event_time,
            'product_price': exchanged_price,
            'product_price_compare': product_price_compare,
            'price_change': ret_event.ProductEvent.price_change,
        }
        events.append(event)

        if str(event_id) == str(-1) and not get_current_price:
            c_price = exchanged_price
            c_price_compare = product_price_compare
            c_event_time = ret_event.ProductEvent.event_time
            c_product_status = ret_event.ProductEvent.product_status
            get_current_price = True

        if str(event_id) != str(-1) and not get_current_price:
            if str(ret_event.ProductEvent.event_id) == str(event_id):
                c_price = exchanged_price
                c_price_compare = product_price_compare
                c_event_time = ret_event.ProductEvent.event_time
                c_product_status = ret_event.ProductEvent.product_status
                get_current_price = True

    product_site_info = {
        'product_site_id': product_site_id,
        'domain': c_domain,
        'full_url': c_full_url,
        'country': c_country,
        'site_type': c_site_type,
        'last_indexed': c_last_indexed,
        'product_id': c_product_id,
        'product_name': c_product_name,
        'current_price': c_price,
        'current_price_compare': c_price_compare,
        'current_event_time': c_event_time,
        'current_product_status': c_product_status,
        'currency': c_currency,
        'events': events
    }

    session.close()
    return product_site_info
Пример #30
0
def beautify_event_time():
    engine = mysql_engine()
    db_session = sessionmaker(bind=engine)
    session = db_session()

    all_events = session.query(ProductEvent).all()
    for event in all_events:
        ret = session.query(ProductEventBak).filter(
            ProductEventBak.event_id == event.event_id).first()
        if not ret:
            new_event_bak = ProductEventBak(
                event_id=event.event_id,
                product_site_id=event.product_site_id,
                event_time=event.event_time,
                event_type=event.event_type,
                product_name=event.product_name,
                product_price=event.product_price,
                currency=event.currency,
                product_status=event.product_status,
                price_change=event.price_change)
            session.add(new_event_bak)
    session.commit()

    ret = session.query(VendorProduct.product_id).all()
    for r in ret:
        product_id = r.product_id
        all_events = session.query(ProductSite, ProductEvent).filter(
            ProductEvent.product_site_id == ProductSite.product_site_id
        ).filter(ProductSite.product_id == product_id).all()
        if not all_events:
            continue
        all_events_copy = all_events
        all_events_copy.sort(key=lambda obj: obj.ProductEvent.event_time,
                             reverse=True)
        latest_event_time = all_events_copy[0].ProductEvent.event_time
        date_list = generate_random_date(str(latest_event_time),
                                         len(all_events))
        random.shuffle(date_list)
        new_data = list(zip(all_events, date_list))
        new_data.sort(key=lambda obj: obj[-1])

        exist_product_site_id = []
        for data in new_data:
            if data[0].ProductEvent.product_site_id not in exist_product_site_id:
                session.query(ProductEvent).filter(
                    ProductEvent.event_id ==
                    data[0].ProductEvent.event_id).update({
                        ProductEvent.event_time:
                        data[-1],
                        ProductEvent.event_type:
                        'add'
                    })
                session.commit()
                exist_product_site_id.append(
                    data[0].ProductEvent.product_site_id)
            else:
                session.query(ProductEvent).filter(
                    ProductEvent.event_id ==
                    data[0].ProductEvent.event_id).update({
                        ProductEvent.event_time:
                        data[-1],
                        ProductEvent.event_type:
                        'update'
                    })
                session.commit()

    session.close()