Пример #1
0
def get_result_task(count_page, url_task):
    """
    Collect the offers of one task by walking through its pagination pages.

    Pages are requested one by one (``url_task + '&p=<page number>'``) until
    either ``count_page`` pages were processed or ``get_page_data`` reports
    that pagination must stop.

    :param count_page: number of pagination pages (anything ``int()`` accepts)
    :param url_task: task URL the page number is appended to
    :return: list with the rows of every page that was parsed
    """
    result = []
    for page_number in range(1, int(count_page) + 1):
        log.info('Parsing page# ' + str(page_number) + ' of ' + str(count_page))
        page_url = url_task + '&p=' + str(page_number)
        try:
            page_rows, next_pagination = get_page_data(page_url, 1)
        except Exception:
            # One broken page must not abort the whole task: report it and go
            # on with the next page. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt/SystemExit stop the crawl as expected.
            page_rows, next_pagination = [], True
            error_message = 'Error get_page_data' + '\n ' + page_url
            text_handler(EXEPTION_CHAT, error_message)
            # log.exception keeps the traceback that log.error would drop
            log.exception(error_message)

        result += page_rows
        time_sleep()
        if not next_pagination:  # the page asked to stop the pagination
            break
    return result
Пример #2
0
def send_mes_to_bot(item):
    """
    Build the Telegram message text for one offer and hand it to ``text_handler``.

    The message is assembled from separate lines (header with the offer id,
    name, price, price history, address, params, url); which of them are used
    depends on ``item['sql_type_of']``.

    :param item: dict describing the offer; ``item['type_update']`` is expected
                 to be ``'update'`` or ``'new'``
    :return: None
    """
    from main import log

    header = ''         # ID line
    price_line = ''     # price line
    history_block = ''  # price history block

    update_kind = item['type_update']
    if update_kind == 'update':
        # item_price is compared as a list holding a one-element tuple
        if item['item_price'] >= [(item['sql_price'], )]:
            markers = emoji_down + emoji_down + emoji_top_green
            previous_price = item['old_price']
        else:
            markers = emoji_top + emoji_top + emoji_down_red
            previous_price = item['item_price'][0][0]

        header = 'Обновилась цена id ' + str(item['sql_avito_id']) + '  ' + markers + '\n\n'
        price_line = (
            'Старая цена = ' + str(num_conversion(previous_price)) + ' руб. /'
            + ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'
        )
        history_block = (
            'Изменения цен \n' + str(item['price_history_srt']) + '\nРазница: '
            + item['difference_price'] + ' (' + item['percent_difference_price'] + '%)\n\n'
        )
    elif update_kind == 'new':
        header = 'Новое объявление ' + str(item['sql_avito_id']) + '\n\n'
        price_line = 'Цена: ' + str(item['sql_price']) + ' руб.\n\n'
    else:
        log.error('type_update = NONETYPE ' + str(item['sql_avito_id']))

    name_line = str(item['sql_name']) + '\n\n'
    address_line = 'Адрес: ' + str(item['sql_address']) + '\n\n'
    params_line = 'Параметры: ' + str(item['sql_params']) + '\n\n'
    url_line = 'Ссылка ' + str(item['sql_url']) + '\n\n'

    # Categories whose message has neither the address nor the params line
    short_layout_categories = (
        'Личные вещи', 'Работа', 'Для дома и дачи', 'Предложение услуг',
        'Электроника', 'Животные', 'Готовый бизнес и оборудование',
    )

    category = item['sql_type_of']
    if category == 'Недвижимость':
        parts = (header, price_line, history_block, address_line, url_line)
    elif category == 'Транспорт':
        parts = (header, name_line, price_line, history_block, params_line, url_line)
    elif category == 'Хобби и отдых':
        parts = (header, name_line, price_line, history_block,
                 address_line, params_line, url_line)
    elif category in short_layout_categories:
        parts = (header, name_line, price_line, history_block, url_line)
    else:
        log.error('sql_type_of = NONETYPE ' + str(item['sql_avito_id']))
        parts = ('sql_type_of = NONETYPE ' + str(item['sql_avito_id']), )

    text_handler(item['sql_chat'], ''.join(parts))
Пример #3
0
def get_soup_from_page(page_url, count_try):
    """
    Download a page and parse it with BeautifulSoup, retrying on HTTP errors.

    Behaviour by response status:

    * 200 - the page is parsed and returned;
    * 403 - no retry, ``next_parsing`` is ``False``;
    * 429 - sleep 10 minutes (first attempt) or 15 minutes (later attempts)
      and retry;
    * anything else - sleep ``get_random_time()`` and retry.

    The 4th attempt is the last one: if it still fails, ``None`` is returned
    instead of a soup. Every failure is reported through ``text_handler`` and
    the log.

    :param page_url: URL of the page to download
    :param count_try: number of the current attempt (callers start with 1)
    :return: tuple ``(soup, next_parsing)``; ``soup`` is ``None`` when the page
             could not be fetched, ``next_parsing`` is ``False`` only after a 403
    """
    max_tries = 4

    session = get_session()
    r = session.get(page_url)
    status = r.status_code

    if status == 200:
        return BeautifulSoup(r.text, 'html.parser'), True

    if status == 403:
        error_message = 'Error: ' + str(status) + ' \nTime to sleep. Exit.'
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
        return None, False

    if count_try >= max_tries:
        # Retries are used up. Never parse the body of a failed response.
        error_message = 'Error: ' + str(status) + ' Try ended'
        text_handler(EXEPTION_CHAT, error_message)
        log.warning(error_message)
        return None, True

    if status == 429:
        if count_try < 2:
            sleep_note, sleep_seconds = 'Sleep 10min.', 600
        else:
            sleep_note, sleep_seconds = 'Sleep 15min.', 900
        error_message = 'Error: ' + str(status) + ' \nToo many request. ' + sleep_note + \
                        ' \nTry № ' + str(count_try) + '\n' + str(page_url)
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
        time.sleep(sleep_seconds)
    else:
        error_message = 'Error: ' + str(status) + ' Try № ' + str(count_try) + '\n' + str(page_url)
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
        time_sleep(get_random_time())

    # Return the retry's (soup, next_parsing) pair as is: wrapping it again
    # would hand callers a nested tuple and lose a 403 seen during the retry.
    return get_soup_from_page(page_url, count_try + 1)
Пример #4
0
def get_count_page(soup, url_task):
    """
    Read the number of pagination pages from a parsed page.

    The prev/next buttons are removed from the pagination block (when they are
    present) and the text of the last remaining ``span`` is taken as the page
    count. If the pagination block cannot be read, or that text is not a
    number, the problem is reported and ``1`` is returned.

    :param soup: parsed page (BeautifulSoup object)
    :param url_task: task URL, used only in the error message
    :return: page count as found in the markup (text), or ``1`` on failure
    """
    try:
        pagination = soup.find('div', {"data-marker": "pagination-button"})
        for marker in ("pagination-button/prev", "pagination-button/next"):
            button = pagination.find('span', {"data-marker": marker})
            if button is not None:  # a missing button is not an error
                button.decompose()
        count_page = pagination.find_all('span')[-1].text
        int(count_page)  # callers convert with int(); reject non-numeric text here
    except Exception:
        count_page = 1
        error_message = 'Error pagination' + '\n ' + url_task
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
    return count_page
Пример #5
0
def get_page_rows(soup, type_of):
    """
    Extract the offers listed on a parsed page.

    Looks up the ``catalog-serp`` block, drops the first ``witcher/block``
    element inside it (an advertising block) and passes every ``item``
    element to ``get_item_data``.

    :param soup: parsed page (BeautifulSoup object)
    :param type_of: category name, forwarded to ``get_item_data``
    :return: result of ``get_item_data``, or an empty list when the page has
             no catalog block
    """
    catalog = soup.find('div', {"data-marker": "catalog-serp"})

    if not catalog:
        # The full page goes to the log only; the chat gets a short notice
        error_message = 'Error not table' + str(soup) + str(catalog)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, 'Error not table// Check LOGS')
        return []

    ad_block = catalog.find('div', {"data-marker": "witcher/block"})
    if ad_block:  # remove the advertising block
        ad_block.decompose()

    offer_rows = catalog.find_all('div', {"data-marker": "item"})
    return get_item_data(offer_rows, type_of)
Пример #6
0
def get_page_data(page_url, count_try):
    """
    Download one listing page and extract its offers.

    :param page_url: URL of the listing page
    :param count_try: attempt number forwarded to ``get_soup_from_page``
    :return: tuple ``(rows, next_pagination)``; ``next_pagination`` is ``False``
             when the page could not be fetched or when more than one
             ``items-items`` block was found on it
    """
    soup, next_parsing = get_soup_from_page(page_url, count_try)

    if not next_parsing:
        error_message = 'Next parsing none ' + str(page_url)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, error_message)
        return [], False

    if not soup:
        error_message = 'Soup is None ' + str(page_url)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, error_message)
        return [], False

    # The category name is the link text of the second breadcrumb element
    try:
        breadcrumbs = soup.find('div', {"data-marker": "breadcrumbs"})
        crumbs = breadcrumbs.find_all('span', {"itemprop": "itemListElement"})
        type_of = crumbs[1].find('a').text
    except (AttributeError, IndexError):
        # find() returned None or there are fewer than two breadcrumbs
        type_of = 'None Type'
        log.warning('type_of = None Type')

    next_pagination = True
    item_blocks = soup.find_all('div', attrs={"class": re.compile(r"items-items")})
    if len(item_blocks) > 1:
        log.warning('Found another offers | Break pagination ' + str(page_url))
        next_pagination = False

    result = []
    try:
        result = get_page_rows(soup, type_of)
    except Exception:
        # Keep the crawl going with an empty page, but do not swallow
        # KeyboardInterrupt/SystemExit and do keep the traceback in the log
        error_message = 'Error get_page_rows' + '\n ' + page_url
        text_handler(EXEPTION_CHAT, error_message)
        log.exception(error_message)
    return result, next_pagination
Пример #7
0
def _format_price_history(price_history, max_rows=9):
    """
    Render the latest price-history entries as text for the bot message.

    :param price_history: list of ``{"data": ..., "price": ...}`` dicts in the
                          order they were appended (newest last)
    :param max_rows: how many of the latest entries to show
    :return: one line per shown entry; every entry except the very first one of
             the history also shows the result of ``calculation_percent``
             against the previous entry
    """
    first_shown = max(len(price_history) - max_rows, 0)
    lines = []
    for idx in range(first_shown, len(price_history)):
        entry = price_history[idx]
        line = 'Дата: ' + str(entry['data']) + '  ' + \
               'Цена: ' + str(num_conversion(int(entry['price']))) + ' руб.'
        if idx == 0:
            line += '\n'
        else:
            percent = calculation_percent(int(price_history[idx - 1]['price']),
                                          int(entry['price']))
            line += '  ' + '(' + str(percent) + '%)\n'
        lines.append(line)
    return ''.join(lines)


def write_sqlite3(url):
    """
    Store the parsed offers of one task in SQLite and queue bot notifications.

    For every offer:

    * not found in ``offers`` - a row is inserted and a ``'new'`` notification
      is queued;
    * found with the same price - only status/date/task fields are refreshed;
    * found with a different price - price and price history are updated and
      an ``'update'`` notification is queued.

    The queued notifications are passed to ``parse_items_to_send`` before the
    transaction is committed, so an exception raised there rolls the database
    changes back.

    :param url: pair ``(offers, task)``; ``offers`` is a list of offer dicts
                (``None`` entries are reported and skipped), ``task`` is read
                as ``task[0]`` city, ``task[1]`` chat, ``task[2]`` urls id
    :return: None
    """
    offers = url[0]
    sql_city = url[1][0]
    sql_chat = url[1][1]
    sql_urls_id = url[1][2]
    items = []  # notifications for parse_items_to_send

    conn = sqlite3.connect(ROUTE_DB)
    try:
        # ``with conn`` commits on success and rolls back on an exception;
        # it does not close the connection, hence the try/finally around it.
        with conn:
            cur = conn.cursor()
            # Reset the status of all offers of this task; offers seen below
            # get status=1 again
            cur.execute('UPDATE offers SET status=0 WHERE urls_id=?',
                        (sql_urls_id, ))

            for offer in offers:
                if offer is None:
                    error_message = 'Error: write Sql_item, item is None ' + str(
                        sql_urls_id)
                    text_handler(EXEPTION_CHAT, error_message)
                    log.error(error_message)
                    continue

                sql_avito_id = offer['avito_id']
                sql_name = offer['name']
                sql_price = offer['price']
                sql_address = offer['address']
                sql_url = offer['url']
                sql_type_of = offer['type_of']
                sql_params = offer['params']

                price_now = {
                    "data": str(get_date_time()),
                    "price": str(sql_price)
                }

                cur.execute(
                    'SELECT avito_id, price, price_history FROM offers WHERE avito_id=?',
                    (sql_avito_id, ))
                rows = cur.fetchall()

                # Known offer = exactly one stored row carrying the same id
                if not (len(rows) == 1 and rows[0][0] == sql_avito_id):
                    items.append({
                        'item_price': None,
                        'sql_chat': sql_chat,
                        'sql_avito_id': sql_avito_id,
                        'sql_name': sql_name,
                        'old_price': None,
                        'sql_price': sql_price,
                        'price_history_srt': None,
                        'difference_price': None,
                        'percent_difference_price': None,
                        'sql_address': sql_address,
                        'sql_url': sql_url,
                        'sql_params': sql_params,
                        'sql_type_of': sql_type_of,
                        'type_update': 'new'
                    })
                    log.info('No ID -> New Offer | ' + str(sql_avito_id))

                    price_history_dumps = json.dumps([price_now])
                    cur.execute(
                        "INSERT OR IGNORE INTO offers ('avito_id','name','price','price_history','address','url','created_date','updated_date','status','city','urls_id','type_of','params') VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
                        (sql_avito_id, sql_name, sql_price,
                         price_history_dumps, sql_address, sql_url,
                         str(get_date_time()), str(get_date_time()), 1,
                         sql_city, sql_urls_id, sql_type_of, sql_params))
                    continue

                old_price = rows[0][1]
                stored_history = rows[0][2]

                if old_price == sql_price:
                    # Price unchanged: only mark the offer as still listed
                    cur.execute(
                        "UPDATE offers SET status=1, updated_date=?,urls_id=?, type_of=?, params=? WHERE avito_id=?",
                        (str(get_date_time()), sql_urls_id, sql_type_of,
                         sql_params, sql_avito_id))
                    continue

                # Price changed: extend the stored history and notify
                price_history = json.loads(stored_history) if stored_history else []
                price_history.append(price_now)
                price_history_dumps = json.dumps(price_history)
                price_history_srt = _format_price_history(price_history)

                # Both figures compare the first recorded price with the new one
                first_price = int(price_history[0]['price'])
                new_price = int(price_now['price'])
                difference_price = calculation_different_price(first_price, new_price)
                percent_difference_price = calculation_percent(first_price, new_price)

                items.append({
                    # kept in the fetchall() shape send_mes_to_bot reads
                    'item_price': [(old_price, )],
                    'sql_chat': sql_chat,
                    'sql_avito_id': sql_avito_id,
                    'sql_name': sql_name,
                    'old_price': old_price,
                    'sql_price': sql_price,
                    'price_history_srt': price_history_srt,
                    'difference_price': difference_price,
                    'percent_difference_price': percent_difference_price,
                    'sql_address': sql_address,
                    'sql_url': sql_url,
                    'sql_params': sql_params,
                    'sql_type_of': sql_type_of,
                    'type_update': 'update'
                })

                cur.execute(
                    "UPDATE offers SET price=?, old_price=?, updated_date=?, price_history=?, status=1, urls_id=?, type_of=?, params=? WHERE avito_id=?",
                    (sql_price, old_price, str(get_date_time()),
                     price_history_dumps, sql_urls_id,
                     sql_type_of, sql_params, sql_avito_id))
                log.info('Price update | ' + str(sql_avito_id))

            parse_items_to_send(items)
    finally:
        conn.close()