def load_upcoming_events(self, ts=None):
    """Load youtube upcoming events that still need detail parsing.

    Selects rows crawled within the last two days whose ``online_ts`` is
    still NULL, i.e. the detail crawler has not yet filled in the online
    timestamp and cover.

    Returns a list of dict rows (``id``, ``web_url``); returns an empty
    list when the query fails, so callers can always iterate the result.
    """
    logger.info('Load youtube upcoming events for detail parsing (online_ts and cover)')
    cnx = mysql.connector.connect(**db)
    cursor = cnx.cursor(dictionary=True, buffered=True)
    select_sql = "SELECT id, web_url from crawled_lives " \
                 " where list_crawler_ts > %(crawl_query_ts)s AND " \
                 " online_ts is NULL AND" \
                 " site = 'youtube'"

    today = datetime.datetime.now()
    param = {
        # Only events crawled within the last two days are still relevant.
        "crawl_query_ts": get_query_ts(today - datetime.timedelta(days=2)),
    }
    try:
        cursor.execute(select_sql, param)
        # Log the fully rendered statement for debugging.
        logger.info(cursor.statement)

        all_upcoming_events = list(cursor.fetchall())

        logger.info('Load %d youtube upcoming events from DB for detail parsing.', len(all_upcoming_events))
        return all_upcoming_events
    except Exception:
        logger.error('error! SQL=%s', cursor.statement, exc_info=True)
        return []
    finally:
        cnx.close()
    def _load_existing_upcoming_events(self, referer_url, ts=None):
        """Load existing livestream events of a channel, for de-duplication.

        Selects events crawled within the last two days that are either not
        yet online (``online_ts IS NULL``) or scheduled in the future, for
        the given referer channel.

        Returns a set of ``web_url`` strings; an empty set on query failure
        so callers can always use set operations on the result.
        """
        logger.info("Load existing upcoming events with referer_url %s for filtering.", referer_url)

        cnx = mysql.connector.connect(**db)
        cursor = cnx.cursor(dictionary=True, buffered=True)

        # The NULL/future alternatives are parenthesized: without parens,
        # SQL's AND-over-OR precedence would make the "online_ts IS NULL"
        # branch match rows of any site/referer.
        select_sql = (
            "SELECT web_url from crawled_lives "
            " where list_crawler_ts > %(crawl_query_ts)s AND "
            " (online_ts is NULL or online_ts > %(online_query_ts)s) AND"
            " site = 'livestream' AND "
            " referer = %(referer_url)s"
        )

        today = datetime.datetime.now()

        param = {
            # Only events crawled within the last two days are relevant.
            "crawl_query_ts": get_query_ts(today - datetime.timedelta(days=2)),
            "online_query_ts": get_query_ts(today),
            "referer_url": referer_url,
        }

        try:
            cursor.execute(select_sql, param)
            all_upcoming_events = cursor.fetchall()
            logger.info(
                "Load %d existing upcoming events from DB with referer_url %s.", len(all_upcoming_events), referer_url
            )
            return set(event["web_url"] for event in all_upcoming_events)
        except Exception:
            logger.error("Error while executing SQL=%s", cursor.statement, exc_info=True)
            return set()
        finally:
            cnx.close()
 def update_db(self, live_events):
     """Persist parsed detail fields for youtube upcoming events.

     For each event dict: downloads cover/avatar image bytes, stamps the
     detail-crawler metadata, and updates the matching ``crawled_lives``
     row (keyed by ``web_url``).  The whole batch is committed in one
     transaction; on failure the connection is closed uncommitted, so no
     partial batch is written.
     """
     logger.info('Update youtube upcoming events with (online_ts and cover)')
     cnx = mysql.connector.connect(**db)
     cursor = cnx.cursor(buffered=True)
     update_sql = "UPDATE crawled_lives " \
                  " SET " \
                  " detail_crawler=%(detail_crawler)s, " \
                  " detail_crawler_ts=%(detail_crawler_ts)s, " \
                  " online_ts=%(online_ts)s, " \
                  " cover_url=%(cover_url)s, " \
                  " cover=%(cover)s, " \
                  " owner_avatar = %(avatar)s, " \
                  " category = %(category)s, " \
                  " description = %(description)s, " \
                  " more_info = %(more_info)s " \
                  " WHERE web_url = %(web_url)s "
     # Fields shared by every row of this batch.
     to_update = {
         "detail_crawler": "youtube_detail",
         "detail_crawler_ts": get_query_ts(),
     }

     try:
         for event in live_events:
             event.update(to_update)
             # Fetch image bytes only when a URL is present.
             event['cover'] = fetch_image(event['cover_url']) if event['cover_url'] else None
             event['avatar'] = fetch_image(event['avatar_url']) if event['avatar_url'] else None
             cursor.execute(update_sql, event)
             logger.debug('event(%s) is updated.', event['web_url'])
         cnx.commit()
     except Exception:
         logger.error('error! SQL=%s', cursor.statement, exc_info=True)
     finally:
         cnx.close()
    def _parse_event_detail(self, event_list):
        """Visit each event's detail page in Selenium and enrich the event dict.

        Livestream detail pages come in two templates: an old one (detected
        via ``self._is_sepecial_page``) scraped through the ``ED_EXTRACTOR``
        selector table, and a new Angular-based one scraped with inline XPath
        selectors.  Region-blocked, login-required and password-protected
        pages are skipped.  Successfully parsed events are collected in
        ``valid_event_list``.

        NOTE(review): written against Python 2 (``except Exception, e``) and
        the legacy ``find_element_by_*`` Selenium API.
        """

        # Dedicated browser instance for the detail pages.
        sub_browser = webdriver.Firefox()

        valid_event_list = []
        for event in event_list:
            try:
                sub_browser.get(event["web_url"])

                # --- guard clauses: skip pages that cannot be scraped ---
                if not self._is_region_available(sub_browser):
                    logger.info("web_url %s not region available", event["web_url"])
                    continue

                if self._required_login(sub_browser):
                    logger.info("web_url %s require login", event["web_url"])
                    continue

                if self._required_password(sub_browser):
                    logger.info("web_url %s require password", event["web_url"])
                    continue

                if not self._is_sepecial_page(sub_browser):
                    # Old template.  Each ED_EXTRACTOR entry appears to be a
                    # (selenium_finder_method_name, selector) pair; getattr
                    # dispatches the finder on the browser or element.
                    logger.info("web_url %s belong to old template", event["web_url"])

                    WebDriverWait(sub_browser, 5).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, "div#event-meta"))
                    )
                    sub_event = getattr(sub_browser, ED_EXTRACTOR["sub_event"][0])(ED_EXTRACTOR["sub_event"][1])
                    category_elem = getattr(sub_event, ED_EXTRACTOR["sub_event_category"][0])(
                        ED_EXTRACTOR["sub_event_category"][1]
                    )
                    category = category_elem.text

                    sub_owner_elem = getattr(sub_browser, ED_EXTRACTOR["sub_event_owner"][0])(
                        ED_EXTRACTOR["sub_event_owner"][1]
                    )
                    avatar_url = sub_owner_elem.get_attribute("src")

                    cover_elem = getattr(sub_browser, ED_EXTRACTOR["sub_event_cover"][0])(
                        ED_EXTRACTOR["sub_event_cover"][1]
                    )
                    cover_url = cover_elem.get_attribute("src")

                    number_events_elem = getattr(sub_browser, ED_EXTRACTOR["sub_event_owner_events"][0])(
                        ED_EXTRACTOR["sub_event_owner_events"][1]
                    )
                    number_events = number_events_elem.text

                    number_followers_elem = getattr(sub_browser, ED_EXTRACTOR["sub_event_owner_follower"][0])(
                        ED_EXTRACTOR["sub_event_owner_follower"][1]
                    )
                    number_followers = number_followers_elem.text

                else:
                    # New (Angular) template: the metadata sits behind an
                    # info drawer that must be clicked open first.
                    logger.info("web_url %s belong to new template", event["web_url"])

                    info = sub_browser.find_element_by_xpath('//a[@ng-if="enable_drawers_embed"]')
                    info.click()
                    sub_browser.implicitly_wait(5)

                    WebDriverWait(sub_browser, 5).until(
                        EC.presence_of_element_located(
                            (
                                By.XPATH,
                                '//div[@class="event_date_category"]/a[@class="event_category ng-binding ng-scope"]',
                            )
                        )
                    )

                    category_elem = sub_browser.find_element_by_xpath(
                        '//div[@class="event_date_category"]/a[@class="event_category ng-binding ng-scope"]'
                    )
                    category = category_elem.text.strip()

                    sub_owner_elem = sub_browser.find_element_by_xpath('//a[@class="owner_avatar"]/img')
                    avatar_url = sub_owner_elem.get_attribute("src")

                    # The span list holds the event count first, then the
                    # follower count.
                    events_followers_elem = sub_browser.find_elements_by_xpath(
                        '//div[@class="account_details"]/pluralize-with-html/span'
                    )
                    number_events = events_followers_elem[0].text
                    number_followers = events_followers_elem[1].text

                    # Cover image URL is embedded in the wrapper's inline CSS
                    # style; pull the protocol-relative "//img..." URL out.
                    cover_elem = sub_browser.find_element_by_xpath('//div[@class="event_poster_wrapper ng-scope"]')
                    cover_url = cover_elem.get_attribute("style")
                    m = re.search(r'"//(img.*)"', cover_url)
                    if m:
                        cover_url = r"http://" + m.group(1)
                    else:
                        cover_url = None

                # Merge the scraped fields into the event record.  Counts
                # like "1,234" are stripped of commas/spaces before int().
                event.update(
                    {
                        "cover_url": cover_url,
                        "owner_avatar_url": avatar_url,
                        "site": "livestream",
                        "category": category,
                        "list_crawler": "livestream_live",
                        "detail_crawler": "livestream_live",
                        "detail_crawler_ts": get_query_ts(),
                        "more_info": json.dumps(
                            {
                                "events": int(re.sub("[, ]", "", number_events)),
                                "followers": int(re.sub("[, ]", "", number_followers)),
                            }
                        ),
                    }
                )
                logger.info("Successfully parse event %s with url %s", event["title"], event["web_url"])
                valid_event_list.append(event)

            # Python 2 except syntax.  Log and move on: a single failing
            # page must not abort the whole batch.
            except Exception, e:
                logger.error(e.message, exc_info=True)