示例#1
0
def __crawl(driver):
    """Continuously drain ``subHtmlUrlQueue`` and harvest links from each page.

    For every not-yet-visited URL the page is loaded with ``driver``, the
    ``contentFrame`` frame is entered and the ``href`` of every ``<a>`` tag is
    collected.  Song links are appended to ``songUrlQueue``; the remaining
    links are appended back onto ``subHtmlUrlQueue``.  Any exception raised
    while handling a page is logged and the page is skipped.

    This function never returns: whenever the queue runs dry it logs that
    fact, sleeps for one second and polls again.

    :param driver: Selenium WebDriver used to load and inspect pages.
    """
    while True:
        while len(subHtmlUrlQueue) > 0:
            page_url = subHtmlUrlQueue.popleft()
            if page_url in visitedUrl:
                continue
            try:
                # Mark as visited up front so a failing page is not retried.
                visitedUrl[page_url] = 1
                logger.info("visit page %s" % page_url)
                driver.get(page_url)
                # Wait (up to 20 seconds) until at least one <script> element
                # can be found in the document.
                wait = WebDriverWait(driver, 20)
                wait.until(
                    lambda drv: drv.find_elements_by_tag_name("script"))
                driver.switch_to.frame("contentFrame")
                anchors = driver.find_elements_by_tag_name("a")
                links = filter_link(
                    [anchor.get_attribute("href") for anchor in anchors])
                song_links = get_song_link(links)
                logger.debug("get song_links %s" % len(song_links))
                other_links = get_nonsong_link(links)
                # Both queues are only extended once both link lists have
                # been computed successfully.
                songUrlQueue.extend(song_links)
                subHtmlUrlQueue.extend(other_links)
            except Exception:
                logger.error("", exc_info=True)
        # The inner loop has no ``break``, so reaching this point always
        # means the page queue is currently empty.
        logger.info("empty page queue")
        time.sleep(1)
示例#2
0
def fetch_yahoo_responses() -> List[Tuple]:
    """Fetch Yahoo financial statement data for every stock needing an update.

    The tickers returned by ``fetch_isins_not_updated_financials`` for the
    income statement, balance sheet and cash flow models are combined with
    ``union_of_list_elements``.  Each combined entry of length two is read as
    ``(isin, yahoo_ticker)``; entries of any other length are ignored.

    Returns:
        A list of ``(response, isin)`` tuples, one per ticker whose fetch
        succeeded.  Tickers whose fetch raised are logged and left out.
    """
    statement_models = (IncomeStatement, BalanceSheetStatement,
                        CashFlowStatement)
    tickers: List[List] = [
        fetch_isins_not_updated_financials(model)  # type: ignore
        for model in statement_models
    ]
    tickers_unique: List[Tuple] = union_of_list_elements(*tickers)
    logger.info('Fetching financials from %s stocks' % len(tickers_unique))

    # Modules requested from Yahoo for every ticker.
    modules = (
        'balanceSheetHistory,incomeStatementHistory,cashflowStatementHistory'
    )
    responses: List[Tuple[Any, ...]] = []
    for ticker_tuple in tickers_unique:
        if len(ticker_tuple) != 2:
            continue
        isin, yahoo_ticker = ticker_tuple
        try:
            response = fetch_yahoo_data(yahoo_ticker, modules)
            logger.info('Succeeded getting ticker, isin: %s, %s' %
                        (yahoo_ticker, isin))
        except Exception:
            logger.error(
                'Something went wrong getting ticker, isin: %s, %s' %
                (yahoo_ticker, isin))
            logger.error(format_exc())
            continue
        responses.append((response, isin))
    return responses
示例#3
0
def session_scope():
    """Yield a fresh ``Session`` and finalize it when the generator resumes.

    After the ``yield`` the session is committed.  If an exception is raised
    at the ``yield`` point or by the commit, it is logged, the session is
    rolled back and the exception is NOT re-raised.  The session is closed in
    every case.

    NOTE(review): this generator has the shape expected by
    ``contextlib.contextmanager``, but no decorator is visible on this
    definition -- confirm how it is wrapped before relying on ``with``.

    Yields:
        The newly created session.
    """
    # Create the session *before* entering the ``try``.  If ``Session()``
    # itself fails there is nothing to roll back or close, and the handlers
    # below would otherwise hit an unbound ``session`` and mask the real
    # error with an ``UnboundLocalError``.
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        logger.error("", exc_info=True)
        session.rollback()
    finally:
        session.close()
 def load_data(data: List[Base]) -> None:
     """Merge ``data`` into the database, committing in chunks.

     Each record is passed to ``session.merge``.  A record whose merge raises
     is logged and skipped.  A commit is issued whenever the current index is
     a positive multiple of 100, and once more after the last record.

     The session is always closed, including when a commit raises; the
     exception from a failed commit still propagates to the caller.

     Args:
         data: Records to merge.  When empty, nothing is done beyond logging.
     """
     if not data:
         logger.info('No data to load')
         return

     session = Session()
     try:
         for idx, record in enumerate(data):
             try:
                 session.merge(record)
             except Exception:
                 logger.info('Something went wrong: %s' % record)
                 logger.error(format_exc())
                 continue
             logger.debug(record)
             if idx > 0 and idx % 100 == 0:
                 session.commit()
                 logger.info('Chunked commit at %s records' % idx)
         # Final commit for the records after the last full chunk.
         session.commit()
         logger.info('Chunked commit at %s records' % idx)
     finally:
         # Release the session even if a commit above failed.
         session.close()
示例#5
0
def crawl_songs():
    """Continuously drain ``songUrlQueue`` and store info for each new song.

    For every not-yet-visited link the song id is taken as the text after the
    first ``=`` in the link.  Songs that ``songService`` already reports as
    existing are skipped; otherwise the info returned by
    ``extract.getSongInfo`` is passed to ``songService.add`` when it is
    truthy.  Any exception raised while handling a link is logged and the
    link is skipped.

    This function never returns: whenever the queue runs dry it logs that
    fact, sleeps for one second and polls again.
    """
    while True:
        while len(songUrlQueue) > 0:
            link = songUrlQueue.popleft()
            if link in visitedUrl:
                continue
            logger.info("visit song %s" % link)
            try:
                # Mark as visited up front so a failing link is not retried.
                visitedUrl[link] = 1
                # If the link has no "=", find() returns -1 and the whole
                # link is used as the id.
                song_id = link[link.find("=") + 1:]
                if songService.is_existed(song_id):
                    logger.info("%s has existed" % song_id)
                else:
                    song_info = extract.getSongInfo(link, song_driver)
                    if song_info:
                        songService.add(song_info)
            except Exception:
                logger.error("", exc_info=True)
        # The inner loop has no ``break``, so reaching this point always
        # means the song queue is currently empty.
        logger.info("empty song queue")
        time.sleep(1)
示例#6
0
 def execute(*args, **kws):
     """Call ``realf`` with the given arguments, logging any exception.

     ``realf`` is not defined in this block; presumably it is the function
     captured by an enclosing decorator -- confirm against the outer scope.

     Returns whatever ``realf`` returns, or ``None`` if it raised an
     ``Exception`` (which is logged with its traceback and not re-raised).
     """
     outcome = None
     try:
         outcome = realf(*args, **kws)
     except Exception as err:
         logger.error(repr(err), exc_info=True)
     return outcome