def makeDataFrame():
    """Build a DataFrame of daily closes for KOSPI200 and S&P500.

    Crawls both indices over the fixed window 2019-01-02 .. 2019-05-04,
    aligns them on formatted date keys, and fills the gaps that appear
    because the two markets have different holidays.

    Returns:
        pd.DataFrame with columns 'S&P500' and 'KOSPI200', indexed by date.
    """
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }

    worldDate = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    worldCrawler = NaverWorldCrawler.create(targetName='SPI@SPX')
    sp500 = worldCrawler.crawling(dateData=worldDate)

    sp500Close = {
        NaverDate.formatDate(item.date): item.close
        for item in sp500
    }

    data = {'S&P500': sp500Close, 'KOSPI200': kospi200Close}
    df = pd.DataFrame(data)
    # fillna(method=...) is deprecated since pandas 2.1; .ffill()/.bfill()
    # are the long-supported equivalents.
    df = df.ffill()
    if df.isnull().values.any():
        # A hole in the very first row cannot be forward-filled; backfill it.
        df = df.bfill()
    return df
def k10FromDate():
    """Build a DataFrame of daily closes for the top-10 stocks by market cap.

    Crawls each of the top-10 stock codes over the fixed window
    2019-01-02 .. 2019-05-04 and fills missing days forward, then
    backward if holes remain.

    Returns:
        pd.DataFrame with one column per stock code, indexed by date.
    """
    prices = dict()
    top10 = topK(10)
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    for code in top10:
        crawler = NaverStockCrawler.create(code)
        data = crawler.crawling(date)
        prices[code] = {
            NaverDate.formatDate(item.date): item.close
            for item in data
        }
    df = pd.DataFrame(prices)
    # fillna(method=...) is deprecated since pandas 2.1; .ffill()/.bfill()
    # are the long-supported equivalents.
    df = df.ffill()
    if df.isnull().values.any():  # if holes still remain (e.g. leading rows)
        df = df.bfill()
    return df
def showGraphK10KOSPI200():
    """Plot the K10 index against KOSPI200, both rebased to 100 at the start.

    Builds a cap-weighted K10 index from the top-10 stock prices and
    compares it with the crawled KOSPI200 series on one chart.
    """
    k10_price = k10FromDate()
    k10_info = makeK10()
    # Per-stock market cap = price * shares outstanding * float ratio.
    k10_historical_mc = k10_price * k10_info['Outstanding'] * k10_info[
        'Floating']
    k10 = pd.DataFrame()
    k10['k10 Market Cap'] = k10_historical_mc.sum(axis=1)
    # Use .iloc[0]: the index holds date strings, so plain [0] relied on the
    # deprecated positional fallback (removed in pandas 3.0).
    k10['k10'] = k10['k10 Market Cap'] / k10['k10 Market Cap'].iloc[0] * 100

    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }
    k200 = pd.DataFrame({'K200': kospi200Close})

    plt.figure(figsize=(10, 5))
    # label= is required for plt.legend() to show entries; the original
    # legend was empty because neither line was labeled.
    plt.plot(k10['k10'], label='k10')
    plt.plot(k200['K200'] / k200['K200'].iloc[0] * 100, label='K200')
    plt.legend(loc=0)
    plt.grid(True, color='0.7', linestyle=':', linewidth=1)
# ----- Example 4 -----
    def crawling(self, dateData=''):
        """Crawl daily world-index quotes from the paginated Naver table.

        Drives PhantomJS through the page links of the daily-quote table,
        parses each page with BeautifulSoup, and collects rows whose date
        falls within [dateData.startDate, dateData.endDate].

        Args:
            dateData: object exposing string attributes ``startDate`` and
                ``endDate`` (NaverDate). NOTE(review): the ``''`` default is
                unusable — ``''.startDate`` would raise; callers must pass a
                real date object.

        Returns:
            list of NaverWorldResultData in site order (newest first).
        """
        driver = webdriver.PhantomJS(
            'C:/Users/lsj/Downloads/phantomjs-2.1.1-windows/bin/phantomjs.exe')
        data = []
        try:
            driver.get(self.makeWorldUrl())
            pageNo = '1'
            isRunning = True
            while isRunning:
                # BUG FIX: find_element_by_link_text raises
                # NoSuchElementException instead of returning None, so the
                # original `if not elePage` guard could never fire; the
                # plural form returns [] when the link is absent.
                pageLinks = driver.find_elements_by_link_text(pageNo)
                if not pageLinks:
                    break
                elePage = pageLinks[0]
                elePage.click()
                pageNo = elePage.text

                text = driver.page_source
                soup = bs4.BeautifulSoup(text, 'lxml')
                table = soup.find(class_='tb_status2 tb_status2_t2').find('tbody')
                # Keep only real tag children (skip whitespace nodes).
                rows = filter(lambda val: type(val) == bs4.element.Tag,
                              table.children)
                # Collect the <td> cells of every row...
                tds = map(lambda row: row.find_all('td'), list(rows))
                # ...flattened into one list of cells.
                flattenTds = list(itertools.chain(*tds))
                # Drop anything that is not a Tag.
                tdsf = filter(lambda td: type(td) == bs4.element.Tag, flattenTds)
                # Extract each cell's text fragments...
                values = map(lambda value: value.stripped_strings, tdsf)
                # ...flattened into a single list of strings.
                strings = list(itertools.chain(*values))
                # Each table row has 6 cells: date, close, diff, open, high, low.
                splitData = [strings[i:i + 6] for i in range(0, len(strings), 6)]
                for one in splitData:
                    date = NaverDate.formatDate(date=one[0])
                    if dateData.startDate <= date <= dateData.endDate:
                        resultData = NaverWorldResultData.create(date=one[0],
                                                                 close=one[1],
                                                                 diff=one[2],
                                                                 open=one[3],
                                                                 high=one[4],
                                                                 low=one[5])
                        data.append(resultData)
                    elif dateData.startDate > date:
                        # Rows are newest-first; once we pass startDate
                        # there is nothing older worth keeping.
                        isRunning = False
                        break
                # The pager shows 10 links per group; after the last link of
                # a group, click "next" and wait for the next group to render.
                eleNext = driver.find_elements_by_css_selector('#dayPaging .next')
                nextPageNo = str(int(pageNo) + 1)
                if len(eleNext) > 0 and int(pageNo) % 10 == 0:
                    eleNext[0].click()
                    wait = WebDriverWait(driver, 10)
                    wait.until(
                        EC.presence_of_element_located((By.LINK_TEXT, nextPageNo)))
                    driver.implicitly_wait(1)
                pageNo = nextPageNo
                if len(driver.find_elements_by_link_text(pageNo)) == 0:
                    # No further page link: the last page has been consumed.
                    break
        finally:
            # BUG FIX: close the browser even when parsing/clicking raises,
            # otherwise the PhantomJS process leaks.
            driver.close()
        return data