def get_share_code_from_naver():
    from scraping.web_scraping import create_soup
    from openpyxl import Workbook

    wb = Workbook()
    ws = wb.active
    ws.title = "네이버 업종 종목코드"
    ws.append(['종목코드', '종목명', '업종코드', '업종명'])

    # Industry code dictionary: key = industry name, value = industry code
    mk_code_dict = get_market_code_from_naver()

    for key, value in mk_code_dict.items():
        print(f"Start {value} Category")
        url = f'https://finance.naver.com/sise/sise_group_detail.nhn?type=upjong&no={value}'
        soup = create_soup(url)
        table = soup.select('#contentarea > div.box_type_l')[1].select(
            'tbody > tr > td.name')
        for line in table:
            share_name = line.text
            share_code = line.a['href'].replace("/item/main.nhn?code=", "")
            # Save the share and industry codes to the Excel sheet
            ws.append([share_code, share_name, value, key])

    wb.save("stock_data\\종목코드.xlsx")
    print("Total Code file Completed")
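# NOTE: every function in this module depends on create_soup from
# scraping.web_scraping, which is not shown in this section. Below is a
# minimal sketch of what that helper presumably looks like, assuming
# requests + BeautifulSoup with the lxml parser (lxml is also used
# explicitly in get_etf_info_from_naver further down). The User-Agent value
# is an assumption; Naver tends to reject requests without one.
def create_soup(url):
    import requests
    from bs4 import BeautifulSoup

    headers = {"User-Agent": "Mozilla/5.0"}  # hypothetical value
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # fail fast on HTTP errors
    return BeautifulSoup(response.text, "lxml")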
def get_theme_share_info():
    import csv
    import time
    import datetime
    from scraping.web_scraping import create_soup

    print("Start get share code")
    print(time.strftime("%Y-%m-%d %H:%M:%S"))

    csv_file = open(
        f"stock_data\\theme_share_info_{datetime.date.today()}.csv",
        "w", encoding='utf-8-sig', newline="")
    csv_writer = csv.writer(csv_file)

    # Header row
    header = ['테마명', '테마코드', '종목명', '종목코드', '종목정보', '종목링크']
    csv_writer.writerow(header)

    # Theme code list
    th_code_list = get_theme_code_from_naver()

    # For each theme, write one row with the theme name, code, and
    # description, followed by one row per share in that theme
    for th_code in th_code_list:
        print(f"theme code: {th_code}")
        url = f'https://finance.naver.com/sise/sise_group_detail.nhn?type=theme&no={th_code}'
        soup = create_soup(url)

        # Theme name and description
        th_name = soup.select_one(
            '#contentarea_left > table > tr > td > div > div > strong'
        ).get_text()
        th_info = soup.select(
            '#contentarea_left > table > tr > td')[1].p.get_text()
        csv_writer.writerow([th_name, str(th_code), '', '', th_info])

        # Share name, code, description, and link for each share in the theme
        sh_data = soup.find('tbody').select('tr')[:-2]
        for tag in sh_data:
            sh_name = tag.select('td')[0].text
            sh_code = tag.select('td')[0].a['href'].replace(
                "/item/main.nhn?code=", "")
            sh_info = tag.select('td')[1].p.text
            sh_link = "https://finance.naver.com/item/main.nhn?code=" + sh_code
            csv_writer.writerow([
                th_name, str(th_code), sh_name, str(sh_code), sh_info, sh_link
            ])

    csv_file.close()
    print("Share code done")
    print(time.strftime("%Y-%m-%d %H:%M:%S"))
def get_etn_code_from_wise():
    from scraping.web_scraping import create_soup

    soup = create_soup("http://comp.wisereport.co.kr/ETF/lookup.aspx")
    table = soup.select_one('table.table').tbody.select('tr')

    etn_code = []
    for tag in table:
        td = tag.select('td')
        if td[0].text == "ETN":
            etn_code.append(td[1].text.strip())

    print("ETN code done")
    return etn_code
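# get_etf_info_from_naver below calls get_etf_code_from_wise, which is not
# shown in this section. A plausible sketch, assuming it mirrors
# get_etn_code_from_wise above and simply collects the "ETF" rows from the
# same WiseReport lookup table; the actual implementation may differ.
def get_etf_code_from_wise():
    from scraping.web_scraping import create_soup

    soup = create_soup("http://comp.wisereport.co.kr/ETF/lookup.aspx")
    table = soup.select_one('table.table').tbody.select('tr')

    etf_code = []
    for tag in table:
        td = tag.select('td')
        if td[0].text == "ETF":  # assumed row label, by analogy with "ETN"
            etf_code.append(td[1].text.strip())

    print("ETF code done")
    return etf_code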
def get_market_code_from_naver():
    from scraping.web_scraping import create_soup

    soup = create_soup(
        'https://finance.naver.com/sise/sise_group.nhn?type=upjong')
    table = soup.select('table.type_1 > tr > td > a')

    # Build a dictionary so the catch-all '기타' (misc.) entry can be dropped
    mk_code_dict = {}
    for line in table:
        market_name = line.text
        market_code = line['href'].replace(
            "/sise/sise_group_detail.nhn?type=upjong&no=", "")
        mk_code_dict[market_name] = market_code

    # pop() instead of del avoids a KeyError if '기타' is ever absent
    mk_code_dict.pop('기타', None)
    print(mk_code_dict)
    print('mk_code_dict completed')

    # Return {industry name: industry code}
    return mk_code_dict
def get_theme_code_from_naver():
    from scraping.web_scraping import create_soup

    theme_code_list = []  # collected theme codes
    print("Start get share code")

    for page in range(1, 7):
        print(f"On page : {page}")
        url = f"https://finance.naver.com/sise/theme.nhn?&page={page}"
        soup = create_soup(url)
        theme_data = soup.select_one("table.type_1").select("td.col_type1")
        for line in theme_data:
            theme_code = line.select_one("a")["href"].replace(
                "/sise/sise_group_detail.nhn?type=theme&no=", "")
            theme_code_list.append(theme_code)

    print("Theme code done")
    # Return the list of theme codes
    return theme_code_list
def get_theme_fluctuation():
    import csv
    import time
    import datetime
    from scraping.web_scraping import create_soup

    csv_file = open(
        f"stock_data\\테마데일리등락률\\theme_fluctuation_{datetime.date.today()}.csv",
        "w", encoding='utf-8-sig', newline="")
    csv_writer = csv.writer(csv_file)

    # Header: theme name, theme code, change vs. the previous day
    header = ['테마명', '테마코드', '전일대비 등락률']
    csv_writer.writerow(header)

    print("Start scraping daily fluctuation by theme")
    for page in range(1, 7):
        print(f"On page : {page}")
        # Naver Finance quotes by theme
        url = f"https://finance.naver.com/sise/theme.nhn?&page={page}"
        time.sleep(1.5)  # throttle requests between pages
        soup = create_soup(url)
        theme_data = soup.select("td.col_type1")
        flucs = soup.select("td.col_type2")
        for i in range(len(theme_data)):
            theme_name = theme_data[i].text.strip()
            theme_code = theme_data[i].a["href"].replace(
                "/sise/sise_group_detail.nhn?type=theme&no=", "")
            fluc = flucs[i].text.strip().replace("+", "").replace("%", "")
            print(theme_name, theme_code, fluc)
            csv_writer.writerow([theme_name, theme_code, fluc])

    csv_file.close()
    print("Daily fluctuation by theme completed")
    print()
def get_market_fluctuation():
    import csv
    import datetime
    from scraping.web_scraping import create_soup

    csv_file = open(
        f"stock_data\\업종데일리등락률\\market_fluctuation_{datetime.date.today()}.csv",
        "w", encoding='utf-8-sig', newline="")
    csv_writer = csv.writer(csv_file)

    # Header: industry name, industry code, change vs. the previous day
    header = ['업종명', '업종코드', '전일대비 등락률']
    csv_writer.writerow(header)

    print("Start scraping daily fluctuation by industry")
    # Naver Finance industry page
    url = "https://finance.naver.com/sise/sise_group.nhn?type=upjong"
    soup = create_soup(url)
    theme_data = soup.select_one("table.type_1").select("tr")[3:]
    for line in theme_data:
        td = line.select('td')
        if len(td) <= 1:  # skip separator rows with no data
            continue
        market_name = td[0].text.strip()
        market_code = td[0].a["href"].replace(
            "/sise/sise_group_detail.nhn?type=upjong&no=", "")
        fluc = td[1].text.strip().replace('%', '').replace('+', '')
        csv_writer.writerow([market_name, market_code, fluc])

    csv_file.close()
    print("Daily fluctuation by industry completed")
    print()
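# open() and wb.save() in the functions above assume the stock_data
# directories already exist; neither call creates missing folders. A small
# helper sketch (ensure_output_dirs is a new, hypothetical name; the
# directory names are taken from the paths used in this module):
def ensure_output_dirs():
    import os
    for path in ("stock_data",
                 "stock_data\\테마데일리등락률",
                 "stock_data\\업종데일리등락률"):
        os.makedirs(path, exist_ok=True)  # no-op if the folder exists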
def get_etf_info_from_naver():
    import time
    import datetime
    import csv
    from scraping.web_scraping import create_soup
    from selenium import webdriver
    from bs4 import BeautifulSoup

    # CSV output with a two-row header: overview columns first, then the
    # return periods and the top-10 holdings
    csv_file = open(f'stock_data\\etf_{datetime.date.today()}.csv',
                    'w', encoding='utf-8-sig', newline='')
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow([
        'ETF이름', '링크', 'ETF코드', '운용사', '수수료', '시가총액(억 원)', '수익률', '', '',
        '', '구성종목 TOP 10'
    ])
    csv_writer.writerow([
        '', '', '', '', '', '', '1개월', '3개월', '6개월', '1년', '1위', '', '2위', '',
        '3위', '', '4위', '', '5위', '', '6위', '', '7위', '', '8위', '', '9위', '',
        '10위', ''
    ])

    # ETF codes
    etf_code = get_etf_code_from_wise()

    # Headless Selenium
    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument("window-size=1920x1080")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36"
    )
    driver = webdriver.Chrome(options=options)

    print("Get ETF info :")
    print(time.strftime("%Y-%m-%d %H:%M:%S"))

    for code in etf_code:
        print(f"Start : {code}")
        url = f'https://finance.naver.com/item/coinfo.nhn?code={code}'
        driver.get(url)
        driver.implicitly_wait(3)
        soup = create_soup(url)

        # Name, link, management company, commission
        etf_name = soup.select_one(
            '#middle > div.h_company > div.wrap_company > h2 > a').text
        etf_link = f'https://finance.naver.com/item/coinfo.nhn?code={code}'
        company = soup.select_one('#tab_con1').select_one(
            'div:nth-child(4)').select('td')[1].text
        commission = soup.select_one('#tab_con1').select_one(
            'div:nth-child(4)').select('td')[0].em.text.replace('%', '')
        each_etf_info = [etf_name, etf_link, str(code), company, commission]

        # Switch into the iframe that holds capitalization, returns, holdings
        driver.switch_to.frame('coinfo_cp')
        time.sleep(1)
        soup_iframe = BeautifulSoup(driver.page_source, "lxml")

        # Market capitalization: drop commas and the trailing unit characters
        capital = soup_iframe.select_one('#status_grid_body').select(
            'tr')[5].td.text.replace(',', '')[:-2]
        each_etf_info.append(capital)

        # Returns (1M / 3M / 6M / 1Y)
        earnings = soup_iframe.select_one('#status_grid_body').select(
            'tr')[-1].td.select('span')
        for line in earnings:
            earning = line.text.replace('%', '').replace('+', '')
            each_etf_info.append(earning)

        # Top 10 holdings: extend keeps the name/percent pairs flat
        top_10 = soup_iframe.select_one('#CU_grid_body').select('tr')[:10]
        for line in top_10:
            sh_name = line.select('td')[0].text
            percent = line.select('td')[2].text
            if percent == '-':
                percent = ''
            each_etf_info.extend([sh_name, percent])

        # Write the assembled row to CSV
        csv_writer.writerow(each_etf_info)

    driver.quit()
    csv_file.close()
    print("ETF info completed")
    print(time.strftime("%Y-%m-%d %H:%M:%S"))
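# Hypothetical usage, assuming this module is run as a script. The call
# order reflects the data dependencies: get_share_code_from_naver pulls its
# industry codes via get_market_code_from_naver internally, and
# get_theme_share_info pulls theme codes via get_theme_code_from_naver.
if __name__ == "__main__":
    ensure_output_dirs()          # sketched above; output folders must exist
    get_share_code_from_naver()   # industry -> share codes, saved to Excel
    get_theme_share_info()        # theme -> share details, saved to CSV
    get_theme_fluctuation()       # daily change by theme, saved to CSV
    get_market_fluctuation()      # daily change by industry, saved to CSV
    get_etf_info_from_naver()     # ETF overview, returns, holdings, to CSV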