# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# prepare: remove old files in 2_dividents
# (the code visible here does not delete anything itself).

# NOTE: despite the "_sh" suffix this path points at the "sz" raw-data tree.
raw_sh = "../../../data/raw/sz/"

# Output file per security code, and the Sina ShareBonus page it comes from.
fileNameTemplate = raw_sh + "2_dividents/{0}.html"
urlTemplate = "http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/{0}.phtml"

# Browser-like request headers for vip.stock.finance.sina.com.cn.
# The Host value must be a bare host name; the previous value carried an
# "http://" scheme prefix, which is not a valid Host header.
# NOTE(review): this dict is not passed to download() below (None is passed
# instead) - confirm whether that is intentional before wiring it in.
downloadHeaders = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'close',
    'Host': 'vip.stock.finance.sina.com.cn',
    'Referer': 'http://vip.stock.finance.sina.com.cn/corp/go.php/vISSUE_ShareBonus/stockid/000637.phtml',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

codes = securitydb.get_security_code()

for code in codes:
    # Download the ShareBonus page for this security code.
    url = urlTemplate.format(code)
    fileName = fileNameTemplate.format(code)
    # No custom headers are sent (second argument is None); sleepSeconds=5 is
    # forwarded to net_functions.download as given.
    net_functions.download(url, None, fileName, sleepSeconds=5)
'Host': 'query.sse.com.cn', 'Referer': 'http://www.sse.com.cn/assortment/stock/list/info/announcement/index.shtml?productId=600000', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' } fileName_template = raw_sh + "2_annual_url/{0}_{1}_{2}.json" ''' example: {"beginDate":"2017-01-01","endDate":"2018-01-01","isNew":"1","isPagination":"true","jsonCallBack":"","keyWord":"","pageHelp":{"beginPage":1,"cacheSize":1,"data":[{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_nzy.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报摘要","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告摘要","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null},{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_n.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":
null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null}],"endDate":null,"endPage":5,"objectResult":null,"pageCount":1,"pageNo":1,"pageSize":25,"searchDate":null,"sort":null,"startDate":null,"total":2},"productId":"600000","reportType":"YEARLY","reportType2":"DQBG","result":[{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_nzy.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bulletin_No":null,"bulletin_Type":"年报摘要","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告摘要","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null},{"INDEXCLASS":null,"PLAN_Date":null,"PLAN_Year":null,"ROWNUM":null,"ROWNUM_":null,"SSEDate":"2017-04-01","SSETime":null,"SSETimeStr":null,"URL":"\/disclosure\/listedinfo\/announcement\/c\/2017-04-01\/600000_2016_n.pdf","author":null,"book_Name":null,"bulletinHeading":null,"bulletinType":null,"bu
lletin_No":null,"bulletin_Type":"年报","bulletin_Year":"2016","category_A":null,"category_B":null,"category_C":null,"category_D":null,"chapter_No":null,"companyAbbr":null,"dispatch_Organ":null,"file_Serial":null,"finish_Time":null,"initial_Date":null,"isChangeFlag":null,"journal_Issue":null,"journal_Name":null,"journal_Section":null,"journal_Year":null,"keyWord":null,"key_Word":null,"language":null,"lemma_CN":null,"lemma_EN":null,"publishing_Comp":null,"question":null,"question_Class":null,"read_Status":null,"save_Time":null,"section":null,"security_Code":"600000","source":null,"spareVolEnd":null,"title":"浦发银行2016年年度报告","title_ETC":null,"title_PY":null,"unit_Code":null,"unit_Type":null}]} ''' infos = securitydb.get_security_info() current_year = datetime.date.today().year for info in infos: code = info['code'] listing_year = info['listingdate'].year startYear = listing_year while True: url = url_template.format(code, "{0}-01-01".format(startYear), "{0}-12-31".format(startYear + 2)) fileName = fileName_template.format(code, startYear, startYear + 2) download(url, download_headers, fileName) startYear = startYear + 3 if startYear > current_year: break
'Host': 'query.sse.com.cn', 'Referer': 'http://www.sse.com.cn/assortment/stock/list/info/turnover/index.shtml?COMPANY_CODE=601668', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' } year = local_functions.get_current_year() year1 = str(year - 1) year2 = str(year - 2) fileName_template = raw_sh + "5_price/{0}_{1}.json" search_info_arr = [{ 'inYear': year1, 'inMonth': year1 + '12', 'searchDate': year1 + '-12-31' }, { 'inYear': year2, 'inMonth': year2 + '12', 'searchDate': year2 + '-12-31' }] codes = securitydb.get_valuable_security_code() for code in codes: for search_info in search_info_arr: url = url_template.format(code, search_info['inMonth'], search_info['inYear'], search_info['searchDate']) fileName = fileName_template.format(code, search_info['inYear']) net_functions.download(url, download_headers, fileName, 5)
# Browser-like request headers for money.finance.sina.com.cn, sent when
# fetching the per-year report pages in the inner loop below.
download_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'close',
    'Host': 'money.finance.sina.com.cn',
    'Referer': 'http://money.finance.sina.com.cn/corp/go.php/vFD_ProfitStatement/stockid/600006/ctrl/2013/displaytype/4.phtml',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# url_template and fileName_template are defined above this fragment; both
# are formatted with (code, year).
year = local_functions.get_current_year()
codes = securitydb.get_valuable_security_code()

for code in codes:
    # Download the first report: the page for the current year.
    url = url_template.format(code, year)
    fileName = fileName_template.format(code, year)
    net_functions.download(url, None, fileName)

    # Get the other report URLs from the page that was just saved.
    page = local_functions.read_html(fileName)
    year_urls = local_functions.get_report_link_year_and_urls(page)

    # Download the other reports.
    # The loop variables deliberately do NOT reuse the names `year` / `url`:
    # rebinding `year` here used to leak into the next iteration of the outer
    # loop, so every security after the first requested its "first report"
    # for the last linked year instead of the current year.
    for report_year, report_url in year_urls:
        report_file = fileName_template.format(code, report_year)
        # Fourth positional argument is presumably a delay in seconds -
        # confirm against net_functions.download.
        net_functions.download(report_url, download_headers, report_file, 1)
# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Root of the raw-data tree the downloaded spreadsheets are written under.
raw_sh = "../../../data/raw/sh/"

# SSE stock-list export endpoints; the two URLs differ only in stockType
# (1 is saved as shA.xls, 2 as shB.xls).
url_shA = "http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType=1"
url_shB = "http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType=2"

# Browser-like request headers sent with every download below.
downloadHeaders = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/share',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

# One entry per (source URL, destination file) pair.
url_file_arr = [
    {'url': url_shA, 'file': raw_sh + "1_security/shA.xls"},
    {'url': url_shB, 'file': raw_sh + "1_security/shB.xls"},
]

for entry in url_file_arr:
    net_functions.download(entry['url'], downloadHeaders, entry['file'])
import sys

import local_functions
import securitydb

# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so the
# process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Root of the raw-data tree and the per-company output file beneath it.
raw_sh = "../../../data/raw/sh/"
fileNameTemplate = raw_sh + "6_stock_struct/{0}.json"

# SSE stock-structure query; {0} is the company code.
urlTemplate = "http://query.sse.com.cn/security/stock/queryCompanyStockStruct.do?jsonCallBack=&isPagination=false&companyCode={0}"

# Browser-like request headers sent with every query below.
downloadHeaders = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/assortment/stock/list/info/capital/index.shtml?COMPANY_CODE=603993',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

codes = securitydb.get_valuable_security_code()

# net_functions is not imported in the visible part of this script; it is
# expected to already be in scope.
for code in codes:
    net_functions.download(
        urlTemplate.format(code),
        downloadHeaders,
        fileNameTemplate.format(code),
        # Fourth positional argument is presumably a delay in seconds -
        # confirm against net_functions.download.
        10,
    )