def getStartDate(OUTPUT_FILE):
    """Return the date from which scraping should (re)start for OUTPUT_FILE.

    When resume mode is on, continue from the last date already present in
    the CSV; an empty/missing file falls back to the absolute start date.
    When resume mode is off, always start from the absolute start date.
    """
    if not S.RESUME_FILE:
        return S.ABS_START
    startdt = getLastDate(OUTPUT_FILE)
    # File is likely to be empty, hence scrape from beginning
    if startdt is None or len(startdt) == 0:
        return S.ABS_START
    return startdt
def mvpUpdateMPV(counter, scode):
    """Bring the MVP data for one counter up to date with its EOD CSV file.

    counter: counter short name used to build file paths.
    scode:   stock code; 0 apparently means "no code suffix" in the file name,
             otherwise it is used as a string path component — NOTE(review):
             compared with int 0 but concatenated as str; confirm callers'
             argument type.
    Returns False when there is nothing to update; otherwise returns None
    after processing (no explicit True return).
    """
    # Source EOD CSV: with or without the stock-code suffix.
    if scode == 0:
        inputfl = S.DATA_DIR + counter + '.csv'
    else:
        inputfl = S.DATA_DIR + counter + "." + scode + ".csv"
    lastdt = getLastDate(inputfl)
    # Destination MVP CSV for this counter.
    fname = S.DATA_DIR + S.MVP_DIR + counter
    csvfl = fname + ".csv"
    mvpdt = getLastDate(csvfl)
    if mvpdt is None:
        # No MVP data yet; nothing to resume from.
        print "Skip empty file:", counter
        return False
    # Number of business days the MVP file lags behind the EOD file.
    days = getBusDaysBtwnDates(mvpdt, lastdt)
    if days <= 0:
        print "Already latest: ", counter, lastdt
        return False
    # Read only the trailing `days` EOD rows that are missing from MVP.
    lines = tail2(inputfl, days)
    for eod in lines:
        if updateMPV(counter, scode, eod):
            chartdays = load_mvp_args(True, False)
            if mvpSynopsis(counter, scode, chartdays):
                # Deliberately disabled chart generation (dead branch).
                if 1 == 0:  # 2018-12-21 skip to speed up daily download
                    chartdays = load_mvp_args(False, True)
                    mvpChart(counter, scode, chartdays)
def scrapeKlse(procmode, force_update, resume, i3onSat):
    '''
    Determine if can use latest price found in i3 stocks page
    Conditions:
        1. latest eod record in csv file is not today
        2. latest eod record in csv file is 1 trading day behind that of
           investing.com latest eod

    procmode:     when truthy, only report/perform the post-update decision
                  (debug-style "post updating mode") instead of scraping.
    force_update: skip the trading-day check and scrape up to today.
    resume:       passed through to writeLatestPrice; also forces scraping.
    i3onSat:      forwarded to checkI3LastTradingDay (Saturday handling).
    '''
    # Use YTL.4677 as the sentinel file for "how fresh is our data".
    lastdt = getLastDate(S.DATA_DIR + 'YTL.4677.csv')
    if force_update or resume:
        # Bypass the last-trading-day services: [lastdt, today].
        dates = []
        dates.append(lastdt)
        dates.append(getToday('%Y-%m-%d'))
    else:
        if 1 == 1:
            # use i3 instead of investing.com due to delayed updating of EOD
            # since 4Q 2018 (investing.com path kept as dead code below)
            dates = checkI3LastTradingDay(lastdt, i3onSat)
        else:
            dates = checkInvComLastTradingDay(lastdt)
    if dates is None or (len(dates) == 1 and dates[0] == lastdt):
        # Already up to date: either report (procmode) or run post-processing.
        if procmode:
            print "Post updating mode ON"
            print lastdt, dates
            return
        print "Already latest. Post-updating now."
        postUpdateProcessing()
    else:
        if procmode:
            print "Post updating mode OFF"
        # Exactly one trading day behind i3's latest -> can use i3 latest page.
        if len(dates) == 2 and dates[1] > lastdt and dates[0] == lastdt:
            useI3latest = True
        else:
            useI3latest = False
        # NOTE(review): globally enables debug output for the scrape run.
        S.DBG_ALL = True
        if useI3latest:
            print "Scraping from i3 latest ..."
            preUpdateProcessing()
            list1 = writeLatestPrice(dates[1], True, resume)
        else:
            print "Scraping from i3 recent ..."
            # I3 only keeps 1 month of EOD, while investing.com cannot do more
            # than 5 months. Have enhanced investing.com code to break down
            # downloads by every 3 months
            if 1 == 0:
                # Dead branch; `klse` is presumably a module-level name —
                # TODO confirm before ever re-enabling.
                list1 = scrapeI3(loadKlseCounters(klse))
            else:
                list1 = writeLatestPrice(dates[1], True, resume, dates[1])
        # Indices/FX related to KLSE are scraped separately from investing.com.
        list2 = scrapeKlseRelated('scrapers/investingcom/klse.idmap')
        if len(list2):
            # Only update MVP analytics when the klsemvp project is on PYTHONPATH.
            pypath = os.environ['PYTHONPATH'].split(os.pathsep)
            if any("klsemvp" in s for s in pypath):
                from analytics.mvp import mpvUpdateKlseRelated
                mpvUpdateKlseRelated()
        if S.USEMONGO:
            # do not perform upsert ops due to speed
            eodlist = list2 + list1
            dbUpdateLatest(eodlist)
# download only selected counters stocklist = formStocklist(stocks, klse) else: # Full download using klse.txt stocklist = loadKlseCounters(klse) cookie, crumb = getYahooCookie('https://uk.finance.yahoo.com/quote/AAPL/') for shortname in sorted(stocklist.iterkeys()): stock_code = stocklist[shortname] if len(stock_code) > 0: # OUTPUT_FILE = '../../data/yahoo/' + shortname + "." + stock_code + ".csv" OUTPUT_FILE = getDataDir( S.DATA_DIR) + 'yahoo/' + shortname + "." + stock_code + ".csv" if S.RESUME_FILE: lastdt = getLastDate(OUTPUT_FILE) if len(lastdt) == 0: # File is likely to be empty, hence scrape from beginning lastdt = S.ABS_START print shortname, stock_code print "Scraping", shortname, stock_code, lastdt, '$' q = YahooQuote(cookie, crumb, shortname, stock_code + ".KL", lastdt, "2018-02-01") if len(q.getCsvErr()) > 0: st_code, st_reason = q.getCsvErr().split(":") rtn_code = int(st_code) print rtn_code, st_reason else: print q writeCsv = True if writeCsv:
def scrapeKlseRelated(klsemap, WRITE_CSV=True, dbg=False): idmap = loadIdMap(klsemap) # counters = 'USDMYR.2168,FTFBM100.0200,FTFBMKLCI.0201,FTFBMMES.0202,FTFBMSCAP.0203,FTFBM70.0204,FTFBMEMAS.0205' counterlist = S.KLSE_RELATED.split(',') eodlist = [] for i in counterlist: counter = i.split('.') shortname = counter[0] stock_code = counter[1] rtn_code = 0 OUTPUT_FILE = getDataDir( S.DATA_DIR) + shortname + "." + stock_code + ".csv" TMP_FILE = OUTPUT_FILE + 'tmp' if S.RESUME_FILE: lastdt = getLastDate(OUTPUT_FILE) if len(lastdt) == 0: # File is likely to be empty, hence scrape from beginning lastdt = S.ABS_START else: lastdt = S.ABS_START enddt = getToday('%Y-%m-%d') if lastdt == enddt: print "Skipped downloaded:", counter continue print 'Scraping {0},{1}: lastdt={2}, End={3}'.format( shortname, stock_code, lastdt, enddt) while True: startdt = lastdt if getDaysBtwnDates(lastdt, enddt) > 22 * 3: # do 3 months at a time stopdt = getDayOffset(startdt, 22 * 3) lastdt = getNextBusinessDay(stopdt) else: stopdt = enddt eod = InvestingQuote(idmap, shortname, startdt, stopdt) if dbg: for item in eod: print item if len(eod.getCsvErr()) > 0: print eod.getCsvErr() elif isinstance(eod.response, unicode): dfEod = eod.to_df() if isinstance(dfEod, pd.DataFrame): if dbg: print dfEod[:5] if WRITE_CSV: dfEod.index.name = 'index' dfEod.to_csv(TMP_FILE, index=False, header=False) dates = pd.to_datetime(dfEod["Date"], format='%Y%m%d') dfEod["Date"] = dates.dt.strftime('%Y-%m-%d').tolist() elist = dfEod.values.tolist()[0] eodlist.append(','.join(map(str, unpackEOD(*elist)))) else: print "ERR:" + dfEod + ": " + shortname + "," + lastdt rtn_code = -2 if WRITE_CSV: appendCsv(rtn_code, OUTPUT_FILE) if stopdt == enddt: break return eodlist