Example #1
def getStartDate(OUTPUT_FILE):
    if S.RESUME_FILE:
        startdt = getLastDate(OUTPUT_FILE)
        if startdt is None or len(startdt) == 0:
            # File is likely to be empty, hence scrape from beginning
            startdt = S.ABS_START
    else:
        startdt = S.ABS_START
    return startdt
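The getLastDate() helper used above is not shown on this page; a minimal stand-in, assuming the CSV's first column is a '%Y-%m-%d' date and that the file may be missing or empty, could look like this:

def getLastDate(csvfile):
    # Hypothetical stand-in: return the date column of the last non-empty
    # row, or None when the file is missing or empty.
    try:
        with open(csvfile) as f:
            rows = [line for line in f if line.strip()]
    except IOError:
        return None
    if not rows:
        return None
    return rows[-1].split(',')[0]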
Example #2
def mvpUpdateMPV(counter, scode):
    if scode == 0:
        inputfl = S.DATA_DIR + counter + '.csv'
    else:
        inputfl = S.DATA_DIR + counter + "." + scode + ".csv"
    lastdt = getLastDate(inputfl)
    fname = S.DATA_DIR + S.MVP_DIR + counter
    csvfl = fname + ".csv"
    mvpdt = getLastDate(csvfl)
    if mvpdt is None:
        print "Skip empty file:", counter
        return False
    days = getBusDaysBtwnDates(mvpdt, lastdt)
    if days <= 0:
        print "Already latest: ", counter, lastdt
        return False
    lines = tail2(inputfl, days)
    for eod in lines:
        if updateMPV(counter, scode, eod):
            chartdays = load_mvp_args(True, False)
            if mvpSynopsis(counter, scode, chartdays):
                if False:  # 2018-12-21 skip to speed up daily download
                    chartdays = load_mvp_args(False, True)
                    mvpChart(counter, scode, chartdays)
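getBusDaysBtwnDates() above decides how many trailing rows to re-read; a rough sketch of such a helper, assuming both arguments are '%Y-%m-%d' strings and ignoring exchange holidays, is:

import numpy as np

def getBusDaysBtwnDates(startdt, enddt):
    # Hypothetical sketch: count Mon-Fri days between the two dates.
    # numpy.busday_count excludes the end date and knows nothing about
    # KLSE holidays, so treat the result as an approximate row count.
    return int(np.busday_count(startdt, enddt))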
Example #3
import os

def scrapeKlse(procmode, force_update, resume, i3onSat):
    '''
    Determine whether the latest price found on the i3 stocks page can be used.
    Conditions:
      1. the latest EOD record in the csv file is not today's
      2. the latest EOD record in the csv file is 1 trading day behind
         that of investing.com's latest EOD
    '''
    lastdt = getLastDate(S.DATA_DIR + 'YTL.4677.csv')
    if force_update or resume:
        dates = []
        dates.append(lastdt)
        dates.append(getToday('%Y-%m-%d'))
    else:
        if True:
            # use i3 instead of investing.com due to delayed updating of EOD since 4Q 2018
            dates = checkI3LastTradingDay(lastdt, i3onSat)
        else:
            dates = checkInvComLastTradingDay(lastdt)
    if dates is None or (len(dates) == 1 and dates[0] == lastdt):
        if procmode:
            print "Post updating mode ON"
            print lastdt, dates
            return

        print "Already latest. Post-updating now."
        postUpdateProcessing()
    else:
        if procmode:
            print "Post updating mode OFF"

        if len(dates) == 2 and dates[1] > lastdt and dates[0] == lastdt:
            useI3latest = True
        else:
            useI3latest = False
            S.DBG_ALL = True

        if useI3latest:
            print "Scraping from i3 latest ..."
            preUpdateProcessing()
            list1 = writeLatestPrice(dates[1], True, resume)
        else:
            print "Scraping from i3 recent ..."
            # I3 only keeps 1 month of EOD, while investing.com cannot do more than 5 months
            # Have enhanced investing.com code to break down downloads by every 3 months
            if False:
                list1 = scrapeI3(loadKlseCounters(klse))
            else:
                list1 = writeLatestPrice(dates[1], True, resume, dates[1])

        list2 = scrapeKlseRelated('scrapers/investingcom/klse.idmap')
        if len(list2):
            pypath = os.environ['PYTHONPATH'].split(os.pathsep)
            if any("klsemvp" in s for s in pypath):
                from analytics.mvp import mpvUpdateKlseRelated
                mpvUpdateKlseRelated()

        if S.USEMONGO:
            # do not perform upsert ops due to speed
            eodlist = list2 + list1
            dbUpdateLatest(eodlist)
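getToday('%Y-%m-%d') appears in this and the last example; it is presumably a thin wrapper over datetime, along these lines:

from datetime import datetime

def getToday(fmt='%Y-%m-%d'):
    # Likely just today's date rendered in the requested format.
    return datetime.today().strftime(fmt)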
Example #4
        #  download only selected counters
        stocklist = formStocklist(stocks, klse)
    else:
        # Full download using klse.txt
        stocklist = loadKlseCounters(klse)

    cookie, crumb = getYahooCookie('https://uk.finance.yahoo.com/quote/AAPL/')

    for shortname in sorted(stocklist.iterkeys()):
        stock_code = stocklist[shortname]
        if len(stock_code) > 0:
            # OUTPUT_FILE = '../../data/yahoo/' + shortname + "." + stock_code + ".csv"
            OUTPUT_FILE = getDataDir(
                S.DATA_DIR) + 'yahoo/' + shortname + "." + stock_code + ".csv"
            if S.RESUME_FILE:
                lastdt = getLastDate(OUTPUT_FILE)
                if lastdt is None or len(lastdt) == 0:
                    # File is likely to be empty, hence scrape from beginning
                    lastdt = S.ABS_START
            else:
                lastdt = S.ABS_START
            print shortname, stock_code
            print "Scraping", shortname, stock_code, lastdt, '$'
            q = YahooQuote(cookie, crumb, shortname, stock_code + ".KL",
                           lastdt, "2018-02-01")
            if len(q.getCsvErr()) > 0:
                st_code, st_reason = q.getCsvErr().split(":")
                rtn_code = int(st_code)
                print rtn_code, st_reason
            else:
                print q
                writeCsv = True
                if writeCsv:
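The stocklist iterated above maps a short name to its stock code; a hypothetical loadKlseCounters(), assuming klse points to a text file of SHORTNAME,CODE lines, might produce that shape as follows:

def loadKlseCounters(klse):
    # Hypothetical sketch: klse is assumed to be a path to a text file of
    # "SHORTNAME,CODE" lines; returns {shortname: stock_code}.
    stocklist = {}
    with open(klse) as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) >= 2:
                stocklist[parts[0]] = parts[1]
    return stocklist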
Example #5
import pandas as pd

def scrapeKlseRelated(klsemap, WRITE_CSV=True, dbg=False):
    idmap = loadIdMap(klsemap)
    # counters = 'USDMYR.2168,FTFBM100.0200,FTFBMKLCI.0201,FTFBMMES.0202,FTFBMSCAP.0203,FTFBM70.0204,FTFBMEMAS.0205'
    counterlist = S.KLSE_RELATED.split(',')
    eodlist = []
    for i in counterlist:
        counter = i.split('.')
        shortname = counter[0]
        stock_code = counter[1]
        rtn_code = 0
        OUTPUT_FILE = getDataDir(
            S.DATA_DIR) + shortname + "." + stock_code + ".csv"
        TMP_FILE = OUTPUT_FILE + 'tmp'
        if S.RESUME_FILE:
            lastdt = getLastDate(OUTPUT_FILE)
            if lastdt is None or len(lastdt) == 0:
                # File is likely to be empty, hence scrape from beginning
                lastdt = S.ABS_START
        else:
            lastdt = S.ABS_START
        enddt = getToday('%Y-%m-%d')
        if lastdt == enddt:
            print "Skipped downloaded:", counter
            continue
        print 'Scraping {0},{1}: lastdt={2}, End={3}'.format(
            shortname, stock_code, lastdt, enddt)
        while True:
            startdt = lastdt
            # download roughly 3 months (about 22 * 3 business days) at a time
            if getDaysBtwnDates(lastdt, enddt) > 22 * 3:
                stopdt = getDayOffset(startdt, 22 * 3)
                lastdt = getNextBusinessDay(stopdt)
            else:
                stopdt = enddt
            eod = InvestingQuote(idmap, shortname, startdt, stopdt)
            if dbg:
                for item in eod:
                    print item
            if len(eod.getCsvErr()) > 0:
                print eod.getCsvErr()
            elif isinstance(eod.response, unicode):
                dfEod = eod.to_df()
                if isinstance(dfEod, pd.DataFrame):
                    if dbg:
                        print dfEod[:5]
                    if WRITE_CSV:
                        dfEod.index.name = 'index'
                        dfEod.to_csv(TMP_FILE, index=False, header=False)
                    dates = pd.to_datetime(dfEod["Date"], format='%Y%m%d')
                    dfEod["Date"] = dates.dt.strftime('%Y-%m-%d').tolist()
                    elist = dfEod.values.tolist()[0]
                    eodlist.append(','.join(map(str, unpackEOD(*elist))))
                else:
                    print "ERR:" + dfEod + ": " + shortname + "," + lastdt
                    rtn_code = -2

            if WRITE_CSV:
                appendCsv(rtn_code, OUTPUT_FILE)

            if stopdt == enddt:
                break

    return eodlist
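The 3-months-at-a-time windowing above relies on getDayOffset() and getNextBusinessDay(); simple stand-ins, assuming '%Y-%m-%d' strings, calendar-day offsets and no holiday calendar, could be:

from datetime import datetime, timedelta

def getDayOffset(datestr, offset):
    # Hypothetical sketch: shift the date by `offset` calendar days.
    d = datetime.strptime(datestr, '%Y-%m-%d') + timedelta(days=offset)
    return d.strftime('%Y-%m-%d')

def getNextBusinessDay(datestr):
    # Hypothetical sketch: step forward until the date lands on a weekday.
    d = datetime.strptime(datestr, '%Y-%m-%d') + timedelta(days=1)
    while d.weekday() > 4:  # 5 = Saturday, 6 = Sunday
        d += timedelta(days=1)
    return d.strftime('%Y-%m-%d')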