Example #1
def gather_sense():
    A = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    print("Done fetching finnews articles ... " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning finnews articles ... " +
          time.strftime('%Y-%m-%d %H:%M:%S'))

    if RETRAIN_GLOBAL_TV:
        TV = TfidfVectorizer(tokenizer=LemmaTokenizer(),
                             max_df=0.3,
                             min_df=2,
                             max_features=1000,
                             stop_words='english')
        tv = TV.fit_transform(documents)
        assure_folder(FILE_GLOBAL_TV)
        joblib.dump(TV, FILE_GLOBAL_TV)
        joblib.dump(TV, FILE_GLOBAL_TV_LST)
    else:
        #TV = joblib.load(FILE_GLOBAL_TV)
        TV = joblib.load(FILE_GLOBAL_TV_LST)
        tv = TV.transform(documents)
    print("Done corpus vectorizing ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    return A, IEX_ids, TV, tv
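
# LemmaTokenizer is referenced above but not defined in this example. A
# minimal sketch of what it likely looks like, assuming the usual NLTK-based
# custom-tokenizer pattern for scikit-learn (this class body is an
# assumption, not the original definition):
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer

class LemmaTokenizer:
    """Tokenize a document and reduce each token to its lemma."""
    def __init__(self):
        self.wnl = WordNetLemmatizer()

    def __call__(self, doc):
        return [self.wnl.lemmatize(t) for t in word_tokenize(doc)]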
Example #2
def refresh_sp500():
    assure_required_table()
    print("fetching sp500 list...")
    resp = requests.get(
        'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'html5lib')
    table = soup.find('table', {'class': 'wikitable sortable'})
    for row in table.findAll('tr')[1:]:
        r = row.findAll('td')
        #ticker = r[1].text.rstrip()
        ticker = r[0].text.rstrip()
        #print("working on "+ticker+"...")
        Log.info("SP500", "working on " + ticker + "...")
        check = fetch_rawsql(
            "select symbol from ff_scan_symbols where symbol='" + ticker + "'")
        if len(check) > 0:
            rawsql = "update ff_scan_symbols set last_updated=now() WHERE symbol='" + ticker + "'"
            runsql(rawsql)
        else:
            rawsql = "insert into ff_scan_symbols (symbol, last_updated, security, sector, subsec, hq, first_added) values (%s, %s, %s, %s, %s, %s, %s)"
            val = (ticker,
                   datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                   r[1].text, r[3].text, r[4].text, r[5].text,
                   (r[6].text if r[6].text and len(r[6].text) == 10 else None))
            runsql(rawsql, val)
    dbcommit()
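
# fetch_rawsql, runsql, and dbcommit are used throughout these examples but
# defined elsewhere. A plausible minimal sketch, assuming one module-level
# MySQL connection and dictionary cursors (the connection parameters and
# exact signatures are assumptions):
import mysql.connector

conn = mysql.connector.connect(host="localhost", database="ff",
                               user="ff", password="secret")

def fetch_rawsql(rawsql, val=None):
    """Run a query and return the rows as a list of dicts."""
    cur = conn.cursor(dictionary=True)
    cur.execute(rawsql, val or ())
    rows = cur.fetchall()
    cur.close()
    return rows

def runsql(rawsql, val=None):
    """Run a statement, optionally with %s parameters."""
    cur = conn.cursor()
    cur.execute(rawsql, val or ())
    cur.close()

def dbcommit():
    conn.commit()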
Example #3
def assure_required_tables():
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    r = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")[0]
    if r['d'] > 7:
        refresh_sp500()
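
# table_exist gates most of the DDL in these examples; it is likely a thin
# wrapper along these lines (an assumption -- the original helper is defined
# elsewhere):
def table_exist(table_name):
    """True when the table exists in the current schema."""
    return len(fetch_rawsql("show tables like '" + table_name + "'")) > 0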
Example #4
def assure_tables():
    if not table_exist("ff_status_scan"):
        runsql(FF_STATUS_SCAN)
        runsql("create index ff_status_scan_i1 on ff_status_scan (batched)")
        runsql("create index ff_status_scan_i2 on ff_status_scan (status)")
    else:
        a = fetch_rawsql("show table status like 'ff_status_scan'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_status_scan CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
    # maintenance related on collation correction
    if table_exist("ff_scan_symbols"):
        a = fetch_rawsql("show table status like 'ff_scan_symbols'")
        a = a[0]['Collation']
        if a != 'utf8_unicode_ci':
            runsql(
                "ALTER TABLE ff_scan_symbols CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci"
            )
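
# The show-status/ALTER sequence above is duplicated per table. If more
# tables ever need the collation correction, it could be factored into a
# helper like this (a sketch; assure_collation is hypothetical and not part
# of the original module):
def assure_collation(table, charset='utf8', collation='utf8_unicode_ci'):
    """Convert a table to the given collation if it differs."""
    a = fetch_rawsql("show table status like '" + table + "'")
    if a and a[0]['Collation'] != collation:
        runsql("ALTER TABLE " + table + " CONVERT TO CHARACTER SET " +
               charset + " COLLATE " + collation)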
Example #5
def do_scan():
    status_scan_starttime = datetime.datetime.now()
    assure_tables()

    symbols = fetch_rawsql(
        "select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<14"
    )
    s = [i['symbol'] for i in symbols]
    r = requests.get(FF_SCAN_HOST + "/api/v1/misc/nonsp/")
    if r.status_code != 200:
        print("Failed to retrieve list of non-SP500 symbols")
    else:
        r = r.json()
        for i in r['symbol_list']:
            if i in s or i in NON_SCAN_LIST:
                continue
            symbols += [{'symbol': i, 'sector': ''}]

    # stop when:
    # 1. data not stabilized, before 9:45am
    # 2. more than an hour after trading stops
    # 3. non-trading days
    enow = utc.localize(datetime.datetime.utcnow()).astimezone(eastern)
    etime = enow.hour + enow.minute / 60
    if enow.weekday() < 5 and etime > 9.75 and etime < 17.5:
        has_recent_scans = fetch_rawsql(
            "select count(1) cnt from ff_status_scan where timestampdiff(minute, scanned, '"
            + get_eastern() + "') < " + str(MINUTES_TRADING_SCANS))[0]['cnt']
        if has_recent_scans:
            print("Already scanned status in the last " +
                  str(MINUTES_TRADING_SCANS) +
                  " minutes. Cooldown even if FORCE_RUN")
            dbclose()
            sys.exit()
    elif not FORCE_RUN:
        print("Not during trading hours - skipped status scan")
        dbclose()
        sys.exit()

    # Clear scan cache only when scan actually runs
    if table_exist("ff_status_scan_a1"):
        runsql("drop table ff_status_scan_a1")
    runsql(FF_STATUS_SCAN_A1)

    n_rotten = 0
    for s0 in symbols:
        s = s0['symbol']
        print("Scanning " + s + " ...")
        try:
            q = requests.get("https://api.iextrading.com/1.0/stock/" + s +
                             "/quote")
            q = q.json()
        except Exception:
            print("- bad IEX call for " + s + " ... skipped")
            continue
        # check required keys
        if not ('calculationPrice' in q and 'latestPrice' in q
                and 'changePercent' in q and 'marketCap' in q
                and 'latestUpdate' in q and 'primaryExchange' in q):
            print("- bad IEX return format for " + s + " ... skipped")
            continue
        if 'Arca' in q['primaryExchange']:
            print("- not scanning ETF " + s + " ... skipped")
            continue
        if q['marketCap'] < MIN_MARKETCAP:
            print("- MarketCap too low to scan,  for " + s + " ... skipped")
            continue

        # double-check if IEX actually returns the latest quote
        updated = utc.localize(
            datetime.datetime.utcfromtimestamp(q['latestUpdate'] //
                                               1000)).astimezone(eastern)
        print(enow, updated)
        diff = enow - updated
        dmin = (diff.days * 86400 + diff.seconds) // 60
        if dmin > MINUTES_FRESH_SCANS and not FORCE_RUN:
            print("- rotten/older IEX return data for " + s + " ... skipped")
            n_rotten += 1
            if n_rotten >= MAX_ROTTEN:
                sys.exit()
            continue

        # Upon the first fresh IEX data of the day, swap out yesterday's
        # benchmark ceiling-floor tables (replace ff_stock_cf_w0 with the
        # fresh ff_stock_cf_w00 built by get_stock_adj). The benchmark tables
        # should only be updated at the time the market opens.
        if s == symbols[0]['symbol'] and q['calculationPrice'] == 'tops':
            if table_exist("ff_stock_cf_w00") and table_exist(
                    "ff_stock_cf_w0"):
                runsql("drop table ff_stock_cf_w0")
                runsql(
                    "create table ff_stock_cf_w0 as select * from ff_stock_cf_w00"
                )
                runsql(
                    "create index ff_stock_cf_w0_i1 on ff_stock_cf_w0 (symbol)"
                )
                runsql("drop table ff_stock_cf_w00")

        # FF_scans - Volume
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/vol/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'high' in r['rarity']:
                msg = VOL_TEMPLATE[np.random.choice(
                    len(VOL_TEMPLATE),
                    p=[e['prob'] for e in VOL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&VOLUME", r['volume_human'])
                msg = msg.replace("&Z", "{0:.1f}".format(r['z']))
                msg = msg.replace("&URL", FF_SCAN_HOST + "fb/vol/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"v": r['volume_human'], "z": "{0:.1f}".format(r['z'])})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'vol', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF vol scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Volatility + Fluctuation
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/pfl/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = PFL_TEMPLATE[np.random.choice(
                    len(PFL_TEMPLATE),
                    p=[e['prob'] for e in PFL_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/vol/" + s + "/")
                msg = msg.replace("&FLUX", str(r['hl_ratio']) + "%")
                msg = msg.replace("&L", str(r['low']))
                msg = msg.replace("&H", str(r['high']))
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"f": r['hl_ratio'], "l": r['low'], "h": r['high']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'pfl', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF pfl scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Single Day Change
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/chg/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if s0['sector']:
                val = (s, s0['sector'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"),
                       enow.strftime("%Y-%m-%d %H:%M:%S"))
                runsql(INSERT_A1, val)
            if r['I'] > 90 and 'large' in r['rarity']:
                msg = CHG_TEMPLATE[np.random.choice(
                    len(CHG_TEMPLATE),
                    p=[e['prob'] for e in CHG_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&RARITY", r['rarity'])
                msg = msg.replace("&CHG", str(abs(round(r['chg'] * 100, 2))))
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/chg/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                msg = msg.replace(
                    "&HAVE", ("is having" if r['status'] == "live" else "had"))
                msg = msg.replace(
                    "&JS", (("roar" if "extreme" in r['rarity'] else
                             ("jump" if "very" in r['rarity'] else "gain"))
                            if r['chg'] > 0 else
                            ("crash" if "extreme" in r['rarity'] else
                             ("dive" if "very" in r['rarity'] else "drop"))))
                #ctx = str({"c": r['chg'], "m": r['move'], "js": r['js']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'chg', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF chg scan for " + s + " ... skipped")
            print(r)

        # FF_scans - Ceiling Floor Penetration
        while True:
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/cfp/" + s + "/",
                             headers=headers)
            if r.status_code != 429:
                break
            time.sleep(5)
        try:
            r = r.json()
            if r['I'] > 90 and r['target'] == 'quoted':
                msg = CFP_TEMPLATE[np.random.choice(
                    len(CFP_TEMPLATE),
                    p=[e['prob'] for e in CFP_TEMPLATE],
                    size=1)[0]]["tweet"]
                msg = msg.replace("&STOCK", "$" + s)
                msg = msg.replace("&TARGET", r['target'])
                msg = msg.replace("&MOVE", r['move'])
                msg = msg.replace("&URL", FF_SCAN_HOST + "/fb/dme/" + s + "/")
                msg = msg.replace("&FH", FF_SCAN_HOST + "/fh/" + s + "/")
                #ctx = str({"t": r['target'], "m": r['move']})
                ctx = str(r)
                val = (s, q['latestPrice'], q['changePercent'], q['marketCap'],
                       updated.strftime("%Y-%m-%d %H:%M:%S"), get_eastern(),
                       enow.strftime("%Y-%m-%d %H:%M:%S"), 'cfp', r['rarity'],
                       ctx, msg, r['I'])
                runsql(INSERT_SQL, val)
        except Exception as e:
            print(e)
            print("- error FF cfp scan for " + s + " ... skipped")
            print(r)

    runsql("drop table if exists ff_status_scan_w1")
    runsql(
        "create table ff_status_scan_w1 as select a.*, @rn:=@rn+1 rn from (select batched, count(1) n_item, count(distinct symbol) n_symbol from ff_status_scan group by batched order by batched desc) a, (select @rn:=0) b"
    )
    runsql(
        "create index ff_status_scan_w1_i1 on ff_status_scan_w1 (n_symbol, rn)"
    )
    runsql("drop table if exists ff_status_scan_w2")
    runsql(
        "create table ff_status_scan_w2 as select stype, if(changePct>=0,'u','d') ch, count(1) n from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched and a.marketcap>"
        + str(MIN_MARKETCAP) + " group by stype, if(changePct>=0,'u','d')")
    runsql("drop table if exists ff_status_scan_w3")
    runsql("create table ff_status_scan_w3 as select * from ff_status_scan_a1")
    runsql("drop table if exists ff_status_scan_w4")
    runsql(
        "create table ff_status_scan_w4 as select max(batched) batched, sector, count(1) cnt, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    if not table_exist("ff_status_scan_a2"):
        runsql(
            "create table ff_status_scan_a2 as select * from ff_status_scan_w4"
        )
        runsql(
            "create index ff_status_scan_a2_i1 on ff_status_scan_a2 (batched)")
    else:
        runsql("insert into ff_status_scan_a2 select * from ff_status_scan_w4")
    runsql("drop table if exists ff_status_scan_w5")
    runsql(
        "create table ff_status_scan_w5 as select d.sector, stype, count(1) n, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, any_value(d.cnt) n_sector, any_value(round(count(1)/d.cnt,4)) s_ratio from ff_status_scan a join (select batched as max_batched from ff_status_scan_w1 order by rn limit 1) b on a.batched=b.max_batched join (select symbol, sector from ff_scan_symbols where datediff(now(),last_updated)<60) c on a.symbol=c.symbol right join ff_status_scan_w4 d on c.sector=d.sector group by d.sector, a.stype order by sector, s_ratio desc"
    )
    runsql(
        "create index ff_status_scan_w5_i1 on ff_status_scan_w5 (sector, s_ratio)"
    )
    runsql("drop table if exists ff_status_scan_w6")
    runsql(
        "create table ff_status_scan_w6 as select max(batched) batched, sector, any_value(if(locate(' ',sector)>0, concat(substr(sector,1,1),substr(sector,locate(' ',sector)+1,2)), substr(sector,1,3))) sector3, count(1) cnt, sum(if(changePct>=0,1,0)) n_u, sum(if(changePct<0,1,0)) n_d, sum(marketcap) sm, round(sum(changePct*marketcap)) cm, round(sum(abs(changePct)*marketcap)) am, round(sum(changePct*marketcap)/sum(marketcap)*100,2) marketchgPct from ff_status_scan_w3 group by sector having cnt>20 order by marketchgPct desc"
    )
    runsql("drop table if exists ff_status_scan_w7")
    runsql(
        "create table ff_status_scan_w7 as select a.security, a.subsec, a.hq, b.*, changePct*marketcap marketeff, abs(changePct*marketcap) marketabs from (select symbol, security, subsec, hq from ff_scan_symbols where datediff(now(),last_updated)<60) a, ff_status_scan_w3 b where a.symbol=b.symbol order by marketabs"
    )
    runsql(
        "create index ff_status_scan_w7_i1 on ff_status_scan_w7 (sector, marketabs)"
    )
    runsql("drop table if exists ff_status_scan_w8")
    runsql(
        "create table ff_status_scan_w8 as select b.batched_date, a.* from ff_status_scan_a2 a,(select date(batched) batched_date, max(batched) batched_time from ff_status_scan_a2 group by date(batched)) b where a.batched=b.batched_time"
    )
    runsql(
        "create index ff_status_scan_w8_i1 on ff_status_scan_w8 (sector, batched_date)"
    )
    runsql(
        "create index ff_status_scan_w8_i2 on ff_status_scan_w8 (batched_date)"
    )
    cache_baking()

    status_scan_endtime = datetime.datetime.now()

    timedetail = "status_scan job is running from {0} to {1}, and total runging time is {2}".format(
        status_scan_starttime, status_scan_endtime,
        (status_scan_endtime - status_scan_starttime).seconds)
    print(timedetail)
    Log.info("status_scan.py", timedetail)
Example #6
def cache_baking():
    print("Caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_status_scan_o1")
    rawsql = "create table ff_status_scan_o1 as select symbol, stype, infov, w_micd from \
          (select a.*, td+w_mic*md w_micd, @rn:=if(@s=symbol,@rn+1,0) rn, @s:=symbol s from \
          (select a.symbol, marketCap, updated, scanned, stype, rarity, context, infov, if(stype='vol',if(md>1,1,md),1) md, if(td>" + str(
        AGING_TIME_FACTOR
    ) + "," + str(AGING_TIME_FACTOR) + ",td)*rand()*" + str(
        AGING_RAND_SCALE
    ) + " td, (log10(marketCap)-9)*(infov-90)*(log10(abs(changePct*10000)+1)) w_mic from \
            (select a.*, (hour(updated)*60+minute(updated))/" + str(
        AGING_MATURE_HOUR * 60
    ) + " md, if(timestampdiff(minute,batched,utc_timestamp())<200,0,(timestampdiff(minute,batched,utc_timestamp())-200)/480) td from ff_status_scan a) a, \
              (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b where a.batched=b.max_batched order by w_mic desc limit " + str(
        N_OVERALL_CACHE * 4) + ") a, \
            (select @s:='',@rn:=0) b order by marketCap desc, w_micd desc \
          ) a where rn=0 order by w_micd desc limit " + str(N_OVERALL_CACHE)
    runsql(rawsql)
    runsql("drop table if exists ff_status_scan_o2")
    runsql(
        "create table ff_status_scan_o2 as select a.*, c.w_micd from ff_status_scan a, (select batched as max_batched from ff_status_scan_w1 where n_symbol>=20 order by rn limit 1) b, ff_status_scan_o1 c where a.batched=b.max_batched and a.symbol=c.symbol order by w_micd desc"
    )
    runsql("drop table if exists ff_status_scan_o3")
    runsql(
        "create table ff_status_scan_o3 as select (@rn:=if(@s=symbol,@rn,@rn+1))*1 rn, @s:=symbol symbol, quote, changePct, marketcap, stype, context, infov, w_micd from ff_status_scan_o2 a, (select @rn:=0, @s:='') b"
    )
    S = fetch_rawsql(
        "select rn, symbol, max(quote) quote, max(changePct) changePct, max(marketcap) marketcap, max(w_micd) w_micd from ff_status_scan_o3 group by rn, symbol"
    )
    if S:
        for s in S:
            r = requests.get(FF_SCAN_HOST + "/api/v1/fame66/w52/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'datasets' in r:
                    ctx = r['datasets'][0]
                    ctx = str(ctx)
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f6w', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 support of " +
                              s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/f66/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'bits' in r:
                    a = [i['pr'] for i in r['bits']]
                    b = [i['c_diff'] for i in r['bits']]
                    ctx = str({"pr": a, "c_diff": b})
                    if len(ctx) <= 255:
                        runsql(
                            "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                            (s['rn'], s['symbol'], s['quote'], s['changePct'],
                             s['marketcap'], 'f66', ctx, 0, s['w_micd']))
                    else:
                        print("ctx longer than 255 for fame66 chart of " +
                              s['symbol'])
            r = requests.get(FF_SCAN_HOST + "/api/v1/famebits/ahd/" +
                             s['symbol'] + "/",
                             headers=headers)
            if r.status_code == 200:
                r = r.json()
                if 'target' in r:
                    if r['target'] == 'self':
                        ctx = str(r)
                        if len(ctx) <= 255:
                            runsql(
                                "insert into ff_status_scan_o3 (rn, symbol, quote, changePct, marketcap, stype, context, infov, w_micd) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                (s['rn'], s['symbol'], s['quote'],
                                 s['changePct'], s['marketcap'], 'ahd', ctx,
                                 r['I'], s['w_micd']))
                        else:
                            print("ctx longer than 255 for Fame Ahead of " +
                                  s['symbol'])
    runsql("drop table if exists ff_status_scan_o4")
    runsql(
        "create table ff_status_scan_o4 as select * from ff_status_scan_o3 order by rn"
    )
    runsql("create index ff_status_scan_o4_i1 on ff_status_scan_o4 (rn)")
    runsql(
        "create index ff_status_scan_o4_i2 on ff_status_scan_o4 (stype, infov, marketcap)"
    )
    print("Done caching Baking list ... " + time.strftime('%Y-%m-%d %H:%M:%S'))
Example #7
def incremental_sense():
    print("Started incrementals " + time.strftime('%Y-%m-%d %H:%M:%S'))
    runsql("drop table if exists ff_finnews_d0")
    runsql(
        "create table ff_finnews_d0 as select iex_id, any_value(published) published, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 week) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_d0_i1 on ff_finnews_d0 (iex_id)")
    runsql("drop table if exists ff_finnews_d1")
    runsql(
        "create table ff_finnews_d1 as select a.iex_id, a.title, a.article, b.iex_id dummy from ff_finnews_d0 a left join ff_finnews_b1 b on a.iex_id=b.iex_id having dummy is null"
    )
    runsql("create index ff_finnews_d1_i1 on ff_finnews_d1 (iex_id)")
    A = fetch_rawsql("select iex_id, title, article from ff_finnews_d1")
    print("Done fetching incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    TV = joblib.load(FILE_GLOBAL_TV_LST)
    tv = TV.transform(documents)
    print("Done vectorizing incremental articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))

    dig_docs(A, IEX_ids, TV, tv, is_incremental=True)

    runsql("drop table if exists ff_finnews_d2")
    runsql(
        "create table ff_finnews_d2 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), raw_sense float)"
    )
    IEX_SYMBOL = fetch_rawsql(
        "select a.symbol, a.iex_id, a.published from ff_finnews_iex a, ff_finnews_d1 b where a.iex_id=b.iex_id"
    )
    ALL_LOCAL, _X, _y = focus_prep(IEX_SYMBOL, IEX_ids, tv, is_train=False)
    print("Done preparing incremental articles for scoring " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    for term in TERMS:
        focus_models(term, ALL_LOCAL, [], [], is_train=False)
        print("Done scoring incremental articles for " + term + " : " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("create index ff_finnews_d2_i1 on ff_finnews_d2 (iex_id)")
    runsql("drop table if exists ff_finnews_d3")
    runsql(
        "create table ff_finnews_d3 as select a.*, published, title from ff_finnews_d2 a, ff_finnews_d0 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_d3_i1 on ff_finnews_d3 (symbol, iex_id, model_type, term)"
    )
    runsql("drop table if exists ff_finnews_d4")
    runsql(
        "create table ff_finnews_d4 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
           , max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
           , avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
           from ff_finnews_d3 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_d4_i1 on ff_finnews_d4 (symbol, iex_id, model_type)"
    )
    runsql("drop table if exists ff_finnews_d5")
    runsql(
        "create table ff_finnews_d5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
           , sum( if(model_type='L', 1/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense, 1/log10(ifnull(b.n_doc,0)+2)/(1/log10(ifnull(b.n_doc,0)+2)+1) * con_sense) ) con_sense \
           , if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
           , sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
           from ff_finnews_d4 a left join ff_finnews_b4 b on a.symbol = b.symbol group by symbol, iex_id"
    )
    runsql("drop table if exists ff_finnews_d6")
    runsql(
        "create table ff_finnews_d6 as select ifnull(b.bin,if(con_sense>max_max,99,0)) bin , a.* from ff_finnews_d5 a inner join (select max(max_con) max_max, min(min_con) min_min from ff_finnews_b7) c left join ff_finnews_b7 b on a.con_sense>b.min_con and a.con_sense<=b.max_con"
    )
    runsql("drop table if exists ff_finnews_d7")
    runsql(
        "create table ff_finnews_d7 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_d6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    runsql("drop table if exists ff_finnews_d8")
    runsql(
        "create table ff_finnews_d8 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_d7 a, ff_finnews_b11 b where a.symbol=b.symbol"
    )
    print("Done incremental sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=True)
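
# The ff_finnews_d5 aggregation blends the local ('L') and global ('G')
# model senses with a document-count shrinkage: with w = 1/log10(n_doc+2),
# the local term gets weight 1/(1+w) and the global term w/(1+w), so symbols
# with more articles lean more on their own local model. The same arithmetic
# in plain Python (illustrative only; blend_sense is not part of the module):
import math

def blend_sense(local_sense, global_sense, n_doc):
    """Mirror the d5/b5 shrinkage: more per-symbol documents shifts weight
    from the global model toward the symbol's local model."""
    w = 1.0 / math.log10((n_doc or 0) + 2)  # global-model weight factor
    return (local_sense + w * global_sense) / (1.0 + w)

# blend_sense(0.4, -0.1, 3)   -> leans toward the global sense
# blend_sense(0.4, -0.1, 500) -> dominated by the local sense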
Example #8
def build_sense():
    print("Started Fame Sense building ... " +
          time.strftime('%Y-%m-%d %H:%M:%S'))
    A, IEX_ids, TV, tv = gather_sense()
    dig_docs(A, IEX_ids, TV, tv)
    profile_training()

    print("Started focus models " + time.strftime('%Y-%m-%d %H:%M:%S'))
    A = fetch_rawsql(
        "select iex_id, any_value(iex_title) title, any_value(if(length(article)>length(iex_summary),article,iex_summary)) article from ff_finnews_iex where published>=date_sub(now() , interval 1 year) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    print("Done fetching focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    documents, IEX_ids = doc_to_vec(A)
    print("Done cleaning focus articles " + time.strftime('%Y-%m-%d %H:%M:%S'))
    tv = TV.transform(documents)
    print("Done vectorizing focus articles " +
          time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("drop table if exists ff_finnews_b0")
    runsql(
        "create table ff_finnews_b0 (symbol varchar(10), iex_id varchar(30), term varchar(10), model_type varchar(4), y float, chg_term float, raw_sense float)"
    )
    for term in TERMS:
        IEX_SYMBOL = fetch_rawsql(
            "select iex_id, symbol, published, date_same, chg_" + term +
            " chg from ff_finnews_w7")
        ALL_LOCAL, OVERALL_X, OVERALL_y = focus_prep(IEX_SYMBOL, IEX_ids, tv)
        print("Done preparing focus articles for " + term + " training " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

        focus_models(term, ALL_LOCAL, OVERALL_X, OVERALL_y, is_train=True)
        print("Finished local sense on term " + term + " ... " +
              time.strftime('%Y-%m-%d %H:%M:%S'))

    runsql("create index ff_finnews_b0_i1 on ff_finnews_b0 (iex_id)")
    runsql("drop table if exists ff_finnews_b1")
    runsql(
        "create table ff_finnews_b1 as select iex_id, any_value(published) published, any_value(date(published)) published_date, any_value(iex_source) iex_source, any_value(iex_title) iex_title from ff_finnews_iex where published>=date_sub(now() , interval 6 month) and greatest(length(article),length(iex_summary))>500 group by iex_id"
    )
    runsql("create index ff_finnews_b1_i1 on ff_finnews_b1 (iex_id)")
    runsql("drop table if exists ff_finnews_b2")
    runsql(
        "create table ff_finnews_b2 as select a.*, b.published, b.published_date, b.iex_source, b.iex_title from ff_finnews_b0 a, ff_finnews_b1 b where a.iex_id=b.iex_id"
    )
    runsql(
        "create index ff_finnews_b2_i1 on ff_finnews_b2 (symbol, iex_id, model_type, term)"
    )
    runsql("drop table if exists ff_finnews_b3")
    runsql(
        "create table ff_finnews_b3 as select symbol, iex_id, model_type, any_value(published) published, sum(if(raw_sense>=0,1,0)) n_up, sum(if(raw_sense<0,1,0)) n_down \
           , max(raw_sense) max_sense, min(raw_sense) min_sense, sum(raw_sense)/(max(raw_sense)-min(raw_sense)) con_sense \
           , avg(if(raw_sense>=0,raw_sense,null)) pos_sense, avg(if(raw_sense<0,raw_sense,null)) neg_sense \
           from ff_finnews_b2 group by symbol, iex_id, model_type")
    runsql(
        "create index ff_finnews_b3_i1 on ff_finnews_b3 (symbol, iex_id, model_type)"
    )
    runsql("drop table if exists ff_finnews_b4")
    runsql(
        "create table ff_finnews_b4 as select symbol, count(distinct iex_id) n_doc from ff_finnews_b3 group by symbol"
    )
    runsql("create index ff_finnews_b4_i1 on ff_finnews_b4 (symbol)")
    runsql("drop table if exists ff_finnews_b5")
    runsql(
        "create table ff_finnews_b5 as select a.symbol, a.iex_id, any_value(published) published, any_value(b.n_doc) n_doc, sum(n_up) n_up, sum(n_down) n_down \
           , sum( if(model_type='L', 1/(1/log10(b.n_doc+2)+1) * con_sense, 1/log10(b.n_doc+2)/(1/log10(b.n_doc+2)+1) * con_sense) ) con_sense \
           , if(sum(n_up)>0,sum(n_up*pos_sense)/sum(n_up),null) pos_sense, if(sum(n_down)>0,sum(n_down*neg_sense)/sum(n_down),null) neg_sense \
           , sum(if(model_type='G',n_up,0)) n_g_up, sum(if(model_type='G',n_down,0)) n_g_down, sum(if(model_type='L',n_up,0)) n_l_up, sum(if(model_type='L',n_down,0)) n_l_down \
           from ff_finnews_b3 a, ff_finnews_b4 b where a.symbol = b.symbol group by symbol, iex_id"
    )
    runsql("drop table if exists ff_finnews_b6")
    runsql(
        "create table ff_finnews_b6 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) bin, a.* from (select a.*, cnt from (select * from ff_finnews_b5) a, (select count(1) cnt from ff_finnews_b5) b order by con_sense) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b6_i1 on ff_finnews_b6 (bin)")
    runsql("drop table if exists ff_finnews_b7")
    runsql(
        "create table ff_finnews_b7 select a.bin, a.min_con, ifnull(b.min_con,(select max(con_sense) from ff_finnews_b6)) max_con from (select bin bin, min(con_sense) min_con from ff_finnews_b6 group by bin) a left join (select bin-1 bin, min(con_sense) min_con from ff_finnews_b6 group by bin-1) b on a.bin=b.bin"
    )
    runsql("drop table if exists ff_finnews_b8")
    runsql(
        "create table ff_finnews_b8 as select a.*, date(published) published_date, (con_sense-b.min_con)/(b.max_con-b.min_con) bin_float from ff_finnews_b6 a, ff_finnews_b7 b where a.bin=b.bin"
    )
    runsql(
        "create index ff_finnews_b8_i1 on ff_finnews_b8 (symbol, published)")
    runsql(
        "create index ff_finnews_b8_i2 on ff_finnews_b8 (symbol, published_date)"
    )
    runsql("drop table if exists ff_stock_date6m")
    runsql("create table ff_stock_date6m as select * from \
           (select adddate('1970-01-01',t4.i*10000 + t3.i*1000 + t2.i*100 + t1.i*10 + t0.i) dest_date from \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t0, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t1, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t2, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t3, \
           (select 0 i union select 1 union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9) t4) v \
           where dest_date between date_sub(curdate(), interval 6 month) and curdate()"
           )
    runsql("drop table if exists ff_finnews_b9")
    runsql(
        "create table ff_finnews_b9 as select distinct(symbol) symbol, dest_date from ff_finnews_b6, ff_stock_date6m"
    )
    runsql(
        "create index ff_finnews_b9_i1 on ff_finnews_b9 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b10")
    runsql(
        "create table ff_finnews_b10 as select symbol, dest_date, sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50 dense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 dense_base \
           , ifnull((sum(power(0.8,greatest(dlag,0.01))*(bin+bin_float))+0.26*50)/(sum(power(0.8,greatest(dlag,0.01)))+0.26),50) dense_sense \
           , sum(power(0.8,greatest(dlag,0.01))*con_sense) condense_sum, sum(power(0.8,greatest(dlag,0.01)))+0.26 condense_base \
           , ifnull((sum(power(0.8,greatest(dlag,0.01))*con_sense))/(sum(power(0.8,greatest(dlag,0.01)))+0.26),0) condense_sense \
           from (select a.symbol, a.dest_date, b.bin, b.bin_float, round(TIMESTAMPDIFF(HOUR,published,dest_date)/24,2) dlag, con_sense from ff_finnews_b9 a left join ff_finnews_b8 b on a.symbol=b.symbol and DATEDIFF(dest_date, b.published) between 0 and 13) a group by symbol, dest_date"
    )
    runsql(
        "create index ff_finnews_b10_i1 on ff_finnews_b10 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b11")
    runsql(
        "create table ff_finnews_b11 as select a.*, a.dense_sense-b.dense_sense dense_diff, a.condense_sense-b.condense_sense condense_diff from \
           (select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10) ) a, \
           (select * from ff_finnews_b10 where dest_date = (select max(dest_date) from ff_finnews_b10 where dest_date<(select max(dest_date) from ff_finnews_b10))) b \
           where a.symbol=b.symbol order by abs(dense_diff) desc")
    runsql("create index ff_finnews_b11_i1 on ff_finnews_b11 (symbol)")
    runsql("drop table if exists ff_finnews_b12")
    runsql(
        "create table ff_finnews_b12 as select a.symbol, iex_id, published, dest_date, n_doc, n_up, n_down, con_sense, pos_sense, neg_sense, n_g_up, n_g_down, n_l_up, n_l_down, bin+bin_float bin, dense_sense, bin+bin_float-dense_sense bin_diff, con_sense-condense_sense con_sense_diff from ff_finnews_b8 a, ff_finnews_b10 b where a.symbol=b.symbol and date_add(dest_date,interval 1 day)=published_date"
    )
    runsql(
        "create index ff_finnews_b12_i1 on ff_finnews_b12 (symbol, dest_date)")
    runsql(
        "create index ff_finnews_b12_i2 on ff_finnews_b12 (symbol, published)")
    runsql("create index ff_finnews_b12_i3 on ff_finnews_b12 (symbol, iex_id)")
    runsql("drop table if exists ff_finnews_b13")
    runsql(
        "create table ff_finnews_b13 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dbin, a.* from (select a.*, cnt from (select * from ff_finnews_b12) a, (select count(1) cnt from ff_finnews_b12) b order by bin_diff) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b13_i1 on ff_finnews_b13 (dbin)")
    runsql("drop table if exists ff_finnews_b14")
    runsql(
        "create table ff_finnews_b14 select a.dbin, a.min_con, ifnull(b.min_con,(select max(bin_diff) from ff_finnews_b13)) max_con from (select dbin dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin) a left join (select dbin-1 dbin, min(bin_diff) min_con from ff_finnews_b13 group by dbin-1) b on a.dbin=b.dbin"
    )
    runsql("drop table if exists ff_finnews_b15")
    runsql(
        "create table ff_finnews_b15 as select @rn:=@rn+1 rn, dest_date from (select * from ff_stock_date6m order by dest_date desc) a, (select @rn:=0) b"
    )
    runsql("create index ff_finnews_b15_i1 on ff_finnews_b15 (dest_date)")
    runsql("drop table if exists ff_finnews_b16")
    runsql(
        "create table ff_finnews_b16 as select a.*, c.dest_date dest_ref, a.dense_sense-c.dense_sense dense_diff, a.condense_sense-c.condense_sense condense_diff from ff_finnews_b10 a, ff_finnews_b15 b, ff_finnews_b10 c, ff_finnews_b15 d where a.symbol=c.symbol and a.dest_date=b.dest_date and c.dest_date=d.dest_date and b.rn+1=d.rn"
    )
    runsql(
        "create index ff_finnews_b16_i1 on ff_finnews_b16 (symbol, dest_date)")
    runsql("drop table if exists ff_finnews_b17")
    runsql(
        "create table ff_finnews_b17 as select @rn:=@rn+1 rn, floor(@rn/(cnt+1)*100) dcon_bin, a.* from (select a.*, cnt from (select * from ff_finnews_b16) a, (select count(1) cnt from ff_finnews_b16) b order by condense_diff) a, (select @rn:=0) b"
    )
    runsql("drop table if exists ff_finnews_b18")
    runsql(
        "create table ff_finnews_b18 select a.dcon_bin, a.min_con, ifnull(b.min_con,(select max(condense_diff) from ff_finnews_b17)) max_con from (select dcon_bin dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin) a left join (select dcon_bin-1 dcon_bin, min(condense_diff) min_con from ff_finnews_b17 group by dcon_bin-1) b on a.dcon_bin=b.dcon_bin"
    )
    runsql("drop table if exists ff_finnews_b19")
    runsql(
        "create table ff_finnews_b19 as select rn, symbol, dest_date, dense_sense, condense_sense, dest_ref, dense_diff, condense_diff, a.dcon_bin+(condense_diff-b.min_con)/(b.max_con-b.min_con) dcon_bin from ff_finnews_b17 a, ff_finnews_b18 b where a.dcon_bin=b.dcon_bin"
    )
    print("Done focus sensing " + time.strftime('%Y-%m-%d %H:%M:%S'))
    final_sense(is_incremental=False)
    file_cleanup()
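
# The b6/b13/b17 steps convert a ranked measure into 100 percentile bins
# with floor(@rn/(cnt+1)*100). The same binning in plain Python makes the
# arithmetic concrete (percentile_bins is illustrative only):
import numpy as np

def percentile_bins(values, n_bins=100):
    """Assign each value a rank-based bin in 0..n_bins-1, mirroring
    floor(rn / (cnt + 1) * n_bins) from the ff_finnews_b6 query."""
    order = np.argsort(values)  # ascending, like ORDER BY con_sense
    bins = np.empty(len(values), dtype=int)
    for rank, idx in enumerate(order, start=1):  # @rn counts from 1
        bins[idx] = int(rank / (len(values) + 1) * n_bins)
    return bins

# e.g. percentile_bins([0.2, -0.5, 0.9, 0.1]) spreads the four ranks
# across the 0..99 range (roughly bins 60, 20, 80, 40)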
Example #9
    if table_exist("ff_stock_adj"):
        runsql("drop table ff_stock_adj")
    runsql(FF_STOCK_ADJ_DDL)
    refresh_sp500()
    if not table_exist("ff_stock_summary_hist"):
        runsql(FF_STOCK_SUMMARY_HIST_DDL)
        runsql(FF_STOCK_SUMMARY_HIST_I1)
        runsql(FF_STOCK_SUMMARY_HIST_I2)
    if not table_exist("ff_stock_summary_term"):
        runsql(FF_STOCK_SUMMARY_TERM_DDL)
        runsql(FF_STOCK_SUMMARY_TERM_I1)


assure_required_tables()

symbols = fetch_rawsql(
    "select symbol from ff_scan_symbols where datediff(now(),last_updated)<60")
symbols = [i['symbol'] for i in symbols]

#symbols = [symbols[0]]
for s in symbols:
    try:
        d = data.DataReader(
            s, 'google',
            cdate - relativedelta(years=10) - relativedelta(months=2),
            datetime.datetime.now())
        d = d.where((pd.notnull(d)), None)
        t1 = d['Close'].values.tolist()
        t2 = d.index.tolist()
        t3 = d['High'].values.tolist()
        t4 = d['Low'].values.tolist()
        t5 = d['Volume'].values.tolist()
Example #10
def assure_required_tables():
    if not table_exist("ff_tweets"):
        runsql(FF_TWEETS_DDL)
    if not table_exist("ff_scan_symbols"):
        refresh_sp500()
    r = fetch_rawsql(
        "select min(datediff(now(),last_updated)) d from ff_scan_symbols")[0]
    if r['d'] > 7:
        refresh_sp500()


# Start of task
assure_required_tables()

symbols = fetch_rawsql(
    "select symbol from ff_scan_symbols where datediff(now(),last_updated) < 60 and ifnull(TIMESTAMPDIFF(minute, last_scanned, now()),9999) > 1440 order by last_scanned desc"
)
if len(symbols) == 0:
    sys.exit()

symbol = symbols[0]['symbol']
print("Working on " + symbol + " : " + time.strftime('%Y-%m-%d %H:%M:%S'))
sid = 0
current_time = datetime.datetime.now(datetime.timezone.utc)
#current_time = datetime.datetime.now()

for d in range(6, 0, -1):
    dt = current_time - datetime.timedelta(days=d)
    print("  Fetching up to " + dt.strftime('%Y-%m-%d'))
    tq = t.search.tweets(q="$" + symbol,
                         lang="en",
Example #11
        runsql(
            "create index ff_finnews_iex_i3 on ff_finnews_iex (last_updated)")
        runsql("create index ff_finnews_iex_i4 on ff_finnews_iex (published)")


# Start of task
assure_required_tables()

headers = {
    'User-Agent':
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.90 Safari/537.36'
}

C = {}

symbols = fetch_rawsql("select symbol from ff_scan_symbols")
symbols = [i['symbol'] for i in symbols]

#symbols = ['AAPL','AMD','GOOG','NVDA','QCOM']
#symbols = symbols[:5]

n_insert_attempt = 0
n_insert_success = 0
for s in symbols:
    s = s.replace("-", ".")
    try:
        P = requests.get("https://api.iextrading.com/1.0/stock/" + s +
                         "/news/last/100")
        P = P.json()
    except:
        print("[e] IEX error and skipped '" + s + "' ... " +