Пример #1
0
def test_day_low_high(quotation_fixture):
    """Check the columns returned by day_low_high() and the daily high-minus-low spread."""
    # NB: the stock code and the dates below must correspond to quotation_fixture
    trading_days = ['2021-01-01', '2021-01-02', '2021-01-03']
    # Every listed day is expected to show the same 0.3 gap between high and low price
    wanted_spread = pd.Series(dict.fromkeys(trading_days, 0.3))
    wanted_columns = {'day_low_price', 'day_high_price', 'last_price', 'volume', 'date'}

    quotes = day_low_high('ABC', all_dates=trading_days)

    assert set(quotes.columns) == wanted_columns
    spread = pd.Series(quotes['day_high_price'] - quotes['day_low_price'])
    pd.testing.assert_series_equal(spread, wanted_spread, check_names=False)
Пример #2
0
def analyse_point_scores(stock: str, sector_companies, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Plot the running point score of a stock across the dates (columns) of all_stocks_cip.

    For every date each rule is called with a shared state dict and the values the rules return
    are added to a running total. The state carries the stock's day low/high dataframe, the full
    all_stocks_cip dataframe, the stock code, a daily range threshold of 0.20 and, per date, the
    market average (mean over all rows), the sector average (mean over the rows named in
    sector_companies) and the stock's own move. When rules is None the rules returned by
    default_point_score_rules() are applied.

    The return value is whatever plot_series() produces for the (date, points) series.
    NOTE(review): earlier documentation described it as a utf-8 base64 encoded plot image - confirm
    against plot_series().
    """
    assert len(stock) >= 3
    assert all_stocks_cip is not None
    if rules is None:
        rules = default_point_score_rules()

    trading_dates = all_stocks_cip.columns
    history = []
    running_total = 0
    state = {
        # the low/high data does not change from day to day, so it is fetched once up front
        'day_low_high_df': day_low_high(stock, all_dates=trading_dates),
        'all_stocks_change_in_percent_df': all_stocks_cip,
        'stock': stock,
        # 20% at either end of the daily range gets a point
        'daily_range_threshold': 0.20,
    }

    for day in trading_dates:
        day_moves = all_stocks_cip[day]
        state.update(
            market_avg=day_moves.mean(),
            sector_avg=day_moves.filter(items=sector_companies).mean(),
            stock_move=all_stocks_cip.at[stock, day],
            date=day,
        )
        running_total += sum(rule(state) for rule in rules)
        history.append({'points': running_total, 'stock': stock, 'date': day})

    score_df = pd.DataFrame.from_records(history)
    score_df['date'] = pd.to_datetime(score_df['date'])
    return plot_series(score_df, x='date', y='points')
Пример #3
0
def plot_point_scores(stock: str, sector_companies, all_stocks_cip: pd.DataFrame, rules):
    """
    Visualise the stock in terms of point scores as described on the stock view page.

    For every date (column) of all_stocks_cip each rule in rules is called exactly once with a
    shared state dict; the returned values are added to a running total and also accumulated per
    rule. All rows in all_stocks_cip are used to calculate the market average on a given trading
    day, whilst only sector_companies are used to calculate the sector average.

    Parameters:
        stock: code of the stock to score (at least three characters).
        sector_companies: row labels of all_stocks_cip that make up the stock's sector.
        all_stocks_cip: dataframe indexed by stock with one column per date.
        rules: non-empty sequence of callables taking the state dict and returning a number.
            Each must expose __name__; a leading "rule_" is dropped for display.

    Returns:
        A 2-tuple: the result of plot_series() for the running points over time, and the result
        of plot_as_inline_html_data() for a bar chart of the net points contributed by each rule.
    """
    assert len(stock) >= 3
    assert all_stocks_cip is not None
    assert rules is not None and len(rules) > 0

    # Display labels only depend on the rule itself, so work them out once up front
    rule_labels = []
    for rule in rules:
        label = rule.__name__
        if label.startswith("rule_"):
            label = label[5:]
        rule_labels.append(label)

    rows = []
    points = 0
    day_low_high_df = day_low_high(stock, all_dates=all_stocks_cip.columns)
    state = {
        "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
        "all_stocks_change_in_percent_df": all_stocks_cip,
        "stock": stock,
        "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
    }
    net_points_by_rule = defaultdict(int)
    for date in all_stocks_cip.columns:
        market_avg = all_stocks_cip[date].mean()
        sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
        stock_move = all_stocks_cip.at[stock, date]
        state.update(
            {
                "market_avg": market_avg,
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            }
        )
        # Evaluate every rule once per date and reuse the scores for both the running total
        # and the per-rule breakdown (avoids calling each rule twice)
        scores = [rule(state) for rule in rules]
        points += sum(scores)
        for label, score in zip(rule_labels, scores):
            net_points_by_rule[label] += score
        rows.append({"points": points, "stock": stock, "date": date})

    df = pd.DataFrame.from_records(rows)
    df["date"] = pd.to_datetime(df["date"])
    point_score_plot = plot_series(df, x="date", y="points")

    contributions = [
        {"rule": str(label), "net_points": total}
        for label, total in net_points_by_rule.items()
    ]
    df = pd.DataFrame.from_records(contributions)
    net_rule_contributors_plot = (
        p9.ggplot(df, p9.aes(x="rule", y="net_points"))
        + p9.labs(x="Rule", y="Contribution to points by rule")
        + p9.geom_bar(stat="identity")
        + p9.theme(axis_text_y=p9.element_text(size=7), subplots_adjust={"left": 0.2})
        + p9.coord_flip()
    )
    return point_score_plot, plot_as_inline_html_data(net_rule_contributors_plot)
Пример #4
0
def test_day_low_high(
    quotation_fixture,
):  # pylint: disable=unused-argument,redefined-outer-name
    """Check the columns returned by day_low_high() and the daily high-minus-low spread."""
    # NB: the stock code and the dates below must correspond to quotation_fixture
    trading_days = ["2021-01-01", "2021-01-02", "2021-01-03"]
    # Every listed day is expected to show the same 0.3 gap between high and low price
    wanted_spread = pd.Series(dict.fromkeys(trading_days, 0.3))
    wanted_columns = {
        "day_low_price",
        "day_high_price",
        "last_price",
        "volume",
        "date",
    }

    quotes = day_low_high("ABC", all_dates=trading_days)

    assert set(quotes.columns) == wanted_columns
    spread = pd.Series(quotes["day_high_price"] - quotes["day_low_price"])
    pd.testing.assert_series_equal(spread, wanted_spread, check_names=False)
Пример #5
0
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Return the stocks flagged as outliers based on the points each rule awards them.

    For every stock the net points per rule are accumulated over all dates (columns) of
    all_stocks_cip, giving one row per stock and one column per rule. An IForest model is fitted
    on that table and the stocks whose prediction is greater than zero are returned.

    Parameters:
        stocks: stock codes to examine. A stock for which the sector lookup or day_low_high()
            raises KeyError is reported via warning() and skipped.
        all_stocks_cip: dataframe indexed by stock with one column per date.
        rules: callables taking the state dict and returning a number; defaults to
            default_point_score_rules() when None.

    Returns:
        A list of the stock codes flagged as outliers; empty when no stock could be processed.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df['asx_code']
    # The market average depends only on the date, so compute it once instead of once per stock
    market_avg_by_date = {date: all_stocks_cip[date].mean() for date in all_stocks_cip.columns}

    rows = []
    for stock in stocks:
        try:
            sector = stocks_by_sector_df.at[stock, 'sector_name']
            sector_companies = list(stocks_by_sector_df.loc[stocks_by_sector_df['sector_name'] == sector].asx_code)
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(None, "Unable to locate watchlist entry: {} - continuing without it".format(stock))
            continue
        state = {
            'day_low_high_df': day_low_high_df,  # never changes each day, so we init it here
            'all_stocks_change_in_percent_df': all_stocks_cip,
            'stock': stock,
            'daily_range_threshold': 0.20,  # 20% at either end of the daily range gets a point
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
            stock_move = all_stocks_cip.at[stock, date]
            state.update({'market_avg': market_avg_by_date[date], 'sector_avg': sector_avg,
                          'stock_move': stock_move, 'date': date})
            for rule_name, rule in str_rules.items():
                points_by_rule[rule_name] += rule(state)
        d = {'stock': stock}
        d.update(points_by_rule)
        rows.append(d)

    if not rows:
        # No stock could be scored: there is no 'stock' column to index on and nothing to fit
        print("Found {} outlier stocks".format(0))
        return []

    df = pd.DataFrame.from_records(rows)
    df = df.set_index('stock')
    from pyod.models.iforest import IForest
    clf = IForest()
    clf.fit(df)
    scores = clf.predict(df)
    # predict() yields one label per row of df; keep the stocks whose label is positive
    results = [stock for stock, value in zip(df.index, scores) if value > 0]
    print("Found {} outlier stocks".format(len(results)))
    return results
Пример #6
0
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Return the stocks flagged as outliers based on the points each rule awards them.

    For every stock the net points per rule are accumulated over all dates (columns) of
    all_stocks_cip, giving one row per stock and one column per rule. An IForest model is fitted
    on that table and the stocks whose prediction is greater than zero are returned.

    Parameters:
        stocks: stock codes to examine. A stock for which the sector lookup or day_low_high()
            raises KeyError is reported via warning() and skipped.
        all_stocks_cip: the "change in percent" dataframe, indexed by stock with one column per
            date, for at least the stocks present in the specified list.
        rules: callables taking the state dict and returning a number; defaults to
            default_point_score_rules() when None.

    Returns:
        A list of the stock codes flagged as outliers; empty when no stock could be processed.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df["asx_code"]
    # The market average depends only on the date, so compute it once instead of once per stock
    market_avg_by_date = {
        date: all_stocks_cip[date].mean() for date in all_stocks_cip.columns
    }

    rows = []
    for stock in stocks:
        try:
            sector = stocks_by_sector_df.at[stock, "sector_name"]
            sector_companies = list(stocks_by_sector_df.loc[
                stocks_by_sector_df["sector_name"] == sector].asx_code)
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(
                None,
                "Unable to locate watchlist entry: {} - continuing without it".
                format(stock),
            )
            continue
        state = {
            # never changes each day, so we init it here
            "day_low_high_df": day_low_high_df,
            "all_stocks_change_in_percent_df": all_stocks_cip,
            "stock": stock,
            # 20% at either end of the daily range gets a point
            "daily_range_threshold": 0.20,
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            sector_avg = all_stocks_cip[date].filter(
                items=sector_companies).mean()
            stock_move = all_stocks_cip.at[stock, date]
            state.update({
                "market_avg": market_avg_by_date[date],
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            })
            for rule_name, rule in str_rules.items():
                try:
                    points_by_rule[rule_name] += rule(state)
                except TypeError:
                    # Deliberate best-effort: a rule hitting nan's in the dataset
                    # contributes nothing for this date instead of aborting the run
                    pass
        d = {"stock": stock}
        d.update(points_by_rule)
        rows.append(d)

    if not rows:
        # No stock could be scored: there is no "stock" column to index on and nothing to fit
        print("Found {} outlier stocks".format(0))
        return []

    df = pd.DataFrame.from_records(rows)
    df = df.set_index("stock")
    clf = IForest()
    clf.fit(df)
    scores = clf.predict(df)
    # predict() yields one label per row of df; keep the stocks whose label is positive
    results = [stock for stock, value in zip(df.index, scores) if value > 0]
    print("Found {} outlier stocks".format(len(results)))
    return results