Пример #1
0
    def get_queryset(self, **kwargs):
        """
        Build the queryset of quotations for the stocks of the chosen sector.

        Keyword arguments read here are ``sector`` (defaults to the current
        ``self.sector``), ``report_top_n`` and ``report_bottom_n``. When no
        keyword arguments are given at all, an empty queryset is returned.

        Side effects: ``self.sector``, ``self.sector_id`` and ``self.qs`` are
        assigned, progress is printed, and ``warning()`` is called with the
        request when fewer quotes than wanted stocks were found.
        """
        if not kwargs:
            # user never run this view before?
            print("WARNING: no form parameters specified - returning empty queryset")
            return Quotation.objects.none()

        self.sector = kwargs.get("sector", self.sector)
        sector_record = Sector.objects.get(sector_name=self.sector)
        self.sector_id = int(sector_record.sector_id)
        wanted_stocks = all_sector_stocks(self.sector)
        print("Found {} stocks matching sector={}".format(len(wanted_stocks), self.sector))
        mrd = latest_quotation_date('ANZ')

        top_n = kwargs.get('report_top_n', None)
        bottom_n = kwargs.get('report_bottom_n', None)
        if not (top_n is None and bottom_n is None):
            # collapse the 90-day frame into one summed "percent_cip" value per row label
            cip_df = selected_cached_stocks_cip(wanted_stocks, Timeframe(past_n_days=90))
            cip_sum = cip_df.transpose().sum().to_frame(name="percent_cip")
            best = set()
            if top_n is not None:
                best = set(cip_sum.nlargest(top_n, "percent_cip").index)
            worst = set()
            if bottom_n is not None:
                worst = set(cip_sum.nsmallest(bottom_n, "percent_cip").index)
            wanted_stocks = best.union(worst)

        print("Requesting valid quotes for {} stocks".format(len(wanted_stocks)))
        self.qs = valid_quotes_only(mrd).filter(asx_code__in=wanted_stocks)
        if len(self.qs) < len(wanted_stocks):
            # tell the user which of the wanted stocks have no quote
            got = {quote.asx_code for quote in self.qs.all()}
            missing_stocks = wanted_stocks.difference(got)
            warning(self.request, f"could not obtain quotes for all stocks as at {mrd}: {missing_stocks}")
        return self.qs
Пример #2
0
def analyse_sector(stock, sector, all_stocks_cip, window_size=14):
    """
    Build the sector comparison plots for ``stock``.

    Parameters:
        stock: code of the stock that is compared against its sector.
        sector: sector name handed to ``all_sector_stocks()``; a falsy value
            means there is no sector (ETFs dont have a sector for now).
        all_stocks_cip: DataFrame whose index holds stock codes and whose
            column labels are "%Y-%m-%d" date strings. Must not be None.
        window_size: forwarded to ``plot_sector_performance()``.

    Returns:
        Tuple ``(c_vs_s_plot, sector_momentum_plot, point_score_plot)``. All
        three are None when the sector has no companies or when none of the
        sector's companies has a row in ``all_stocks_cip``.
    """
    assert all_stocks_cip is not None

    sector_companies = all_sector_stocks(sector) if sector else []  # ETFs dont have a sector for now...
    if len(sector_companies) == 0:
        return None, None, None

    # may contain fewer rows than sector_companies when some stocks are missing (delisted etc.)
    cip = all_stocks_cip.filter(items=sector_companies, axis='index').fillna(0.0)
    if len(cip) == 0:
        # nothing to rank: avoid IndexError from .index[0] on an empty result below
        return None, None, None

    rows = []
    cum_sum = defaultdict(float)
    stock_versus_sector = []
    # identify the best performing stock in the sector and add it to the stock_versus_sector rows...
    best_stock_in_sector = cip.sum(axis=1).nlargest(1).index[0]
    best_label = '{} (best in {})'.format(best_stock_in_sector, sector)
    for day in sorted(cip.columns, key=lambda k: datetime.strptime(k, "%Y-%m-%d")):
        # Series.items() replaces Series.iteritems(), which pandas 2.0 removed
        for asx_code, daily_change in cip[day].items():
            cum_sum[asx_code] += daily_change
        # a stock counts as positive/negative once its running total passes +/-5.0
        n_pos = sum(1 for total in cum_sum.values() if total >= 5.0)
        n_neg = sum(1 for total in cum_sum.values() if total < -5.0)
        n_unchanged = len(cip) - n_pos - n_neg
        rows.append({'n_pos': n_pos, 'n_neg': n_neg, 'n_unchanged': n_unchanged, 'date': day})
        stock_versus_sector.append({'group': stock, 'date': day, 'value': cum_sum[stock]})
        stock_versus_sector.append({'group': 'sector_average', 'date': day, 'value': pd.Series(cum_sum).mean()})
        if stock != best_stock_in_sector:
            stock_versus_sector.append({'group': best_label, 'value': cum_sum[best_stock_in_sector], 'date': day})
    df = pd.DataFrame.from_records(rows)

    sector_momentum_plot = plot_sector_performance(df, sector, window_size=window_size)
    stock_versus_sector_df = pd.DataFrame.from_records(stock_versus_sector)
    c_vs_s_plot = plot_company_versus_sector(stock_versus_sector_df, stock, sector)
    point_score_plot = analyse_point_scores(stock, sector_companies, all_stocks_cip)

    return c_vs_s_plot, sector_momentum_plot, point_score_plot
Пример #3
0
def show_sector_outliers(request, sector_id=None, n_days=30):
    """
    Delegate to ``show_outliers()`` for every stock of the sector ``sector_id``.

    ``sector_id`` must be a positive int (asserted); ``n_days`` is passed
    through unchanged. The requesting user is validated first.
    """
    validate_user(request.user)
    assert isinstance(sector_id, int) and sector_id > 0

    # resolve the numeric id to the sector name that all_sector_stocks() expects
    sector_record = Sector.objects.get(sector_id=sector_id)
    sector_stocks = all_sector_stocks(sector_record.sector_name)
    return show_outliers(request, sector_stocks, n_days=n_days)
Пример #4
0
def test_all_sectors(all_sector_fixture):
    """Check all_sectors() and the reverse sector -> stocks lookup."""
    # since company_details_factory gives a single ANZ company details record, this test will work...
    sectors = all_sectors()
    assert sectors == [('Financials', 'Financials')]

    # and check the reverse is true: financials -> ANZ
    # (the cache is cleared first so the lookup is recomputed)
    all_sector_stocks.cache_clear()
    financial_stocks = all_sector_stocks('Financials')
    assert financial_stocks == {'ANZ'}
Пример #5
0
    def recalc_queryset(self, **kwargs):
        """
        Recompute the quotes to show for the submitted search criteria.

        Keyword arguments read here: ``name``, ``activity``, ``sector``
        (defaults to ``self.DEFAULT_SECTOR``), ``sector_enabled``,
        ``report_top_n`` and ``report_bottom_n``.

        Returns an empty queryset when none of ``name``, ``activity`` or
        ``sector`` was supplied; otherwise the first element returned by
        ``latest_quote()`` for the matching companies.

        Side effects: assigns ``self.timeframe`` and ``self.ld``, prints
        progress, and calls ``warning()`` with the request when fewer valid
        quotes than wanted stocks were found.
        """
        if not any(key in kwargs for key in ("name", "activity", "sector")):
            return Quotation.objects.none()

        wanted_name = kwargs.get("name", "")
        wanted_activity = kwargs.get("activity", "")
        if len(wanted_name) > 0 or len(wanted_activity) > 0:
            matching_companies = find_named_companies(wanted_name,
                                                      wanted_activity)
        else:
            matching_companies = all_stocks()
        sector = kwargs.get("sector", self.DEFAULT_SECTOR)
        sector_id = int(Sector.objects.get(sector_name=sector).sector_id)
        sector_stocks = all_sector_stocks(sector)
        if kwargs.get("sector_enabled", False):
            matching_companies = matching_companies.intersection(sector_stocks)
        print("Found {} companies matching: name={} or activity={}".format(
            len(matching_companies), wanted_name, wanted_activity))

        report_top_n = kwargs.get("report_top_n", None)
        report_bottom_n = kwargs.get("report_bottom_n", None)
        self.timeframe = Timeframe(past_n_days=90)
        ld = LazyDictionary()
        ld["sector"] = sector
        ld["sector_id"] = sector_id
        ld["sector_companies"] = sector_stocks
        if len(matching_companies) > 0:
            ld["cip_df"] = selected_cached_stocks_cip(matching_companies,
                                                      self.timeframe)
        else:
            ld["cip_df"] = pd.DataFrame()
        # callables are stored as-is here; they receive the dictionary as their argument
        ld["sector_performance_df"] = lambda ld: make_sector_performance_dataframe(
            ld["cip_df"], ld["sector_companies"])
        ld["sector_performance_plot"] = lambda ld: self.sector_performance(ld)
        self.ld = ld
        wanted_stocks = self.filter_top_bottom(ld, matching_companies,
                                               report_top_n, report_bottom_n)

        print("Requesting valid quotes for {} stocks".format(
            len(wanted_stocks)))
        quotations_as_at, actual_mrd = valid_quotes_only(
            "latest", ensure_date_has_data=True)
        available_quotes = quotations_as_at.filter(asx_code__in=wanted_stocks)
        if len(available_quotes) < len(wanted_stocks):
            # compare against the quotes just fetched (not self.qs, which this
            # method never assigns) so the warning lists the stocks truly missing
            got = {q.asx_code for q in available_quotes}
            missing_stocks = set(wanted_stocks).difference(got)
            warning(
                self.request,
                f"could not obtain quotes for all stocks as at {actual_mrd}: {missing_stocks}",
            )

        print("Showing results for {} companies".format(
            len(matching_companies)))
        ret, _ = latest_quote(tuple(matching_companies))
        return ret
Пример #6
0
def get_dataset(dataset_wanted, request, timeframe=None):
    """
    Return the dataset identified by ``dataset_wanted``.

    Recognised names: ``market_sentiment``, ``eps-per-sector``,
    ``kmeans-watchlist``, ``kmeans-etfs``, ``kmeans-sector-<sector_id>``,
    ``stock-quotes-<stock>`` and ``financial-metrics-<stock>``.

    Parameters:
        dataset_wanted: name of the dataset (see above).
        request: only ``request.user`` is read, for ``kmeans-watchlist``.
        timeframe: timeframe for the data; ``Timeframe(past_n_days=300)``
            when None. ``eps-per-sector`` always uses a 180 day timeframe.

    Returns:
        The dataframe built for the dataset. For ``financial-metrics-*`` this
        is whatever ``financial_metrics()`` returned, which may be None.

    Raises:
        AssertionError: the name matches none of the recognised names/prefixes.
        ValueError: the name passed the assertion but has no handler, or the
            sector id of a ``kmeans-sector-*`` name is not an integer.
        Http404: the sector of a ``kmeans-sector-*`` name does not exist or
            has no name.
    """
    assert (dataset_wanted in set(["market_sentiment", "eps-per-sector"])
            or dataset_wanted.startswith("kmeans-")
            or dataset_wanted.startswith("financial-metrics-")
            or dataset_wanted.startswith("stock-quotes-"))

    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    sector_prefix = "kmeans-sector-"

    if dataset_wanted == "market_sentiment":
        df = cached_all_stocks_cip(timeframe)
        return df
    elif dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user))
        return df
    elif dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7,
                                                       all_etfs())
        return df
    elif dataset_wanted.startswith("stock-quotes-"):
        stock = dataset_wanted[len("stock-quotes-"):]
        validate_stock(stock)
        df = company_prices([stock],
                            timeframe=timeframe,
                            fields=all_stock_fundamental_fields,
                            missing_cb=None)
        df['stock_code'] = stock
        return df
    elif dataset_wanted.startswith(sector_prefix):
        sector_id = int(dataset_wanted[len(sector_prefix):])
        try:
            sector = Sector.objects.get(sector_id=sector_id)
        except Sector.DoesNotExist as exc:
            # .get() raises rather than returning None for an unknown id
            raise Http404("No stocks associated with sector") from exc
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df
    elif dataset_wanted.startswith("financial-metrics-"):
        stock = dataset_wanted[len("financial-metrics-"):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # excel doesnt support timezones, so we remove it first
            colnames = [d.strftime("%Y-%m-%d") for d in df.columns]
            df.columns = colnames
        return df
    elif dataset_wanted == "eps-per-sector":
        df, _ = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        df = df.set_index("asx_code", drop=True)
        return df
    else:
        raise ValueError("Unsupported dataset {}".format(dataset_wanted))
Пример #7
0
def test_all_sectors(
    all_sector_fixture,
):  # pylint: disable=unused-argument,redefined-outer-name
    """Check all_sectors() and the reverse sector -> stocks lookup."""
    # since company_details_factory gives a single ANZ company details record, this test will work...
    sectors = all_sectors()
    assert sectors == [("Financials", "Financials")]

    # and check the reverse is true: financials -> ANZ
    # (both caches are cleared first so the lookup is recomputed)
    for cached_function in (all_sector_stocks, stocks_by_sector):
        cached_function.cache_clear()
    financial_stocks = all_sector_stocks("Financials")
    assert financial_stocks == {"ANZ"}
Пример #8
0
def filter_stocks_to_search(request, what_to_search: str) -> set:
    """
    Resolve a search scope to the stocks it covers.

    ``"all_stocks"`` yields the result of ``all_stocks()`` and ``"watchlist"``
    the watchlist of ``request.user``; any other value is handed to
    ``all_sector_stocks()``. Both arguments are asserted to be non-empty.
    """
    assert request is not None
    assert len(what_to_search) > 0

    if what_to_search == "all_stocks":
        return all_stocks()
    if what_to_search == "watchlist":
        return user_watchlist(request.user)
    # anything else goes to the sector lookup
    return all_sector_stocks(what_to_search)
Пример #9
0
 def form_valid(self, form):
     """
     Render the box plot page for the sector chosen in the submitted form.

     Reads ``sector``, ``normalisation_method`` and ``n_days`` from
     ``form.cleaned_data`` (with defaults) and adds the plot and the winner
     results returned by ``plot_boxplot_series()`` to the template context.
     """
     cleaned = form.cleaned_data
     sector = cleaned.get('sector', "Communication Services")
     norm_method = cleaned.get('normalisation_method', None)
     n_days = cleaned.get('n_days', 30)

     sector_stocks = all_sector_stocks(sector)
     cip = selected_cached_stocks_cip(sector_stocks, Timeframe(past_n_days=n_days))
     context = self.get_context_data()
     boxplot, winner_results = plot_boxplot_series(cip, normalisation_method=norm_method)

     page_title = "Past {} day sector performance: box plot trends".format(n_days)
     extra_context = {
         'title': page_title,
         'n_days': n_days,
         'sector': sector,
         'plot': boxplot,
         'winning_stocks': winner_results,
     }
     context.update(extra_context)
     return render(self.request, self.template_name, context)
Пример #10
0
    def form_valid(self, form):
        """
        Render the box plot page for the sector chosen in the submitted form.

        Reads ``sector``, ``normalisation_method`` and ``n_days`` from
        ``form.cleaned_data`` (with defaults). The plot is obtained through
        ``cache_plot()`` and the list of winning stocks is computed locally
        from the ``cip_df`` entry of the LazyDictionary.
        """
        cleaned = form.cleaned_data
        sector = cleaned.get("sector", "Communication Services")
        norm_method = cleaned.get("normalisation_method", None)
        n_days = cleaned.get("n_days", 30)

        ld = LazyDictionary()
        ld["stocks"] = lambda ld: all_sector_stocks(sector)
        ld["timeframe"] = Timeframe(past_n_days=n_days)
        ld["cip_df"] = lambda ld: selected_cached_stocks_cip(
            ld["stocks"], ld["timeframe"])

        context = self.get_context_data()

        def winner_results(df: pd.DataFrame) -> list:
            """Return (asx_code, n_wins, total) tuples, largest total first."""
            # compute star performers: those who are above the mean on a given day counted over all days
            wins = defaultdict(int)
            column_mean = df.mean(axis=0)
            for col in df.columns:
                above_mean = df[col] > column_mean[col]
                for asx_code in df.loc[above_mean, col].index:
                    wins[asx_code] += 1

            totals = ((asx_code, n_wins, df.loc[asx_code].sum())
                      for asx_code, n_wins in wins.items())
            # avoid "dead cat bounce" stocks which fall spectacularly and then post major increases in percentage terms
            keepers = [entry for entry in totals if entry[2] > 0.0]
            # ascending stable sort followed by a reversal, so ties keep the
            # same relative order as reversed(sorted(...))
            keepers.sort(key=lambda entry: entry[2])
            keepers.reverse()
            return keepers

        page_title = "Past {} day sector performance: box plot trends".format(n_days)
        # the plot is requested before the winners are computed
        plot_uri = cache_plot(
            f"{sector}-recent-sector-view-{ld['timeframe'].description}-{norm_method}",
            lambda ld: plot_boxplot_series(
                ld["cip_df"], normalisation_method=norm_method),
            datasets=ld,
        )
        winning_stocks = winner_results(ld["cip_df"])

        context.update({
            "title": page_title,
            "n_days": n_days,
            "sector": sector,
            "plot_uri": plot_uri,
            "winning_stocks": winning_stocks,
        })
        return render(self.request, self.template_name, context)
Пример #11
0
def analyse_sector_performance(stock,
                               sector,
                               all_stocks_cip,
                               window_size=10) -> tuple:
    """
    Produce the sector plots for ``stock`` together with the sector's companies.

    Returns ``(c_vs_s_plot, sector_momentum_plot, sector_companies)``, or
    ``(None, None, None)`` when ``sector`` is None. ``stock`` must be a str
    and ``all_stocks_cip`` a DataFrame (both asserted); ``window_size`` is
    forwarded to ``analyse_sector()``.
    """
    assert isinstance(stock, str)
    assert isinstance(all_stocks_cip, pd.DataFrame)

    if sector is None:
        # an ETF? ie. no sector information available
        return (None, None, None)

    sector_companies = all_sector_stocks(sector)
    plots = analyse_sector(stock,
                           sector,
                           sector_companies,
                           all_stocks_cip,
                           window_size=window_size)
    c_vs_s_plot, sector_momentum_plot = plots
    return c_vs_s_plot, sector_momentum_plot, sector_companies
Пример #12
0
 def stocks(self):
     """
     Return the sorted stocks of ``self.sector``.

     When ``self.sector`` is None it is first set to "Information Technology".
     """
     if self.sector is None:
         self.sector = "Information Technology"
     sector_codes = list(all_sector_stocks(self.sector))
     sector_codes.sort()
     return sector_codes
Пример #13
0
def is_valid_sector(value):
    """Return True when ``all_sector_stocks(value)`` is non-empty; ``value`` must not be None."""
    assert value is not None
    n_stocks = len(all_sector_stocks(value))
    return n_stocks > 0
Пример #14
0
def cluster_stocks_view(request, stocks: str):
    """
    Render the k-means clustering page for a list of stocks.

    ref: https://pythonforfinance.net/2018/02/08/stock-clusters-using-k-means-algorithm-in-python/

    Parameters:
        request: the current request; its user is validated and the username
            is used in the plot cache keys.
        stocks: which stocks to cluster: ``"watchlist"``, ``"etfs"`` or
            ``"sector-<sector_id>"``.

    Returns:
        The rendered ``cluster_stocks.html`` response.

    Raises:
        Http404: ``stocks`` is not one of the supported forms, the sector id
            is not an integer, or the sector does not exist or has no name.
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=300)
    sector_prefix = "sector-"
    if stocks == "watchlist":
        asx_codes = user_watchlist(request.user)
    elif stocks == "etfs":
        asx_codes = all_etfs()
    elif stocks.startswith(sector_prefix):
        try:
            sector_id = int(stocks[len(sector_prefix):])
            sector = Sector.objects.get(sector_id=sector_id)
        except (ValueError, Sector.DoesNotExist) as exc:
            # a malformed or unknown sector id is a client error, not a server error;
            # .get() raises rather than returning None for an unknown id
            raise Http404("No stocks associated with sector") from exc
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
    else:
        raise Http404("Unknown stock list {}".format(stocks))
    chosen_k = 7  # often a reasonable tradeoff

    def elbow_curve_plot(ld: LazyDictionary):
        """Plot the distortion values returned by the clustering against range(2, 20)."""
        distortion, _, _, _, _ = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        fig = plt.figure(figsize=(15, 5))
        plt.plot(range(2, 20), distortion)
        plt.grid(True)
        plt.title("Elbow curve")
        return fig

    def cluster_plot(ld: LazyDictionary):
        """Scatter plot of return versus volatility, one facet per cluster."""
        _, _, _, _, data_df = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        plot = (
            p9.ggplot(
                data_df, p9.aes("return", "volatility", colour="factor(cluster_id)")
            )
            + p9.geom_point(size=3)
            + p9.facet_wrap("~cluster_id", ncol=3, scales="free")
        )
        return user_theme(
            plot,
            x_axis_label="Returns (%)",
            y_axis_label="Volatility (%)",
            figure_size=(15, 15),
            subplots_adjust={"hspace": 0.15, "wspace": 0.15},
        )

    # the cache keys embed the user and the sorted stock codes
    stocks_as_str = "-".join(sorted(asx_codes))
    elbow_curve_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-elbow-curve-plot",
        elbow_curve_plot,
    )
    cluster_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-kmeans-cluster-plot",
        cluster_plot,
    )
    context = {
        "elbow_curve_plot_uri": elbow_curve_uri,
        "k": chosen_k,
        "dataset": stocks,
        "n_stocks": len(asx_codes),
        "cluster_plot_uri": cluster_uri,
        "timeframe": timeframe,
    }
    return render(request, "cluster_stocks.html", context=context)