class DataFrameServiceTest(unittest.TestCase):
    """Smoke tests for the frames returned by DataFrameService lookups."""

    def setUp(self):
        # unittest runs setUp before every test method, so each case gets
        # its own service instance.
        self.sut = DataFrameService()

    def test_getWordCorrelations(self):
        """The correlation frame for a word has more than one column."""
        df = self.sut.getWordCorrelations("modi")
        self.assertGreater(len(df.columns), 1)

    def test_getWordDates(self):
        """The per-date frame for a word has more than one column."""
        df = self.sut.getWordDates("coronavirus")
        # The previous call was assertTrue(len(df.columns), 1): the `1` was
        # taken as the failure message, so only "at least one column" was
        # ever checked.
        # NOTE(review): `> 1` mirrors test_getWordCorrelations; confirm the
        # intended column count.
        self.assertGreater(len(df.columns), 1)

    def test_getWordDatesMultiIndex(self):
        # NOTE(review): no assertion follows in the visible code; this test
        # looks unfinished.
        df = self.sut.getWordDates("coronavirus")
        idx = pd.IndexSlice
Пример #2
0
 def _update_checklist(self, input_value):
     """Build the checklist options for the word typed by the user.

     The input is lower-cased and looked up through
     ``DataFrameService.getWordDates``; one ``{"label", "value"}`` entry is
     produced per distinct ``channel_id``, followed by a final entry whose
     label and value are both ``WordTrend.HTML_IDS.CHECKLIST_ALL``.
     """
     frame = DataFrameService().getWordDates(input_value.lower())
     channel_options = [
         {"label": channel, 'value': channel}
         for channel in frame["channel_id"].unique()
     ]
     select_all = {
         "label": WordTrend.HTML_IDS.CHECKLIST_ALL,
         'value': WordTrend.HTML_IDS.CHECKLIST_ALL,
     }
     return channel_options + [select_all]
Пример #3
0
    def _filteredDf(self, checklist, word):
        """Return the trend rows for ``word`` limited to the chosen channels.

        Args:
            checklist: Selected channel ids. If it contains
                ``WordTrend.HTML_IDS.CHECKLIST_ALL``, every channel present
                in the data is kept.
            word: Word passed to ``DataFrameService.getWordDates``.

        Returns:
            A new DataFrame with the selected channels only, plus a
            ``rolling`` column (per-channel rolling mean of ``mean_prop``
            over a 14-row window, at least 1 observation required) and
            ``date`` parsed from ``%m_%d_%y`` strings. ``None`` when the
            service returns ``None``.
        """
        df = DataFrameService().getWordDates(word)

        if df is None:
            return None

        if WordTrend.HTML_IDS.CHECKLIST_ALL in checklist:
            # "All" selected: keep every channel found in the data.
            checklist = df["channel_id"].unique()

        # Take an explicit copy of the filtered rows: assigning new columns
        # onto a filtered selection triggers pandas' SettingWithCopyWarning.
        df = df[df["channel_id"].isin(checklist)].copy()

        df["rolling"] = df.groupby(["channel_id"])["mean_prop"].transform(
            lambda x: x.rolling(window=14, min_periods=1).mean())
        df["date"] = pd.to_datetime(df["date"], format="%m_%d_%y")

        return df
Пример #4
0
    def _filteredDf(self, checklist, selected_words):
        """Return the overall mean proportion per channel and word.

        Args:
            checklist: Selected channel ids. If it contains
                ``WordProportion.HTML_IDS.CHECKLIST_ALL``, every channel
                present in the data is kept.
            selected_words: Words to look up through
                ``DataFrameService.getWordDates``.

        Returns:
            A DataFrame with columns ``channel_id``, ``word`` and
            ``overall_prop`` (mean of ``mean_prop`` per channel/word pair),
            or ``None`` when no frame could be collected.
        """
        dfs = []
        for word in selected_words:
            df = DataFrameService().getWordDates(word)
            if df is None:
                # NOTE(review): getWordDates appears able to return None
                # (another caller guards for it); skip such words instead
                # of failing on the column assignment.
                continue
            # assign() returns a new frame, so the frame handed out by the
            # service is not modified in place.
            dfs.append(df.assign(word=word))

        if not dfs:
            return None

        df = pd.concat(dfs)

        if WordProportion.HTML_IDS.CHECKLIST_ALL in checklist:
            # "All" selected: keep every channel found in the data.
            checklist = df["channel_id"].unique()

        df = df[df["channel_id"].isin(checklist)]
        df = df.groupby(["channel_id",
                         "word"]).agg(overall_prop=("mean_prop",
                                                    "mean")).reset_index()

        return df
Пример #5
0
    def _filteredDf(self, checklist, word):
        """Return the strongest correlations of ``word`` for each channel.

        Args:
            checklist: Selected channel ids. If it contains
                ``WordProportion.HTML_IDS.CHECKLIST_ALL``, every channel
                present in the data is kept.
            word: Word passed to ``DataFrameService.getWordCorrelations``.

        Returns:
            A DataFrame holding, per channel, the rows with the 10 largest
            absolute ``corr`` values (stored in ``abs_corr``), minus any row
            whose ``index`` column equals ``word``.
        """
        df = DataFrameService().getWordCorrelations(word)

        if WordProportion.HTML_IDS.CHECKLIST_ALL in checklist:
            # "All" selected: keep every channel found in the data.
            checklist = df["channel_id"].unique()

        # Take an explicit copy of the filtered rows: assigning a new column
        # onto a filtered selection triggers pandas' SettingWithCopyWarning.
        df = df[df["channel_id"].isin(checklist)].copy()

        df["abs_corr"] = df["corr"].abs()
        df = df.groupby(["channel_id"]).apply(lambda x: x.sort_values(
            ["abs_corr"], ascending=False).head(10)).reset_index(drop=True)
        # NOTE(review): the queried word is dropped after the top-10 cut, so
        # a channel can end up with fewer than 10 rows; confirm that this is
        # the intended order.
        df = df[df["index"] != word]
        return df
Пример #6
0
 def getDataFrameMethod(self):
     """Return the bound ``getWordDates`` method of a new DataFrameService."""
     service = DataFrameService()
     return service.getWordDates
 def setUp(self):
     """Store a newly constructed DataFrameService on ``self.sut``."""
     service = DataFrameService()
     self.sut = service
Пример #8
0
 def setupOptions(self):
     """Prepare ``self.colors`` from the default word's data.

     Loads the ``getWordDates`` frame for ``self.default_word``; when it has
     at least one row, ``self.colors`` is set to the result of
     ``ColorPalette.mapRandomColors`` called with the frame's ``channel_id``
     column and ``THEME.COLOR``. An empty frame leaves ``self.colors``
     untouched.
     """
     df = DataFrameService().getWordDates(self.default_word)
     row_count = df.shape[0]
     if row_count > 0:
         channel_ids = df["channel_id"]
         self.colors = ColorPalette.mapRandomColors(channel_ids, THEME.COLOR)