Пример #1
0
    def test_churn_predictor(self):
        """Verify the public attribute names exposed by a churn predictor.

        A synthetic 500-event activity log is wrapped in a ``TimeSeries``
        indexed by "timestamp", a model is created from it with default
        arguments, and every attribute name of the model that does not start
        with an underscore is handed to ``check_visible_modules`` together
        with the expected names.
        """
        # Arrange: 500 events, 20000 seconds apart, spread over five users.
        event_count = 500
        epoch_seconds = [1453845953 + 20000 * i for i in range(event_count)]
        user_ids = [1, 2, 3, 4, 5] * 20 + [1, 2, 3, 4] * 25 + [1, 2, 3] * 100
        action_codes = [1, 2, 3, 4, 5] * 100

        def _to_datetime(seconds):
            import datetime

            return datetime.datetime.fromtimestamp(seconds)

        log = turicreate.SFrame({
            "user_id": user_ids,
            "timestamp": epoch_seconds,
            "action": action_codes,
        })
        # The integer column is replaced by datetimes before it becomes the
        # index of the time series.
        log["timestamp"] = log["timestamp"].apply(_to_datetime)
        series = turicreate.TimeSeries(log, "timestamp")

        # Act
        model = turicreate.churn_predictor.create(series)
        visible = [name for name in dir(model) if name[:1] != "_"]

        # Assert.
        # NOTE(review): "_list_fields" is listed twice and starts with an
        # underscore, while `visible` drops underscore-prefixed names --
        # confirm against check_visible_modules whether that is intended.
        expected_names = [
            "categorical_features", "evaluate", "extract_features",
            "get_feature_importance", "churn_period", "grace_period",
            "features", "get", "is_data_aggregated",
            "_list_fields", "_list_fields",
            "lookback_periods", "model_options", "name",
            "num_features", "num_observations", "num_users",
            "numerical_features", "predict", "explain",
            "processed_training_data", "save", "show", "summary",
            "time_boundaries", "time_period",
            "trained_model", "trained_explanation_model",
            "get_churn_report", "get_activity_baseline",
            "views", "use_advanced_features", "user_id",
        ]
        check_visible_modules(visible, expected_names)
Пример #2
0
    def test_churn_predictor(self):
        """Check which non-underscore attributes a churn predictor exposes.

        Trains ``turicreate.churn_predictor`` on a generated activity
        ``TimeSeries`` and passes the model's attribute names (those not
        starting with an underscore) and the expected names to
        ``check_visible_modules``.
        """
        # Arrange
        def _as_datetime(unix_seconds):
            import datetime
            return datetime.datetime.fromtimestamp(unix_seconds)

        columns = {
            "user_id": [1, 2, 3, 4, 5] * 20 + [1, 2, 3, 4] * 25 + [1, 2, 3] * 100,
            "timestamp": [1453845953 + 20000 * step for step in range(500)],
            "action": [1, 2, 3, 4, 5] * 100,
        }
        activity = turicreate.SFrame(columns)
        # Swap the raw integer timestamps for datetimes, then index by them.
        converted = activity["timestamp"].apply(_as_datetime)
        activity["timestamp"] = converted
        activity = turicreate.TimeSeries(activity, "timestamp")

        # Act
        predictor = turicreate.churn_predictor.create(activity)
        public_names = list(
            filter(lambda attr: not attr.startswith("_"), dir(predictor))
        )

        # Assert.
        # NOTE(review): "_list_fields" occurs twice below and is
        # underscore-prefixed, although `public_names` filters such names
        # out -- verify against check_visible_modules that this is intended.
        wanted = [
            "categorical_features",
            "evaluate",
            "extract_features",
            "get_feature_importance",
            "churn_period",
            "grace_period",
            "features",
            "get",
            "is_data_aggregated",
            "_list_fields",
            "_list_fields",
            "lookback_periods",
            "model_options",
            "name",
            "num_features",
            "num_observations",
            "num_users",
            "numerical_features",
            "predict",
            "explain",
            "processed_training_data",
            "save",
            "show",
            "summary",
            "time_boundaries",
            "time_period",
            "trained_model",
            "trained_explanation_model",
            "get_churn_report",
            "get_activity_baseline",
            "views",
            "use_advanced_features",
            "user_id",
        ]
        check_visible_modules(public_names, wanted)
    def get_venue_authors_links_timeseries(self):
        """Build a time series of co-authorship links between authors.

        The rows of ``self._all_papers_sf`` are joined with their authors
        (``self.authors_affilations_sframe``) and the result is joined with
        itself on "Paper ID", which pairs up the authors of the same paper.
        Pairs of an author with themselves are dropped.

        :return: ``tc.TimeSeries`` indexed by "datetime" whose remaining
            columns are "src_id" and "dst_id".
        """
        paper_authors = self.authors_affilations_sframe["Paper ID", "Author ID"]
        paper_authors = self._all_papers_sf.join(paper_authors, on="Paper ID")
        paper_authors = paper_authors[
            'datetime', 'Author ID', 'Paper publish year', 'Paper ID']

        # Self-join on the paper; the code below expects the second copy of
        # the author column to be named "Author ID.1".
        links_sf = paper_authors.join(paper_authors, on="Paper ID")

        # Bind the result of rename(): turicreate's SFrame.rename defaults to
        # inplace=False and returns the renamed frame, so discarding the
        # return value would leave "src_id"/"dst_id" undefined for the
        # selection on the next line.
        links_sf = links_sf.rename(
            {'Author ID': 'src_id', 'Author ID.1': 'dst_id'})
        links_sf = links_sf["src_id", "dst_id", "datetime"]

        # Only self-links (u, u) are removed: both (u, v) and (v, u) are kept
        # for every link.
        links_sf = links_sf[links_sf["src_id"] != links_sf["dst_id"]]
        return tc.TimeSeries(links_sf, index="datetime")
    def get_venue_authors_timeseries(self):
        """Build a time series with one row per author.

        For every "Author ID" found by joining ``self._all_papers_sf`` with
        ``self.authors_affilations_sframe`` on "Paper ID", the smallest and
        largest "Paper publish year" are computed and each is converted to
        1 January of that year.

        :return: ``tc.TimeSeries`` indexed by "mindate" with the columns
            "v_id" (the author id) and "maxdate", or ``None`` when the
            grouped frame has no rows.
        """
        papers = self._all_papers_sf["Paper ID", "Paper publish year"]
        authorship = self.authors_affilations_sframe["Paper ID", "Author ID"]
        sf = papers.join(authorship, on="Paper ID")["Author ID", "Paper publish year"]
        sf = sf.groupby(
            "Author ID", {
                "mindate": agg.MIN("Paper publish year"),
                "maxdate": agg.MAX("Paper publish year")
            })

        # Nothing to index: bail out before doing any per-row conversion.
        if sf.num_rows() == 0:
            return None

        # Bind the result of rename(): turicreate's SFrame.rename defaults to
        # inplace=False and returns the renamed frame, so a bare call would
        # leave the column named "Author ID".
        sf = sf.rename({"Author ID": "v_id"})

        # Turn both year columns into datetimes at 1 January of that year.
        for column in ("mindate", "maxdate"):
            sf[column] = sf[column].apply(
                lambda y: datetime(year=y, month=1, day=1))

        return tc.TimeSeries(sf, index="mindate")