Пример #1
0
 def write_prediction_to_table(dolt_context: DoltDT, reviews, labels,
                               predictions, table_name):
     """Write reviews, labels and predictions side by side to a Dolt table.

     ``predictions`` is wrapped in a Series named ``predictions`` and
     concatenated column-wise with ``reviews`` and ``labels``. The combined
     frame is handed to ``dolt_context.write_table`` under ``table_name``
     with ``review`` as the primary key.
     """
     prediction_column = pd.Series(predictions).rename('predictions')
     columns = [reviews, labels, prediction_column]
     combined = pd.concat(columns, axis=1)
     dolt_context.write_table(table_name=table_name, df=combined,
                              pks=['review'])
Пример #2
0
    def stats(self):
        """Print train/test scores and write both prediction sets to Dolt.

        Scores come from ``self.model.score`` and are printed rounded to two
        decimals. Predictions for the train and test bigrams are written to
        the ``train_results`` and ``test_results`` tables respectively, after
        which ``self.next(self.end)`` is called.
        """

        def _store_predictions(dolt_context: DoltDT, reviews, labels,
                               predictions, table_name):
            # One output table: review, label and prediction as columns,
            # keyed by ``review``.
            prediction_column = pd.Series(predictions).rename('predictions')
            combined = pd.concat([reviews, labels, prediction_column],
                                 axis=1)
            dolt_context.write_table(table_name=table_name, df=combined,
                                     pks=['review'])

        model = self.model

        train_score = model.score(self.train_bigram, self.train_labels)
        print("Train Score {}".format(round(train_score, 2)))
        train_predictions = model.predict(self.train_bigram)

        test_score = model.score(self.test_bigram, self.test_labels)
        print("Test Score {}".format(round(test_score, 2)))
        test_predictions = model.predict(self.test_bigram)

        # Output the predictions to the result tables
        dolt_session = DoltDT(run=self,
                              doltdb_path=self.doltdb_path,
                              branch='vinai/add-rotten-data')
        with dolt_session as dolt:
            _store_predictions(dolt, self.train_reviews, self.train_labels,
                               train_predictions, "train_results")
            _store_predictions(dolt, self.test_reviews, self.test_labels,
                               test_predictions, "test_results")

        self.next(self.end)
Пример #3
0
    def middle(self):
        """Add ``self.inp1`` and ``self.inp2`` and write the sum to ``baz``.

        The result is written through Dolt with ``index`` as the primary key,
        then ``self.next(self.end)`` is called.
        """
        with DoltDT(run=self) as dolt_session:
            combined = self.inp1 + self.inp2
            dolt_session.write_table(table_name='baz',
                                     df=combined,
                                     pks=['index'])

        self.next(self.end)
Пример #4
0
    def middle(self):
        """Double column ``B`` of ``self.df`` and write the frame to ``baz``.

        The column is reassigned on the object held in ``self.df`` (no copy
        is made). The frame is written to database ``foo`` on branch
        ``master`` with ``index`` as the primary key, then
        ``self.next(self.end)`` is called.
        """

        def _double(value):
            return value * 2

        with DoltDT(run=self, database='foo', branch="master") as dolt_session:
            frame = self.df
            frame["B"] = frame["B"].map(_double)

            dolt_session.write_table(table_name='baz',
                                     df=frame,
                                     pks=['index'])

        self.next(self.end)
Пример #5
0
    def add_random(self):
        """Shift the ``gross`` column by one random integer and write ``movies``.

        A single integer drawn from ``random.randint(1, 1000000)`` is added
        to every value of ``self.df['gross']``. The frame is then written to
        the ``movies`` table of ``metaflow_movies`` keyed by ``movie_title``,
        and ``self.next(self.end)`` is called.
        """
        import random

        with DoltDT(run=self, doltdb_path='metaflow_movies') as dolt_session:
            # One draw shared by all rows, not one draw per row.
            offset = random.randint(1, 1000000)
            self.df['gross'] = self.df['gross'] + offset

            dolt_session.write_table(table_name='movies', df=self.df,
                                     pks=['movie_title'])

        self.next(self.end)
Пример #6
0
    def start(self):
        """Load the first read and first write of another run as inputs.

        ``self.flow_dep`` is split on ``/`` into a flow name and a run id,
        which identify a ``DoltRun``. The table named by that run's first
        read is loaded into ``self.inp1`` and the table named by its first
        write into ``self.inp2``, each at the recorded commit. Then
        ``self.next(self.middle)`` is called.
        """
        flow_name, run_id = self.flow_dep.split("/")
        upstream = DoltRun(flow_name=flow_name, run_id=run_id)
        first_read = upstream.reads[0]
        first_write = upstream.writes[0]

        with DoltDT(run=self) as dolt_session:
            self.inp1 = dolt_session.read_table(first_read.table_name,
                                                commit=first_read.commit)
            self.inp2 = dolt_session.read_table(first_write.table_name,
                                                commit=first_write.commit)

        self.next(self.middle)
Пример #7
0
    def start(self):
        """Load table ``bar`` and require a recent ``SucceedsFirstDemo`` run.

        Reads ``bar`` from database ``foo`` on branch ``master`` into
        ``self.df``, then checks that the latest successful run of
        ``SucceedsFirstDemo`` finished within the last minute before calling
        ``self.next(self.middle)``.

        Raises:
            Exception: if that run finished more than one minute ago.
        """
        with DoltDT(run=self, database='foo', branch="master") as dolt:
            self.df = dolt.read_table('bar')

        first_run = Flow("SucceedsFirstDemo").latest_successful_run
        # The trailing "Z" in ``finished_at`` marks the timestamp as UTC, so
        # attach UTC explicitly and compare against an aware "now". The
        # previous code added a fixed 8 hours to local time, which is only
        # correct on a machine running at UTC-8.
        first_run_ts = datetime.datetime.strptime(
            first_run.finished_at,
            "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=datetime.timezone.utc)
        one_minute_ago = (datetime.datetime.now(datetime.timezone.utc) -
                          datetime.timedelta(minutes=1))
        if first_run_ts < one_minute_ago:
            raise Exception(
                "Run `FirstDemo` within one minute of `SecondDemo`")

        self.next(self.middle)
Пример #8
0
    def start(self):
        """Read the train/test review tables and split out text and labels.

        Both tables are read from ``self.doltdb_path`` on branch
        ``vinai/add-rotten-data``. The ``review`` and ``sentiment`` columns
        of each table are stored on ``self`` before
        ``self.next(self.bigram_representation)`` is called.
        """
        dolt_session = DoltDT(run=self,
                              doltdb_path=self.doltdb_path,
                              branch='vinai/add-rotten-data')
        with dolt_session as dolt:
            self.train_table = dolt.read_table('reviews_train')
            self.test_table = dolt.read_table('reviews_test')

            # Split the train and test tables into review text and labels
            train, test = self.train_table, self.test_table
            self.train_reviews = train['review']
            self.train_labels = train['sentiment']

            self.test_reviews = test['review']
            self.test_labels = test['sentiment']

        self.next(self.bigram_representation)
Пример #9
0
    def predict(self):
        """Run the pickled model on ``self.test_set`` and write the results.

        Loads the model from ``model.p``, predicts on the test set with the
        ``species`` and ``sample`` columns removed, and stores a frame of
        sample, true label (renamed ``labels``) and prediction in
        ``self.result``. That frame is written to the ``result`` table of
        ``iris-model-results`` keyed by ``sample``, after which
        ``self.next(self.end)`` is called.
        """
        with DoltDT(run=self, doltdb_path='iris-model-results') as dolt:
            # Close the file deterministically instead of leaking the handle.
            # NOTE: unpickling executes arbitrary code; 'model.p' must come
            # from a trusted source.
            with open('model.p', 'rb') as model_file:
                self.model = pickle.load(model_file)
            self.model_type = 'Decision Tree'

            samples = self.test_set['sample']
            y_true = self.test_set['species']
            y_true = y_true.rename('labels')

            test = self.test_set.drop(columns=['species', 'sample'])
            # NOTE(review): the predictions get a fresh default index, so the
            # concat below presumably relies on ``self.test_set`` having a
            # default 0..n-1 index too -- confirm.
            predictions = pd.Series(self.model.predict(test))
            predictions = predictions.rename('predictions')

            self.result = pd.concat([samples, y_true, predictions], axis=1)

            dolt.write_table(table_name='result', df=self.result, pks=['sample'])

        self.next(self.end)
Пример #10
0
    def start(self):
        """Read table ``bar`` through Dolt into ``self.df``.

        Calls ``self.next(self.middle)`` afterwards.
        """
        with DoltDT(run=self) as dolt_session:
            bar_table = dolt_session.read_table('bar')
            self.df = bar_table

        self.next(self.middle)
Пример #11
0
    def start(self):
        """Read table ``iris-test`` into ``self.test_set``.

        The table is read from the ``iris-test`` database, then
        ``self.next(self.predict)`` is called.
        """
        # Start by getting the original dataset
        dolt_session = DoltDT(run=self, database='iris-test')
        with dolt_session as dolt:
            self.test_set = dolt.read_table('iris-test')

        self.next(self.predict)
Пример #12
0
    def start(self):
        """Read table ``bar`` at commit ``self.bar_version`` into ``self.df``.

        The table is read from database ``foo`` on branch ``master``, then
        ``self.next(self.middle)`` is called.
        """
        dolt_session = DoltDT(run=self, database='foo', branch="master")
        with dolt_session as dolt:
            pinned_commit = self.bar_version
            self.df = dolt.read_table('bar', commit=pinned_commit)

        self.next(self.middle)
Пример #13
0
 def end(self):
     """Open ``iris-model-results`` and call ``commit_table_writes`` on it."""
     dolt_session = DoltDT(run=self, doltdb_path='iris-model-results')
     with dolt_session as dolt:
         dolt.commit_table_writes()
Пример #14
0
 def end(self):
     """Open ``metaflow_demo`` and call ``commit_table_writes`` on it."""
     dolt_session = DoltDT(run=self, doltdb_path='metaflow_demo')
     with dolt_session as dolt:
         dolt.commit_table_writes()