Example #1
    def start(self):
        """
        Use the Metaflow client to retrieve the latest successful run of our
        MovieStatsFlow and assign its results as data artifacts in this flow.

        This step uses 'conda' to isolate the environment. This step will
        always use pandas==1.3.3 regardless of what is installed on the
        system.

        """
        from metaflow import Flow, get_metadata

        # Print metadata provider
        print("Using metadata provider: %s" % get_metadata())

        # Load the analysis from the MovieStatsFlow.
        run = Flow("MovieStatsFlow").latest_successful_run
        print("Using analysis from '%s'" % str(run))

        # Get the dataframe from the start step, before we sliced it into
        # genre-specific dataframes.
        self.dataframe = run["start"].task.data.dataframe

        # Also grab the summary statistics.
        self.genre_stats = run.data.genre_stats

        # Compute our two recommendation types in parallel.
        self.next(self.bonus_movie, self.genre_movies)
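
The docstring above refers to conda-based isolation, which is not visible in
the snippet itself. As a minimal sketch of how that pin is usually declared in
Metaflow (the flow name here is hypothetical; only the pandas version comes
from the docstring):

from metaflow import FlowSpec, conda, step

class MovieRecsFlow(FlowSpec):  # hypothetical name for illustration

    # Pin pandas regardless of what is installed on the host system.
    @conda(libraries={"pandas": "1.3.3"})
    @step
    def start(self):
        ...
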
Example #2
    def start(self):
        """
        Use the Metaflow client to retrieve the latest successful run of our
        MovieStatsFlow and assign its results as data artifacts in this flow.

        This step uses 'conda' to isolate the environment. This step will
        always use pandas==0.24.2 regardless of what is installed on the
        system.

        """
        from metaflow import get_metadata
        print("Using metadata provider: %s" % get_metadata())

        self.next(self.extract)
Example #3
    def start(self):
        """
        The 'start' step is a regular step, so it runs locally on the machine
        from which the flow is executed.

        """
        from metaflow import get_metadata

        print("HelloAWS is starting.")
        print("")
        print("Using metadata provider: %s" % get_metadata())
        print("")
        print("The start step is running locally. Next, the ")
        print("'hello' step will run remotely on AWS batch. ")
        print("If you are running in the Netflix sandbox, ")
        print("it may take some time to acquire a compute resource.")

        self.next(self.hello)
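
The remote execution described in the printout comes from a step decorator
rather than from this code. A minimal sketch of how the 'hello' step would be
marked for AWS Batch (the resource values are illustrative assumptions, not
from the example):

from metaflow import FlowSpec, batch, step

class HelloAWS(FlowSpec):  # name taken from the printout above

    # Run this step remotely on AWS Batch with the requested resources.
    @batch(cpu=1, memory=4096)
    @step
    def hello(self):
        self.message = "Hello from AWS Batch!"
        self.next(self.end)
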
Example #4
    def start(self):
        """
        Use the Metaflow client to retrieve the latest successful run of our
        MovieStatsFlow and assign its results as data artifacts in this flow.

        """
        from metaflow import Flow, get_metadata

        # Print metadata provider
        print("Using metadata provider: %s" % get_metadata())

        # Load the analysis from the MovieStatsFlow.
        run = Flow("MovieStatsFlow").latest_successful_run
        print("Using analysis from '%s'" % str(run))

        self.genre_stats = run.data.genre_stats

        # Compute our two recommendation types in parallel.
        self.next(self.bonus_movie, self.genre_movies)
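
Both this example and Example #1 fan out with self.next(self.bonus_movie,
self.genre_movies). In Metaflow such a branch must later be closed by a join
step that receives both branches as inputs; a minimal sketch (the artifact
names are assumptions for illustration):

    @step
    def join(self, inputs):
        # Each branch is addressable on 'inputs' by its step name.
        self.bonus = inputs.bonus_movie.bonus
        self.recommendations = inputs.genre_movies.recommendations
        self.next(self.end)
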
Example #5
from metaflow import Flow, get_metadata

# Print metadata provider
print("Using metadata provider: %s" % get_metadata())

# Load the analysis from the MovieStatsFlow.
run = Flow('GenreStatsFlow').latest_successful_run
print("Using analysis from '%s'" % str(run))

genre_stats = run.data.genre_stats
print(genre_stats)
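
One thing this snippet relies on implicitly: the client resolves
Flow('GenreStatsFlow') within the caller's current namespace, so by default it
only sees that user's runs. To read runs owned by other users or by a
scheduler, clear the namespace first:

from metaflow import namespace

namespace(None)  # global namespace: runs from all users become visible
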
Example #6
from metaflow import Flow, get_metadata, Run
print("Metadata", get_metadata())
from metaflow_train import FinalData
from typing import List
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.express as ps
from plotly.subplots import make_subplots
import math
import os
import datetime
import itertools
import seaborn as sns
from scipy.stats import norm

def get_key_map(arr):
    # Build the cartesian product of the given lists as a list of tuples.
    finalmap = []
    for i in itertools.product(*arr):
        finalmap.append(i)
    return finalmap
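
# Usage sketch (hypothetical input):
#   get_key_map([[1, 2], ["a", "b"]])
#   -> [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]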

def plot_and_save_grad_figures(run: Run):
    """
    Directly plots gradients. Will die if the gradients are too large.
    """
    final_data_arr = run.data.final_data
    run_dir_path = "RunAnalytics/Run-" + run.id
    # Gradients are collected for the whole Flow, so if the first entry
    # has none, none of them do.
    if not hasattr(final_data_arr[0], 'gradients'):
        return None
    if len(final_data_arr[0].gradients['avg']) == 0:
        return None
Example #7
from metaflow import Flow, get_metadata
from metaflow.datatools.dolt import DoltDT
from doltpy.core import Dolt


def print_data_map(data_map):
    for run_step in data_map.keys():
        for table in data_map[run_step]:
            print('{}, {}'.format(run_step, table))
            #print(data_map[run_step][table])


print("Current metadata provider: %s" % get_metadata())
doltdb_path = './imdb-reviews'
flow = Flow('IMDBSentimentsFlow')
run = flow.latest_successful_run
print("Using run: %s" % str(run))
'''
Ex 1: Get all the inputs used by a specific run of a flow
'''
# doltdt = DoltDT(run, doltdb_path, 'master')
# data_map_for_run = doltdt.get_reads(steps=['start'])
# print_data_map(data_map_for_run)
'''
Ex 2: Get all the inputs used by a specific step of a run of a flow
'''
# doltdt = DoltDT(run, doltdb_path, 'vinai/add-rotten-data')
# data_map_for_run = doltdt.get_reads(steps=['start'])
# print_data_map(data_map_for_run)
'''
Ex 3: Outputs are handled identically
'''
Example #8
# Module-level setup assumed by this handler (omitted from the original
# snippet): standard-library imports, the boto3 DynamoDB handle, and the
# events table name.
import json
import os
from datetime import datetime

import boto3

dynamodb = boto3.resource('dynamodb')
EVENTS_RECORD_STORE = os.environ['EVENTS_RECORD_STORE']  # assumed: table name via env


def lambda_handler(event, context):

    print(event)

    for record in event['Records']:
        key = record['s3']['object']['key']
        bucket_name = record['s3']['bucket']['name']

        os.environ['METAFLOW_HOME'] = '/tmp'
        os.environ['USERNAME'] = "******"

        obj = {
            'METAFLOW_DEFAULT_METADATA': 'service',
            'METAFLOW_DEFAULT_DATASTORE': 's3',
            'METAFLOW_DATASTORE_SYSROOT_S3': f"s3://{bucket_name}",
            'METAFLOW_SERVICE_AUTH_KEY': "yvhNDfEzcRa5fxKq2ZELda1zk8wNXxMs17Jt4OGs",
            'METAFLOW_SERVICE_URL': "https://5sqcgnuyte.execute-api.eu-west-1.amazonaws.com/api/"
        }

        with open('/tmp/config.json', 'w', encoding='utf-8') as f:
            json.dump(obj, f, ensure_ascii=False, indent=4)

        from metaflow import Run, get_metadata, namespace

        namespace(None)
        print(get_metadata())

        # Key layout assumed by this handler: <flow_name>/<run_id>/<step>/...
        flow, run_id, step = key.split("/")[:3]

        run = Run(f"{flow}/{run_id}")

        dynamo_object = {
            "created_at": int(
                datetime.strptime(
                    run.created_at.split(".")[0],
                    '%Y-%m-%dT%H:%M:%S').timestamp()),
            "flow_name": flow,
            "run_id": int(run_id),
            "success": run.successful,
            "finished": run.finished,
            "finished_at": 0 if run.finished_at is None else int(
                datetime.strptime(
                    run.finished_at.split(".")[0],
                    '%Y-%m-%dT%H:%M:%S').timestamp()),
            "current_step": step,
            "user": _parse_tags(run.tags, "user"),
            "tags": run.tags,
            "bucket": bucket_name
        }

        print(dynamo_object)

        table = dynamodb.Table(EVENTS_RECORD_STORE)

        table.put_item(Item=dynamo_object)

    return
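
The helper _parse_tags used above is not part of the snippet. A minimal
sketch, assuming Metaflow's "key:value" tag convention (system tags such as
"user:alice" follow it):

def _parse_tags(tags, key):
    # Return the value of the first "key:value" tag matching 'key', else None.
    for tag in tags:
        if tag.startswith(key + ":"):
            return tag.split(":", 1)[1]
    return None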