# NOTE(review): the line below is a whitespace-collapsed fragment of a
# Kubeflow Pipelines script: every original newline has become a space, so it
# is not valid Python as written. Two consequences to be aware of:
#   * the first `#` (before `pre_process_op`) now comments out everything
#     after it on this physical line, including the `param_comp` /
#     `preprocess_op` loads and all of the configuration that follows;
#   * the `pipeline_args` dict literal is cut off after `json.dumps([`.
# Intended flow, as far as the visible text shows: add the parent of this
# file's directory to a ComponentStore's local search paths, load the GCP
# ML Engine train component from the kubeflow/pipelines 1.0.0 tag plus the
# local 'get_tuned_params' and 'preprocess' components, then read
# './config.yaml' and JSON-encode its 'trainingInput' section.
import kfp import kfp.components as comp import kfp.dsl as dsl from os import path import json import yaml cs = comp.ComponentStore() component_path = path.join(path.dirname(__file__), '..') cs.local_search_paths.append(component_path) caip_train_op = comp.load_component_from_url( 'https://raw.githubusercontent.com/kubeflow/pipelines/1.0.0/' 'components/gcp/ml_engine/train/component.yaml') #pre_process_op = cs.load_component('preProcess') param_comp = cs.load_component('get_tuned_params') preprocess_op = cs.load_component('preprocess') # Config parameters PROJECT_ID = 'pytorch-tpu-nfs' REGION = 'us-central1' FAIRSEQ_IMAGE = 'gcr.io/pytorch-tpu-nfs/fairseq-lm-train' training_input_json = './config.yaml' with open(training_input_json) as f: training_input = json.dumps(yaml.safe_load(f)['trainingInput']) pipeline_args = { 'project_id': PROJECT_ID, 'region': REGION, 'args': json.dumps([
import kfp
from kfp import components

# Every component below is resolved against a single pinned kubeflow/pipelines
# commit, so all loads refer to the same revision of the component library.
component_store = components.ComponentStore(
    url_search_prefixes=[
        'https://raw.githubusercontent.com/kubeflow/pipelines/af3eaf64e87313795cad1add9bfd9fa1e86af6de/components/',
    ],
)

# Data source.
chicago_taxi_dataset_op = component_store.load_component(
    name='datasets/Chicago_Taxi_Trips')

# Converters into Apache Parquet.
convert_csv_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_CSV')
convert_tsv_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_TSV')
convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_ApacheArrowFeather')

# Converters out of Apache Parquet.
convert_apache_parquet_to_csv_op = component_store.load_component(
    name='_converters/ApacheParquet/to_CSV')
convert_apache_parquet_to_tsv_op = component_store.load_component(
    name='_converters/ApacheParquet/to_TSV')
convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component(
    name='_converters/ApacheParquet/to_ApacheArrowFeather')


# Sample pipeline: pull Chicago Taxi Trips rows for January 2019 as CSV and as
# TSV, convert the CSV to Parquet, then convert that Parquet back to CSV and
# to Arrow Feather. Takes no arguments; the steps are created by calling the
# component ops in order.
def parquet_pipeline():
    # Both dataset pulls share the same filter, column list and row cap; only
    # the requested output format differs between them.
    taxi_query = dict(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    )

    # Format is left at the component's default for the first pull.
    csv = chicago_taxi_dataset_op(**taxi_query).output
    # NOTE(review): `tsv` is not consumed in the visible part of this
    # function. The call is kept because invoking the op presumably still
    # adds a step to the pipeline -- confirm before removing it.
    tsv = chicago_taxi_dataset_op(**taxi_query, format='tsv').output

    # CSV -> Parquet, then Parquet -> CSV and Parquet -> Arrow Feather.
    csv_parquet = convert_csv_to_apache_parquet_op(csv).output
    csv_parquet_csv = convert_apache_parquet_to_csv_op(csv_parquet).output
    csv_parquet_feather = convert_apache_parquet_to_apache_arrow_feather_op(csv_parquet).output
# NOTE(review): whitespace-collapsed, truncated fragment -- the original
# newlines are gone, so the line below does not parse, and `parquet_pipeline`
# is cut off inside the second `chicago_taxi_dataset_op(` call.
# Visible intent: build a ComponentStore rooted at a pinned
# kubeflow/pipelines commit, load the Chicago Taxi Trips dataset component
# and four ApacheParquet converters (from_CSV, from_TSV,
# to_ApacheArrowFeather, from_ApacheArrowFeather), then start a pipeline
# that queries the dataset for trips started in January 2019 (limit 10000).
import kfp from kfp import components component_store = components.ComponentStore(url_search_prefixes=[ 'https://raw.githubusercontent.com/kubeflow/pipelines/0d7d6f41c92bdc05c2825232afe2b47e5cb6c4b3/components/' ]) chicago_taxi_dataset_op = component_store.load_component( name='datasets/Chicago_Taxi_Trips') convert_csv_to_apache_parquet_op = component_store.load_component( name='_converters/ApacheParquet/from_CSV') convert_tsv_to_apache_parquet_op = component_store.load_component( name='_converters/ApacheParquet/from_TSV') convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component( name='_converters/ApacheParquet/to_ApacheArrowFeather') convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component( name='_converters/ApacheParquet/from_ApacheArrowFeather') def parquet_pipeline(): csv = chicago_taxi_dataset_op( where= 'trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"', select= 'tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total', limit=10000, ).output tsv = chicago_taxi_dataset_op( where= 'trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',