def test_client():
    c = v3f.Client('localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.gRPCClient), 'default is not grpc'

    c = v3f.Client('grpc://localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.gRPCClient), 'not gRPC'

    c = v3f.Client('http://localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.HTTPClient), 'not HTTP'

def test_client():
    c = v3f.Client('localhost:8081')
    assert isinstance(c, v3f.gRPCClient), 'default is not grpc'

    c = v3f.Client('grpc://localhost:8081')
    assert isinstance(c, v3f.gRPCClient), 'not gRPC'

    c = v3f.Client('http://localhost:8081')
    assert isinstance(c, v3f.HTTPClient), 'not HTTP'

def handler(context, event):
    kv_table_path = 'iguazio/demos/face-recognition/artifacts/encodings'
    user_name = 'iguazio'
    password = '******'
    client = v3f.Client("framesd:8081", user=user_name, password=password,
                        container="users")
    df = client.read(backend='kv', table=kv_table_path, reset_index=True)
    context.logger.info(df.head())
    df2 = df[['fileName', 'camera', 'label', 'imgUrl']]
    options = ['unknown']
    df3 = df2[df2.fileName.str.startswith(tuple(options))]
    for idx in range(len(df3)):
        img_url = df3.iloc[idx]['imgUrl']
        splited = img_url.split("/")
        destination = "/".join((splited[0], splited[1], splited[2], splited[3],
                                "dataset/label_pending"))
        print(img_url)
        print(idx)
        print(splited)
        print(destination)
        # Move the content of source to destination
        dest = shutil.move(img_url, destination)

def init_context(context):
    # IGZ variables
    igz_v3f = os.getenv('IGZ_V3F')
    igz_v3f_port = os.getenv('IGZ_V3F_PORT')

    # MYSQL variables
    host = os.getenv('SQL_HOST')
    port = os.getenv('SQL_PORT')
    user = os.getenv('SQL_USER')
    password = os.getenv('SQL_PWD', "")
    database = os.getenv('SQL_DB_NAME')

    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=f'{igz_v3f}:{igz_v3f_port}',
                        password=os.getenv('IGZ_PWD'))
    setattr(context, 'client', client)

    # Init DB connection and set it as a context attribute
    dbconn = pymysql.connect(host=host, port=int(port), user=user,
                             passwd=password, db=database, charset='utf8mb4')
    setattr(context, 'dbconn', dbconn)

def test_client_env():
    url = 'localhost:8080'
    data = json.dumps({'url': url})
    with setenv(v3f.SESSION_ENV_KEY, data):
        c = v3f.Client('localhost:8081')

    assert c.session.url == url, 'missing URL from env'

def test_concurrent(framesd, protocol):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    c = v3f.Client(addr)
    start = monotonic()
    with ThreadPoolExecutor() as pool:
        for i in range(7):
            pool.submit(reader, i, 5, c)
    duration = monotonic() - start
    print('duration: {:.3f}sec'.format(duration))

def read_encodings_table(params):
    client = v3f.Client(address=params.frames_url, token=params.token,
                        container=params.container)
    encoding_df = client.read(backend="kv", table=params.encodings_path,
                              reset_index=False, filter='label != -1')
    return encoding_df

def train(context, processed_data, model_name='model.bst'): device = torch.device("cpu") context.logger.info('Client') client = v3f.Client('framesd:8081', container="users") with open(processed_data.url, 'r') as f: t = f.read() context.logger.info('Loading dataset') data_df = client.read(backend="kv", table=t, reset_index=False, filter='label != -1') X = data_df[['c' + str(i).zfill(3) for i in range(128)]].values y = data_df['label'].values n_classes = len(set(y)) X = torch.as_tensor(X, device=device) y = torch.tensor(y, device=device).reshape(-1, 1) input_dim = 128 hidden_dim = 64 output_dim = n_classes context.logger.info('Preparing model architecture') spec = importlib.util.spec_from_file_location('models', MODELS_PATH) models = importlib.util.module_from_spec(spec) spec.loader.exec_module(models) model = models.FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim) model.to(device) model = model.double() criterion = nn.CrossEntropyLoss() learning_rate = 0.05 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) dataset = data.TensorDataset(X, y) train_loader = data.DataLoader(dataset) context.logger.info('Starting training process') for epoch in range(20): for features, target in train_loader: optimizer.zero_grad() out = model(features) loss = criterion(out, target[0]) loss.backward() optimizer.step() context.logger.info('Save model') dump(model._modules, open(model_name, 'wb')) context.log_artifact('model', src_path=model_name, target_path=model_name, labels={'framework': 'Pytorch-FeedForwardNN'}) os.remove(model_name)
def create_streams_v1alpha1(project_graph=''):
    for stream in project_graph['project']['v3io_streams']:
        try:
            client = v3f.Client("framesd:8081", container=stream['container'])
            client.create("stream", table=stream['path'],
                          shards=stream['shards'],
                          retention_hours=stream['retention'],
                          if_exists=0)
        except:
            print("Failed to create stream", stream)
            raise

def test_timestamp(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    df = pd.DataFrame({'birthday': [pd.Timestamp('1940-04-25', tz='Asia/Dubai'),
                                    pd.Timestamp('1940-04-25', tz='US/Pacific'),
                                    None,
                                    pd.Timestamp('1940-04-25')]})
    client.write(backend, table=tableName, dfs=df)
    df = client.read(backend, table=tableName)
    client.delete(backend, tableName)

def create_streams_v0_1(project_graph=''):
    _streams = project_graph['project']['v3io_streams']
    for stream in _streams.keys():
        try:
            client = v3f.Client("framesd:8081",
                                container=_streams[stream]['container'])
            client.create("stream", table=_streams[stream]['path'],
                          shards=_streams[stream]['shards'],
                          retention_hours=_streams[stream]['retention'],
                          if_exists=0)
        except Exception as e:
            print("Failed to create stream", stream, e)
            raise

def init_context(context):
    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=IGZ_V3F, user=IGZ_USER, password=IGZ_PWD,
                        container=CONTAINER)
    setattr(context, 'client', client)

    connection_string = f"mysql://{SQL_USER}:{SQL_PWD}@{SQL_HOST}:{SQL_PORT}/{SQL_DB_NAME}"
    engine = create_engine(connection_string, encoding='utf8',
                           convert_unicode=True,
                           isolation_level='READ_COMMITTED')
    session = sessionmaker()
    session.configure(bind=engine)
    dbconn = session()
    setattr(context, 'dbconn', dbconn)

def test_datetime(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    col = pd.Series([datetime.now(pytz.timezone("Africa/Abidjan")),
                     datetime.now(pytz.timezone("America/Nassau")),
                     None,
                     datetime.now()])
    df = pd.DataFrame({'col': col})
    client.write(backend, table=tableName, dfs=df)
    df = client.read(backend, table=tableName)
    client.delete(backend, tableName)

def test_client_wrong_params(proto, cls):
    address = '{}://localhost:8080'.format(proto)
    session_params = {
        'data_url': 'http://iguazio.com',
        'container': 'large one',
        'user': '******',
        'password': '******',
        'token': 'a quarter',
    }
    try:
        v3f.Client(address, should_check_version=False, **session_params)
    except ValueError:
        return
    # reaching here means the conflicting parameters were not rejected
    raise ValueError('expected fail but finished successfully')

def test_client(proto, cls):
    address = '{}://localhost:8080'.format(proto)
    session_params = {
        'data_url': 'http://iguazio.com',
        'container': 'large one',
        'user': '******',
        'password': '******',
    }
    client = v3f.Client(address, should_check_version=False, **session_params)
    assert client.__class__ is cls, 'wrong class'
    for key, value in session_params.items():
        key = 'url' if key == 'data_url' else key
        assert getattr(client.session, key) == value, \
            'bad session value for {}'.format(key)

def test_integration(framesd, session, protocol, backend):
    test_id = uuid4().hex
    size = 293
    table = 'integtest{}'.format(test_id)
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)
    cfg = test_config.get(backend, {})
    df = cfg['df_fn'](size)

    create_kw = cfg.get('create', {})
    if create_kw is not None:
        client.create(backend, table, **create_kw)

    write_kw = cfg.get('write', {})
    labels = {}
    if backend == 'tsdb':
        labels = {
            'li': 17,
            'lf': 3.22,
            'ls': 'hi',
        }
    client.write(backend, table, [df], **write_kw, labels=labels)
    sleep(1)  # Let db flush

    read_kw = cfg.get('read', {})
    dfs = list(client.read(backend, table=table, iterator=True, **read_kw))
    df2 = pd.concat(dfs)
    if backend == 'tsdb':
        compare_dfs_tsdb(df, df2, backend)
    elif backend == 'stream':
        compare_dfs_stream(df, df2, backend)
    else:
        if backend == 'kv':
            # FIXME: Probably the schema
            df2.dropna(inplace=True)
        compare_dfs(df, df2, backend)

    df = client.read(backend, table=table, **read_kw)
    assert isinstance(df, pd.DataFrame), 'iterator=False returned generator'

    client.delete(backend, table)

    exec_kw = cfg.get('execute', {})
    if exec_kw is not None:
        client.execute(backend, table, **exec_kw)

def test_cudf(framesd, session):
    df = cudf.DataFrame({
        'a': [1, 2, 3],
        'b': [1.1, 2.2, 3.3],
    })

    c = v3f.Client(framesd.grpc_addr, frame_factory=cudf.DataFrame)
    backend = 'csv'
    table = 'cudf-{}'.format(int(time()))
    print('table = {}'.format(table))

    c.write(backend, table, [df])
    sleep(1)  # Let db flush
    rdf = c.read(backend, table=table)
    assert isinstance(rdf, cudf.DataFrame), 'not a cudf.DataFrame'
    assert len(rdf) == len(df), 'wrong frame size'
    assert set(rdf.columns) == set(df.columns), 'columns mismatch'

def test_kv_read_empty_df(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    index = [str(i) for i in range(1, 4)]
    df = pd.DataFrame(data={'col1': [i for i in range(1, 4)],
                            'col2': ['aaa', 'bad', 'cffd']},
                      index=index)
    client.write(backend, table=tableName, dfs=df,
                 condition="starts({col2}, 'aaa') AND {col1} == 3")
    df = client.read(backend, table=tableName)
    assert df.to_json() == '{}'
    assert isinstance(df, pd.DataFrame), 'iterator=False returned generator'
    client.delete(backend, tableName)

def init_context(context):
    v3io_client = v3io.dataplane.Client()
    setattr(context, "v3io_client", v3io_client)

    v3f_client = v3f.Client("framesd:8081", container="bigdata")
    setattr(context, "v3f", v3f_client)

    window = []
    setattr(context, "window", window)
    setattr(context, "window_size", int(os.getenv("window_size", 10)))
    setattr(context, "tsdb_table", os.getenv("tsdb_table", "concept_drift_tsdb_1"))
    try:
        context.v3f.create("tsdb", context.tsdb_table, rate="1/s", if_exists=1)
    except Exception as e:
        context.logger.info(f"Creating TSDB table with rate='1/s' failed: {e}")
        context.v3f.create(
            "tsdb", context.tsdb_table, attrs={"rate": "1/s"}, if_exists=1
        )

    callbacks = [callback.strip() for callback in os.getenv("callbacks", "").split(",")]
    setattr(context, "callbacks", callbacks)

    setattr(context, "drift_stream", os.getenv("drift_stream", "/bigdata/drift_stream"))
    try:
        create_stream(
            context, context.drift_stream, int(os.getenv("drift_stream_shards", 1))
        )
    except Exception:
        context.logger.info(f"{context.drift_stream} already exists")

    models = {}
    model_types = ["pagehinkely", "ddm", "eddm"]
    path_suffix = "_model_path"
    for model in model_types:
        model_env = f"{model}{path_suffix}"
        if model_env in os.environ:
            with open(os.environ[model_env], "rb") as f:
                models[model] = load(f)
    setattr(context, "models", models)

    setattr(context, "label_col", os.getenv("label_col", "label"))
    setattr(context, "prediction_col", os.getenv("prediction_col", "prediction"))

def init_context(context):
    # MYSQL variables
    host = os.getenv('SQL_HOST')
    port = os.getenv('SQL_PORT')
    user = os.getenv('SQL_USER')
    password = os.getenv('SQL_PWD', "")
    database = os.getenv('SQL_DB_NAME')

    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=os.getenv('IGZ_V3F'),
                        user=os.getenv('IGZ_USER'),
                        password=os.getenv('IGZ_PWD'),
                        container=os.getenv('CONTAINER'))
    setattr(context, 'client', client)

    connection_string = f"mysql://{user}:{password}@{host}:{port}/{database}"
    engine = create_engine(connection_string, encoding='utf8',
                           convert_unicode=True,
                           isolation_level='READ_COMMITTED')
    session = sessionmaker()
    session.configure(bind=engine)
    dbconn = session()
    setattr(context, 'dbconn', dbconn)

$('#browsed').attr("src", window.URL.createObjectURL(xhr.response))
        }};
        xhr.send();
    }}
}});
$('#button').click( function () {{
    table.row('.selected').remove().draw(false);
}});
}});
</script>"""

img_tag = "<img src = '' alt = 'Please select image to show' id = 'browsed' height = 750 width = 750 align = 'middle'> </img>"

client = v3f.Client("framesd:8081", container="users")
access_key = os.environ['V3IO_ACCESS_KEY']
web_api_prefix = os.environ['WEB_API_PREFIX']


def load_images(data_path):
    return [f for f in paths.list_images(data_path) if '.ipynb' not in f]


def load_enc_df():
    return client.read(backend="kv",
                       table='iguazio/demos/demos/faces/artifacts/encodings',
                       reset_index=True)

def test_read(benchmark, framesd, protocol, backend):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr)
    benchmark(read_benchmark, client)

def get_cameras_list():
    client = v3f.Client(address=config['v3io']['frames'],
                        container=config['project']['container'],
                        token=config['v3io']['access_key'])
    df = client.read("kv", config['camera']['list_table'])
    return df

def test_write(benchmark, framesd, protocol, backend):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr)
    benchmark(write_benchmark, client, wdf)

def drift_magnitude(
    context,
    t: pd.DataFrame,
    u: pd.DataFrame,
    label_col=None,
    prediction_col=None,
    discretizers: dict = None,
    n_bins=5,
    stream_name: str = "some_stream",
    results_tsdb_container: str = "bigdata",
    results_tsdb_table: str = "concept_drift/drift_magnitude",
):
    """Drift magnitude metrics

    Computes drift magnitude metrics between base dataset t and dataset u.

    Metrics:
        - TVD (Total Variation Distance)
        - Hellinger
        - KL Divergence

    :param context:                MLRun context
    :param t:                      Base dataset for the drift metrics
    :param u:                      Test dataset for the drift metrics
    :param label_col:              Label column in t and u
    :param prediction_col:         Predictions column in t and u
    :param discretizers:           Dictionary of discretizers for the features,
                                   if available (created automatically if not provided)
    :param n_bins:                 Number of bins to be used for histogram creation
                                   from continuous variables
    :param stream_name:            Output stream to push metrics to
    :param results_tsdb_container: TSDB table container to push metrics to
    :param results_tsdb_table:     TSDB table to push metrics to
    """
    v3io_client = v3f.Client("framesd:8081", container=results_tsdb_container)
    try:
        v3io_client.create("tsdb", results_tsdb_table, if_exists=1, rate="1/s")
    except:
        v3io_client.create("tsdb", results_tsdb_table, if_exists=1,
                           attrs={"rate": "1/s"})

    df_t = t.as_df()
    df_u = u.as_df()

    drop_columns = []
    if label_col is not None:
        drop_columns.append(label_col)
    if prediction_col is not None:
        drop_columns.append(prediction_col)

    continuous_features = df_t.select_dtypes(["float"])
    if discretizers is None:
        discretizers = {}
        for feature in continuous_features.columns:
            context.logger.info(f"Fitting discretizer for {feature}")
            discretizer = KBinsDiscretizer(n_bins=n_bins, encode="ordinal",
                                           strategy="uniform")
            discretizer.fit(continuous_features.loc[:, feature].values.reshape(-1, 1))
            discretizers[feature] = discretizer

    os.makedirs(context.artifact_path, exist_ok=True)
    discretizers_path = os.path.abspath(f"{context.artifact_path}/discritizer.pkl")
    with open(discretizers_path, "wb") as f:
        pickle.dump(discretizers, f)
    context.log_artifact("discritizers", target_path=discretizers_path)

    context.logger.info("Discretizing features")
    for feature, discretizer in discretizers.items():
        df_t[feature] = discretizer.transform(df_t.loc[:, feature].values.reshape(-1, 1))
        df_u[feature] = discretizer.transform(df_u.loc[:, feature].values.reshape(-1, 1))
        df_t[feature] = df_t[feature].astype("int")
        df_u[feature] = df_u[feature].astype("int")
    context.log_dataset("t_discrete", df_t, format="parquet")
    context.log_dataset("u_discrete", df_u, format="parquet")

    context.logger.info("Compute prior metrics")
    results = {}
    t_prior, u_prior = to_observations(
        context,
        df_t.drop(drop_columns, axis=1),
        df_u.drop(drop_columns, axis=1),
        "features",
    )
    results["prior_tvd"], results["prior_helinger"], results["prior_kld"] = \
        all_metrics(t_prior, u_prior)

    if prediction_col is not None:
        context.logger.info("Compute prediction metrics")
        t_predictions = pd.DataFrame(df_t.loc[:, prediction_col])
        u_predictions = pd.DataFrame(df_u.loc[:, prediction_col])
        t_class, u_class = to_observations(context, t_predictions,
                                           u_predictions, "prediction")
        (
            results["prediction_shift_tvd"],
            results["prediction_shift_helinger"],
            results["prediction_shift_kld"],
        ) = all_metrics(t_class, u_class)

    if label_col is not None:
        context.logger.info("Compute class metrics")
        t_labels = pd.DataFrame(df_t.loc[:, label_col])
        u_labels = pd.DataFrame(df_u.loc[:, label_col])
        t_class, u_class = to_observations(context, t_labels, u_labels, "class")
        (
            results["class_shift_tvd"],
            results["class_shift_helinger"],
            results["class_shift_kld"],
        ) = all_metrics(t_class, u_class)

    for key, value in results.items():
        if value == float("inf"):
            context.logger.info(f"value: {value}")
            results[key] = 10

    for key, result in results.items():
        context.log_result(key, round(result, 3))

    now = pd.to_datetime(str(datetime.datetime.now()))
    results["timestamp"] = now
    context.logger.info(f"Timestamp: {results['timestamp']}")
    results["stream"] = stream_name
    results_df = pd.DataFrame(data=[list(results.values())],
                              columns=list(results.keys()))
    results_df = results_df.set_index(["timestamp", "stream"])
    v3io_client.write("tsdb", results_tsdb_table, dfs=results_df)

def encode_images(context):
    params = Params()
    params.set_params_from_context(context)
    context.logger.info(params)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    context.logger.info(f'Running on device: {device}')

    client = v3f.Client(params.frames_url, container=params.container,
                        token=params.token)

    if not os.path.exists(params.artifacts_path):
        os.makedirs(params.artifacts_path)
        context.logger.info("created dir {}".format(params.artifacts_path))
    if not os.path.exists(params.data_path + 'processed'):
        os.makedirs(params.data_path + 'processed')
        context.logger.info("created dir {}".format(params.data_path + 'processed'))
    if not os.path.exists(params.data_path + 'label_pending'):
        os.makedirs(params.data_path + 'label_pending')
        context.logger.info("created dir {}".format(params.data_path + 'label_pending'))

    # If no train images exist in the predefined path, train the model on a
    # small dataset of movie actresses
    if not os.path.exists(params.data_path + 'input'):
        os.makedirs(params.data_path + 'input')
        context.logger.info("created dir {}".format(params.data_path + 'input'))
        context.logger.info("no input provided, downloading actresses images ......")
        resp = urlopen('https://iguazio-public.s3.amazonaws.com/faces-demo/Actresses.zip')
        zip_ref = zipfile.ZipFile(BytesIO(resp.read()), 'r')
        zip_ref.extractall(params.data_path + 'input')
        zip_ref.close()
        if os.path.exists(params.data_path + 'input/__MACOSX'):
            shutil.rmtree(params.data_path + 'input/__MACOSX')

    idx_file_path = params.artifacts_path + "idx2name.csv"
    context.logger.info("index file path : {}".format(idx_file_path))
    if os.path.exists(idx_file_path):
        context.logger.info("index file path exists : {} reading file".format(idx_file_path))
        idx2name_df = pd.read_csv(idx_file_path, index_col=0)
    else:
        context.logger.info("index file path does not exist : {}".format(idx_file_path))
        idx2name_df = pd.DataFrame(columns=['value', 'name'])

    # creates a mapping of classes (person's names) to target value
    new_classes_names = [
        f for f in os.listdir(params.data_path + 'input')
        if '.ipynb' not in f and f not in idx2name_df['name'].values
    ]
    initial_len = len(idx2name_df)
    final_len = len(idx2name_df) + len(new_classes_names)
    for i in range(initial_len, final_len):
        idx2name_df.loc[i] = {'value': i, 'name': new_classes_names.pop()}
    name2idx = idx2name_df.set_index('name')['value'].to_dict()

    # log name-to-index mapping into mlrun context
    context.logger.info("artifact_path {} + local_path idx2name.csv".format(
        context.artifact_path))
    context.log_artifact(TableArtifact('idx2name', df=idx2name_df),
                         artifact_path=params.artifacts_path,
                         local_path='idx2name.csv')

    # generates a list of paths to labeled images
    imagePaths = [
        f for f in paths.list_images(params.data_path + 'input')
        if '.ipynb' not in f
    ]
    knownEncodings = []
    knownLabels = []
    fileNames = []
    urls = []
    for (i, imagePath) in enumerate(imagePaths):
        print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
        # extracts label (person's name) of the image
        name = imagePath.split(os.path.sep)[-2]
        # prepares to relocate image after extracting features
        file_name = imagePath.split(os.path.sep)[-1]
        new_path = params.data_path + 'processed/' + file_name

        # converts image format to RGB for compatibility with the face_recognition library
        image = cv2.imread(imagePath)
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detects coordinates of faces bounding boxes
        boxes = face_recognition.face_locations(rgb, model='hog')

        # computes embeddings for detected faces
        encodings = face_recognition.face_encodings(rgb, boxes)

        # this code assumes that a person's folder in the dataset does not
        # contain an image with a face other than their own
        for enc in encodings:
            file_name = name + '_' + ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=5))
            knownEncodings.append(enc)
            knownLabels.append([name2idx[name]])
            fileNames.append(file_name)
            urls.append(new_path)

        # move image to processed images directory
        shutil.move(imagePath, new_path)

    # saves computed encodings to avoid repeating computations
    df_x = pd.DataFrame(knownEncodings,
                        columns=['c' + str(i).zfill(3) for i in range(128)]).reset_index(drop=True)
    df_y = pd.DataFrame(knownLabels, columns=['label']).reset_index(drop=True)
    df_details = pd.DataFrame([['initial training'] * 3] * len(df_x),
                              columns=['imgUrl', 'camera', 'time'])
    df_details['time'] = [datetime.datetime.utcnow()] * len(df_x)
    df_details['imgUrl'] = urls
    data_df = pd.concat([df_x, df_y, df_details], axis=1)
    data_df['fileName'] = fileNames

    client.write(backend='kv', table=params.encodings_path, dfs=data_df,
                 index_cols=['fileName'])

    encoding_path = "encoding"
    # with open('encodings_path.txt', 'w+') as f:
    #     f.write('encodings')
    context.log_artifact('encodings_path', body=encoding_path)

def get_cameras_list():
    client = v3f.Client(os.getenv('V3IO_FRAMES'),
                        container=os.getenv('IGZ_CONTAINER'))
    df = client.read('kv', os.getenv('CAMERA_LIST_TBL'))
    return df

def load_images(images_path):
    return [f for f in paths.list_images(images_path) if '.ipynb' not in f]


@st.cache
def load_enc_df(p_kv_table_path):
    return client.read(backend="kv", table=p_kv_table_path, reset_index=True,
                       filter="label!=-1")


if __name__ == '__main__':
    logger = Logger()
    frames_uri = os.environ.get('FRAMES_URI')
    container = os.getenv('CONTAINER', 'users')
    token = os.getenv('V3IO_ACCESS_KEY')
    kv_table_path = os.getenv('KV_TABLE_PATH')
    logger.info(os.environ.items())
    client = v3f.Client(frames_uri, token=token, container=container)

    base_path = '/User/examples/faces/'
    data_path = base_path + 'data/'
    artifact_path = base_path + 'artifacts/'
    classes_path = artifact_path + 'idx2name.csv'
    logger.info("classes_path: {}".format(classes_path))
    classes_df = pd.read_csv(classes_path)
    known_classes = [n.replace('_', ' ') for n in classes_df['name'].values]

    page = st.sidebar.selectbox('Choose option',
                                ['Label Unknown Images', 'View Collected Images'],
                                key=1)
    if page == 'Label Unknown Images':
        label_path = data_path + 'label_pending'
        logger.info("label_path: {}".format(data_path + 'label_pending'))
        images = load_images(label_path)
        st.title('Label Unknown Images')

def train(context, processed_data, model_name='model.bst'):
    hvd.init()
    try:
        device = torch.device("cuda")
    except AssertionError:
        context.logger.info("Requested running on cuda but no cuda device available. Terminating")
        exit(1)

    context.logger.info('Client')
    client = v3f.Client('framesd:8081', container="users")
    with open(processed_data.url, 'r') as f:
        t = f.read()

    context.logger.info('Loading dataset')
    data_df = client.read(backend="kv", table=t, reset_index=False,
                          filter='label != -1')
    X = data_df[['c' + str(i).zfill(3) for i in range(128)]].values
    y = data_df['label'].values
    n_classes = len(set(y))
    X = torch.as_tensor(X, device=device)
    y = torch.tensor(y, device=device).reshape(-1, 1)

    input_dim = 128
    hidden_dim = 64
    output_dim = n_classes

    context.logger.info('Preparing model architecture')
    spec = importlib.util.spec_from_file_location('models', MODELS_PATH)
    models = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(models)
    model = models.FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)
    model.to(device)
    model = model.double()
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.05
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    dataset = data.TensorDataset(X, y)
    train_loader = data.DataLoader(dataset)

    context.logger.info('preparing for horovod distributed training')
    torch.cuda.set_device(hvd.local_rank())
    optimizer = hvd.DistributedOptimizer(optimizer,
                                         named_parameters=model.named_parameters())
    train_sampler = data.distributed.DistributedSampler(dataset,
                                                        num_replicas=hvd.size(),
                                                        rank=hvd.rank())
    train_loader = data.DataLoader(dataset, sampler=train_sampler)
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    context.logger.info('Starting training process')
    for epoch in range(20):
        for features, target in train_loader:
            optimizer.zero_grad()
            out = model(features)
            loss = criterion(out, target[0])
            loss.backward()
            optimizer.step()

    if hvd.rank() == 0:
        context.logger.info('Save model')
        dump(model._modules, open(model_name, 'wb'))
        context.log_artifact('model', src_path=model_name,
                             target_path=model_name,
                             labels={'framework': 'Pytorch-FeedForwardNN'})
        os.remove(model_name)

# Copyright 2018 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Code used by test_pip_docker
from argparse import ArgumentParser

import v3io_frames as v3f

parser = ArgumentParser()
parser.add_argument('--grpc-port', default='8081')
parser.add_argument('--http-port', default='8080')
args = parser.parse_args()

client = v3f.Client('localhost:{}'.format(args.grpc_port))
df = client.read('csv', table='weather.csv')
assert len(df) > 0, 'empty df'