def yield_event(self, dagster_event):
    """Hand a Dagster event from notebook code back to the framework.

    When called interactively or in development (i.e. outside of a pipeline
    run), the event is returned to the caller unchanged.

    Args:
        dagster_event (Union[:class:`dagster.AssetMaterialization`, :class:`dagster.ExpectationResult`, :class:`dagster.TypeCheck`, :class:`dagster.Failure`, :class:`dagster.RetryRequested`]):
            An event to yield back to Dagster. ``Materialization`` instances
            are accepted as well.

    Raises:
        DagstermillError: If ``dagster_event`` is not an instance of one of
            the accepted event types. The check runs before the
            ``in_pipeline`` test, so it also applies to interactive use.
    """
    accepted_types = (
        Materialization,
        AssetMaterialization,
        ExpectationResult,
        TypeCheck,
        Failure,
        RetryRequested,
    )
    if not isinstance(dagster_event, accepted_types):
        raise DagstermillError(
            f"Received invalid type {dagster_event} in yield_event. "
            f"Expected a Dagster event type, one of {accepted_types}."
        )

    if self.in_pipeline:
        # deferred import for perf
        import scrapbook

        # Each event gets its own uniquely named file in the marshal dir.
        event_id = "event-" + str(uuid.uuid4())
        out_file_path = os.path.join(self.marshal_dir, event_id)
        with open(out_file_path, "wb") as fd:
            payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
            fd.write(payload)

        # Only the path of the pickled event is glued into the notebook.
        scrapbook.glue(event_id, out_file_path)
        return None

    return dagster_event
def yield_event(self, dagster_event):
    '''Hand a Dagster event from notebook code back to the framework.

    When called interactively or in development (i.e. outside of a pipeline
    run), the event is returned to the caller unchanged.

    Args:
        dagster_event (Union[:class:`dagster.Materialization`, :class:`dagster.ExpectationResult`, :class:`dagster.TypeCheck`, :class:`dagster.Failure`]):
            An event to yield back to Dagster.
    '''
    accepted_types = (Materialization, ExpectationResult, TypeCheck, Failure)
    check.inst_param(dagster_event, 'dagster_event', accepted_types)

    if self.in_pipeline:
        # deferred import for perf
        import scrapbook

        # Each event gets its own uniquely named file in the marshal dir.
        event_id = 'event-' + str(uuid.uuid4())
        out_file_path = os.path.join(self.marshal_dir, event_id)
        with open(out_file_path, 'wb') as fd:
            payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
            fd.write(payload)

        # Only the path of the pickled event is glued into the notebook.
        scrapbook.glue(event_id, out_file_path)
        return None

    return dagster_event
def yield_result(self, value, output_name="result"):
    """Yield a result directly from notebook code.

    When called interactively or in development, returns its input.

    Args:
        value (Any): The value to yield.
        output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

    Raises:
        DagstermillError: If the solid has no output named ``output_name``.
    """
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    if not self.solid_def.has_output(output_name):
        expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
        # The trailing space after the period matters: the two literals are
        # joined by implicit concatenation, and without it the sentences run
        # together ("...named foo.Expected one of ...").
        raise DagstermillError(
            f"Solid {self.solid_def.name} does not have output named {output_name}. "
            f"Expected one of {expected_names}"
        )

    dagster_type = self.solid_def.output_def_named(output_name).dagster_type
    # The value is written to a per-output file in the marshal dir; whatever
    # write_value returns is what gets glued under the output's name.
    out_file = os.path.join(self.marshal_dir, f"output-{output_name}")
    scrapbook.glue(output_name, write_value(dagster_type, value, out_file))
def yield_result(self, value, output_name="result"):
    """Yield a result directly from notebook code.

    When called interactively or in development, returns its input.

    Args:
        value (Any): The value to yield.
        output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

    Raises:
        DagstermillError: If the solid has no output named ``output_name``.
    """
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    if not self.solid_def.has_output(output_name):
        expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
        # The trailing space after the period matters: the two literals are
        # joined by implicit concatenation, and without it the sentences run
        # together ("...named foo.Expected one of ...").
        raise DagstermillError(
            f"Solid {self.solid_def.name} does not have output named {output_name}. "
            f"Expected one of {expected_names}"
        )

    # pass output value cross process boundary using io manager
    step_context = self.context._step_context  # pylint: disable=protected-access
    # Note: yield_result currently does not support DynamicOutput
    step_output_handle = StepOutputHandle(
        step_key=step_context.step.key, output_name=output_name
    )
    output_context = step_context.get_output_context(step_output_handle)
    io_manager = step_context.get_io_manager(step_output_handle)

    # Note that we assume io manager is symmetric, i.e handle_input(handle_output(X)) == X
    io_manager.handle_output(output_context, value)

    # record that the output has been yielded; the value itself travels via
    # the io manager, so only an empty marker is glued under the output name
    scrapbook.glue(output_name, "")
def yield_result(self, value, output_name="result"):
    """Yield a result directly from notebook code.

    When called interactively or in development, returns its input.

    Args:
        value (Any): The value to yield.
        output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

    Raises:
        DagstermillError: If the solid has no output named ``output_name``.
    """
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    if not self.solid_def.has_output(output_name):
        expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
        # The trailing space after the period matters: the two literals are
        # joined by implicit concatenation, and without it the sentences run
        # together ("...named foo.Expected one of ...").
        raise DagstermillError(
            f"Solid {self.solid_def.name} does not have output named {output_name}. "
            f"Expected one of {expected_names}"
        )

    dagster_type = self.solid_def.output_def_named(output_name).dagster_type

    # https://github.com/dagster-io/dagster/issues/2648
    # dagstermill temporary file creation should use a more systematic and robust scheme
    # The solid handle is part of the file name, so the path differs per handle.
    out_file = os.path.join(
        self.marshal_dir, f"{self.context.solid_handle}-output-{output_name}"
    )
    scrapbook.glue(output_name, write_value(dagster_type, value, out_file))
def yield_result(self, value, output_name='result'):
    '''Emit ``value`` as a solid output from notebook code.

    Outside of a pipeline run (interactive use or development) the value is
    handed straight back to the caller.

    Args:
        value (Any): The value to yield.
        output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

    Raises:
        DagstermillError: If the solid has no output named ``output_name``.
    '''
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    solid_def = self.solid_def
    if not solid_def.has_output(output_name):
        message = 'Solid {solid_name} does not have output named {output_name}'.format(
            solid_name=solid_def.name, output_name=output_name)
        raise DagstermillError(message)

    runtime_type = solid_def.output_def_named(output_name).runtime_type
    out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
    # Whatever write_value returns is what gets glued under the output's name.
    written = write_value(runtime_type, value, out_file)
    scrapbook.glue(output_name, written)
def yield_event(self, dagster_event):
    '''Hand a Dagster event from notebook code back to the framework.

    When the manager was not populated by papermill, the event is returned
    to the caller unchanged. Otherwise the event is pickled to a uniquely
    named file in ``self.marshal_dir`` and that file's path is glued into the
    notebook under the event id; nothing is returned in that case.
    '''
    if self.populated_by_papermill:
        event_id = 'event-' + str(uuid.uuid4())
        out_file_path = os.path.join(self.marshal_dir, event_id)
        with open(out_file_path, 'wb') as fd:
            payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
            fd.write(payload)

        scrapbook.glue(event_id, out_file_path)
        return None

    return dagster_event
def yield_materialization(self, path, description):
    '''Report a ``Materialization(path, description)`` from notebook code.

    When the manager was not populated by papermill, the freshly built
    ``Materialization`` is returned to the caller. Otherwise it is pickled to
    a uniquely named file in ``self.marshal_dir`` and that file's path is
    glued into the notebook under the materialization id; nothing is returned
    in that case.
    '''
    if self.populated_by_papermill:
        materialization_id = 'materialization-' + str(uuid.uuid4())
        out_file_path = os.path.join(self.marshal_dir, materialization_id)
        with open(out_file_path, 'wb') as fd:
            # Built inside the ``with`` block to keep the original ordering
            # relative to opening the file.
            materialization = Materialization(path, description)
            fd.write(pickle.dumps(materialization, PICKLE_PROTOCOL))

        scrapbook.glue(materialization_id, out_file_path)
        return None

    return Materialization(path, description)
def yield_event(self, dagster_event):
    '''Hand a Dagster event from notebook code back to the framework.

    Outside of a pipeline run the event is returned to the caller unchanged.
    Inside a pipeline run the event is pickled to a uniquely named file in
    ``self.marshal_dir`` and that file's path is glued into the notebook
    under the event id; nothing is returned in that case.
    '''
    if self.in_pipeline:
        # deferred import for perf
        import scrapbook

        event_id = 'event-' + str(uuid.uuid4())
        out_file_path = os.path.join(self.marshal_dir, event_id)
        with open(out_file_path, 'wb') as fd:
            payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
            fd.write(payload)

        scrapbook.glue(event_id, out_file_path)
        return None

    return dagster_event
def yield_result(self, value, output_name='result'):
    '''Emit ``value`` as a solid output from notebook code.

    Outside of a pipeline run the value is handed straight back to the
    caller. Inside a pipeline run the value is written via ``write_value`` to
    a per-output file in ``self.marshal_dir`` and the result of that call is
    glued into the notebook under ``output_name``.

    Raises:
        DagstermillError: If the solid has no output named ``output_name``.
    '''
    if not self.in_pipeline:
        return value

    # deferred import for perf
    import scrapbook

    solid_def = self.solid_def
    if not solid_def.has_output(output_name):
        message = 'Solid {solid_name} does not have output named {output_name}'.format(
            solid_name=solid_def.name, output_name=output_name)
        raise DagstermillError(message)

    runtime_type = solid_def.output_def_named(output_name).runtime_type
    out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
    written = write_value(runtime_type, value, out_file)
    scrapbook.glue(output_name, written)
def yield_result(self, value, output_name='result'):
    '''Emit ``value`` as a solid output from notebook code.

    When the manager was not populated by papermill the value is handed
    straight back to the caller. Otherwise one of two paths is taken:

    * ``self.solid_def`` is set: the output is validated against the solid
      definition, written with ``write_value`` and the result is glued.
    * ``self.solid_def`` is ``None``: the serialization kind is looked up in
      ``self.output_name_type_dict`` and the value is either glued directly or
      pickled to a file whose path is glued.

    Raises:
        DagstermillError: If ``output_name`` is unknown, or if no solid
            definition is available and the output's type is not one of the
            directly supported serialization kinds.
    '''
    if not self.populated_by_papermill:
        return value

    if self.solid_def is not None:
        if not self.solid_def.has_output(output_name):
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'.format(
                    solid_name=self.solid_def.name, output_name=output_name))

        runtime_type = self.solid_def.output_def_named(output_name).runtime_type
        out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
        scrapbook.glue(output_name, write_value(runtime_type, value, out_file))
        return None

    # No solid definition available: fall back to the name -> type mapping.
    if output_name not in self.output_name_type_dict:
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=self.solid_def_name, output_name=output_name))

    runtime_type_enum = self.output_name_type_dict[output_name]

    # Scalars, and ANY-typed values that are JSON serializable, are glued as-is.
    glue_directly = runtime_type_enum == SerializableRuntimeType.SCALAR or (
        runtime_type_enum == SerializableRuntimeType.ANY and is_json_serializable(value))
    if glue_directly:
        scrapbook.glue(output_name, value)
        return None

    if runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
        out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
        PickleSerializationStrategy().serialize_to_file(value, out_file)
        scrapbook.glue(output_name, out_file)
        return None

    # Discuss this in the docs and improve error message
    # https://github.com/dagster-io/dagster/issues/1275
    # https://github.com/dagster-io/dagster/issues/1276
    raise DagstermillError(
        'Output Definition for output {output_name} requires repo registration '
        'since it has a complex serialization format'.format(output_name=output_name))
# %%
# Render the diagnostic plots for the fitted model held in `results`.
results.plot_diagnostics(figsize=(16, 8))
plt.show()

# %%
# Non-dynamic prediction starting timedelta(1) before the run datetime,
# together with its confidence interval.
forecast_start = run_datetime - timedelta(1)
pred = results.get_prediction(start=forecast_start, dynamic=False)
pred_ci = pred.conf_int()

fig, ax = plt.subplots()

# Only the observations newer than timedelta(3) before the run are drawn.
observed_cutoff = run_datetime - timedelta(3)
ax.plot(data[data.index > observed_cutoff]['mydata'], label='observed')
ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7)

# Shade the band between the first two confidence-interval columns.
ax.fill_between(
    pred_ci.index,
    pred_ci.iloc[:, 0],
    pred_ci.iloc[:, 1],
    color='k',
    alpha=.2,
)

ax.set_xlabel('Date')
ax.set_ylabel('mydata')
plot_title = 'Results of ARIMA{}x{}12 - AIC:{} on {}'.format(
    best[1], best[2], round(best[0]), run_date)
ax.set(title=plot_title)
fig.legend()

# Store the figure in the notebook under a fixed key (and display it).
sb.glue('arima_results_fig', fig, display=True)

# %%
# Persist the prediction object, keyed by run date and source id.
prediction_path = "../data/output/step2/prediction_model_" + run_date + "-" + source_id
pred.save(prediction_path)

# %%
# %%
# Minute-resolution timestamp embedded in the workbook name.
date_hour_stamp = time.strftime('%Y-%m-%d_%H_%M')
file_name = ''.join(('All_Plants_Before_After_Budgeted_CPUs_', date_hour_stamp, '.xlsx'))

# %% [markdown]
# ### Write/save file to designated network share drive location:

# %%
output_path = save_dir / file_name
cpu_before_after_merge.to_excel(output_path, index=False)

# %% [markdown]
# ### Now, we need to "glue" the location of the saved file to this notebook so that another notebook can retrieve/reference it:

# %%
str(output_path)

# %%
sb.glue("path_to_red_green_sheet_excel_file", str(output_path))

# %% [markdown]
# ### Send Windows Toast notification when script completes

# %%
toast_title = "### Before vs After CPU Status ###"
toast_message = "Successfuly compared before CPUs with after CPU adjustments"
toaster = ToastNotifier()
toaster.show_toast(
    toast_title,
    toast_message,
    icon_path="images/honda_logo.ico",
    duration=5,
)
# Here we use the **start_date** and **stop_date** parameters, which are defined above by default, but can
# be overwritten at runtime by papermill.

# %%
data_highlight = data.loc[start_date:stop_date]

# %% [markdown]
# We use the `sb.glue()` function to keep track of how many records were included in the
# highlighted section. This lets us inspect this value after running the notebook with papermill.
#
# We also raise a ValueError if we've got a bug in the start/stop times, which will be captured
# and displayed by papermill if it's triggered.

# %%
num_records = len(data_highlight)
sb.glue('num_records', num_records, display=True)

# An empty selection means there is nothing to highlight, so fail loudly.
if not num_records:
    raise ValueError(
        "I have no data to highlight! Check that your dates are correct!")

# %% [markdown]
# ## Make our plot
#
# Below we'll generate a matplotlib figure with our highlighted dates. By calling `pm.display()`, papermill
# will store the figure to the key that we've specified (`highlight_dates_fig`). This will let us inspect the
# output later on.

# %%
fig, ax = plt.subplots()

# Full series as a faint black line, highlighted window as a thick red line.
ax.plot(
    data.index,
    data['mydata'],
    c='k',
    alpha=.5,
)
ax.plot(
    data_highlight.index,
    data_highlight['mydata'],
    c='r',
    lw=3,
)
# Random series with one value per entry of `td`, then smoothed with a
# centered 70-sample rolling mean.
raw_values = np.random.randn(len(td))
data = pd.DataFrame(raw_values, columns=['mydata'])
data = data.rolling(70, min_periods=1, center=True).mean()  # Smooth it so it looks purdy
data['date'] = td
data['hour'] = data['date'].apply(lambda x: datetime.strftime(x, "%H"))

# %%
print(data['date'].describe())
data.describe()

# %%
# Index the frame by date, in ascending date order.
data = data.sort_values('date').set_index('date', drop=True)
data.head(5)

# %%
fig, ax = plt.subplots()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
plt.gcf().autofmt_xdate()
ax.plot(
    data.index,
    data['mydata'],
    c='k',
    alpha=.5,
)
plot_title = "Activity for the day of {}".format(run_date)
ax.set(title=plot_title)

# Store the figure in the notebook under a fixed key (and display it).
sb.glue('activity_day_fig', fig, display=True)

# %%
# Output is partitioned by month: <step1>/<YYYY-MM>/<run_date>-<source_id>.csv
month_partition = run_datetime.strftime("%Y-%m")
output_file = "../data/output/step1/" + month_partition + "/" + run_date + '-' + source_id + '.csv'
print(output_file)

# %%
# Make sure the partition directory exists before writing.
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)
data.to_csv(output_file)
show_step=10) logger.debug(f"hparams: {hparams}") iterator = MINDIterator model = NRMSModel(hparams, iterator, seed=seed) logger.info(model.run_eval(valid_news_file, valid_behaviors_file)) model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file) res_syn = model.run_eval(valid_news_file, valid_behaviors_file) logger.debug(f"res_syn: {res_syn}") sb.glue("res_syn", res_syn) model_path = os.path.join(BASE_DIR, "ckpt") os.makedirs(model_path, exist_ok=True) model.model.save_weights(os.path.join(model_path, "nrms_ckpt")) group_impr_indexes, group_labels, group_preds = model.run_fast_eval( test_news_file, test_behaviors_file) with open(os.path.join(BASE_DIR, 'submits', 'prediction.txt'), 'w') as f: for impr_index, preds in tqdm(zip(group_impr_indexes, group_preds)): impr_index += 1 pred_rank = (np.argsort(np.argsort(preds)[::-1]) + 1).tolist() pred_rank = '[' + ','.join([str(i) for i in pred_rank]) + ']' f.write(' '.join([str(impr_index), pred_rank]) + '\n')