import pandas as pd
import param


class MyParamDataFrame(param.Parameterized):
    # Minimal example: the class-level default is a concrete DataFrame.
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    dataset = param.DataFrame(df)
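# A minimal usage sketch (not part of the original source): param.DataFrame acts
# like any other Parameter, but it validates that assigned values are pandas
# DataFrames, so assigning a plain dict or list is rejected.
instance = MyParamDataFrame()
print(instance.dataset)                                      # default DataFrame defined above
instance.dataset = pd.DataFrame({'col1': [5], 'col2': [6]})  # accepted
try:
    instance.dataset = {'col1': [5]}                         # not a DataFrame
except ValueError as err:
    print(f"rejected: {err}")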
class KickstarterDashboard(param.Parameterized): # pylint: disable=line-too-long """The purpose of the Kickstarter Dashboard is to test if the claims regarding Bokeh as of Jan 2018 in the [bokeh-dash-best-dashboard-framework](https://www.sicara.ai/blog/2018-01-30-bokeh-dash-best-dashboard-framework-python) article holds for Panel and the HoloViews suite of tools as of Dec 2019. The claims where - Data in Bokeh becomes inconsistent - Cannot link charts to dataframe - Bokeh is slow for big datasets - Interactions take a long time to develop You can evaluate this dashboard and the code to make your personal evaluation of the above statements. My evaluation is - the **first two statements does no longer hold**. - The third is up for discussion. I would also like the Dashboard updates to be a bit faster. Maybe it's because I don't yet know how to implement this efficiently. - The fourth I've also experienced see this [discussion](https://discourse.holoviz.org/t/how-to-create-a-parameterized-dashboard-with-seperation-between-data-transforms-and-data-views/53/13). I can see that I made a lot of mistakes because it takes time for me to understand how the api works. There is a lot to I need to learn across the HoloViz suite of tools.""" # pylint: enable=line-too-long kickstarter_df = param.DataFrame() categories = param.ListSelector() scatter_df = param.DataFrame() bar_df = param.DataFrame() rangexy = param.ClassSelector( class_=hv.streams.RangeXY, default=hv.streams.RangeXY(), ) def __init__(self, kickstarter_df: Optional[pd.DataFrame] = None, **kwargs): if not isinstance( kickstarter_df, pd.DataFrame, ): kickstarter_df = self.get_kickstarter_df() categories = self.get_categories(kickstarter_df) self.param.kickstarter_df.default = kickstarter_df self.param.categories.default = categories self.param.categories.objects = categories self.param.scatter_df.default = kickstarter_df self.param.bar_df.default = kickstarter_df super().__init__(**kwargs) @param.depends( "kickstarter_df", "categories", watch=True, ) def _set_scatter_df( self, ): self.scatter_df = self.filter_on_categories( self.kickstarter_df, self.categories, ) @param.depends("scatter_df") def scatter_plot_view( self, ): """A Reactive View of the scatter plot""" # Potential Improvements # Rename columns to Capitalized without under score # Add name of movie to hover tooltip scatter_plot = self.get_scatter_plot(self.scatter_df) # Please note that depending on how the scatter_plot is generated it might be a Scatter # or Ndoverlay objects # In the first case use scatter_plot. 
In the second case use scatter_plot.last self.rangexy.source = scatter_plot.last return scatter_plot @param.depends( "scatter_df", "rangexy.x_range", "rangexy.y_range", watch=True, ) def _set_bar_df( self, ): """Update the bar_df dataframe""" self.bar_df = self.filter_on_ranges( self.scatter_df, self.rangexy.x_range, # pylint: disable=no-member self.rangexy.y_range, # pylint: disable=no-member ) @param.depends("bar_df") def bar_chart_view( self, ): """A Reactive View of the Bar Chart""" return self.get_bar_chart(self.bar_df) def view( self, ): """A Reactive View of the KickstarterDashboard""" return pn.Column( pn.pane.Markdown(__doc__), pn.layout.HSpacer(height=25), pn.Row( pn.Column(self.scatter_plot_view, self.bar_chart_view, sizing_mode="stretch_width"), pn.Param( self.param.categories, widgets={ "categories": { "max_width": 125, "size": len(self.categories), } }, width=150, height=500, sizing_mode="fixed", ), sizing_mode="stretch_width", ), sizing_mode="stretch_width", ) @staticmethod def _extract() -> pd.DataFrame: """Extracts the kickstarter data into a DataFrame Returns: pd.DataFrame -- A Dataframe of kickstarter data with columns=["created_at", "usd_pledged", "state", "category_slug"] """ return pd.read_csv( KICKSTARTER_PATH, parse_dates=DATE_COLUMNS, ) @staticmethod def _transform( source_data: pd.DataFrame, n_samples: int = N_SAMPLES, ) -> pd.DataFrame: """Transform the data by - adding broader_category, - converting usd_pledged to millions - sampling to n_samples Arguments: source_data {pd.DataFrame} -- The source kickstarter data Returns: pd.DataFrame -- The transformed DataFrame with columns=["created_at", "usd_pledged", "state", "category_slug", "broader_category"] """ source_data["broader_category"] = source_data["category_slug"].str.split("/").str.get(0) source_data["usd_pledged"] = source_data["usd_pledged"] / 10 ** 6 return source_data.sample(n_samples) @classmethod def get_kickstarter_df( cls, ) -> pd.DataFrame: """The Dataframe of Kickstarter Data Returns: [pd.DataFrame] -- The Dataframe of Kickstarter Data """ source_data = cls._extract() kickstarter_df = cls._transform(source_data) return kickstarter_df @staticmethod def get_categories( kickstarter_df, ) -> List[str]: """The list of kickstarter broader categories Arguments: kickstarter_df {[type]} -- [description] Returns: List[str] -- [description] """ return list(kickstarter_df["broader_category"].unique()) @classmethod def filter_on_categories( cls, kickstarter_df: pd.DataFrame, categories: List[str], ) -> pd.DataFrame: """Filters the kickstarter_df by the specified categories Arguments: kickstarter_df {pd.DataFrame} -- A Kickstarter Dataframe categories {List[str]} -- The list of broader_category in the DataFrame Returns: pd.DataFrame -- The filtered DataFrame """ if categories is None or categories == []: categories = cls.get_categories(kickstarter_df) categories_filter = kickstarter_df["broader_category"].isin(categories) return kickstarter_df[categories_filter] @staticmethod def filter_on_ranges( kickstarter_df: pd.DataFrame, x_range, y_range, ) -> pd.DataFrame: """Filter the kickstarter_df by x_range and y_range Arguments: kickstarter_df {pd.DataFrame} -- [description] x_range {[type]} -- The usd_pledged range y_range {[type]} -- The created_at range Returns: pd.DataFrame -- The filtered DataFrame """ sub_df = kickstarter_df if y_range: y_filter = (kickstarter_df["usd_pledged"] >= y_range[0]) & ( kickstarter_df["usd_pledged"] <= y_range[1] ) sub_df = sub_df[y_filter] if x_range: x_filter = 
(kickstarter_df["created_at"] >= x_range[0]) & ( kickstarter_df["created_at"] <= x_range[1] ) sub_df = sub_df[x_filter] return sub_df @staticmethod def get_scatter_plot( kickstarter_df: pd.DataFrame, ): # pylint: disable=missing-return-type-doc """A Scatter plot of the kickstarter_df Arguments: kickstarter_df {pd.DataFrame} -- The DataFrame of kickstarter data Returns: [type] -- A Scatter plot """ # Potential Improvements # Rename columns to Capitalized without under score # Add name of movie to hover tooltip kickstarter_df["color"] = kickstarter_df["state"] return kickstarter_df.hvplot.scatter( x="created_at", y="usd_pledged", # color="color", by="state", cmap=list(CMAP.values()), height=400, responsive=True, yformatter="%.1fM", ) @staticmethod def get_bar_chart( kickstarter_df: pd.DataFrame, ): # pylint: disable=missing-return-type-doc """A bar chart of the kickstarter_df Arguments: kickstarter_df {pd.DataFrame} -- A DataFrame of Kickstarter data Returns: [type] -- A bar chart of the kickstarter_df """ # Potential improvements # Sort by Number of Projects Desc to make it easier to see what large and small # Filter stacked_barchart_df = ( kickstarter_df[ [ "broader_category", "state", "created_at", ] ] .groupby( [ "broader_category", "state", ] ) .count() .rename(columns={"created_at": "Number of projects"}) ) # Plot bar_chart = stacked_barchart_df.hvplot.bar( stacked=True, height=400, responsive=True, xlabel="Number of projects", cmap=CMAP, ) return bar_chart
class OverallParameters(param.Parameterized): localisation = param.String(default="Jegun", label="") score = param.Range(default=(0, 250), bounds=(0, 250),) tout_axes = param.Boolean(False, label="") interfaces_num = param.ListSelector(label="") infos_num = param.ListSelector(label="") comp_admin = param.ListSelector(label="") comp_usage_num = param.ListSelector(label="") point_ref = param.Selector( default=SELECT[2], objects=SELECT, label="Point de référence", ) niveau_observation = param.Selector( default=SELECT[2], objects=SELECT, label="Niveau d'observation", ) niveau_details = param.Selector( default=SELECT[2], objects=SELECT, label="Niveau de détail", ) donnees_infra = param.Action( lambda x: x, doc="""Données Infra-Communales""", precedence=0.7 ) file_name = param.String( default="Export_mednum.csv", doc=""" The filename to save to.""", ) edit_report = param.Action( lambda x: x.timestamps.append(dt.datetime.utcnow()), doc="""Editer un rapport""", precedence=0.7, ) tiles = gv.tile_sources.StamenTerrain df_merged = param.DataFrame() df_score = param.DataFrame() def __init__(self, **params): super(OverallParameters, self).__init__(**params) interim_data, cont_iris, indice_frag = self.define_paths() # Merged output_data_path = interim_data / "add_geom_data_to_merged_data.trc.pqt" if output_data_path.exists(): import geopandas as gpd self.df_merged = gpd.read_parquet(output_data_path) else: self.df_merged = add_geom_data_to_merged_data( iris_df(cont_iris), read_merged_data(indice_frag) ) # Create multindex self.set_dataframes_indexes() self.set_dataframes_level() # Create widgets for indicators self.define_indices_params() # Define what is level 0 and level 1 to consider self.set_entity_levels() # What is selected in each level self.get_selected_indice_by_level() # Define define_searchable_element self.define_searchable_element() self.score_calculation() # Download self.download = pn.widgets.FileDownload( label="""Exporter les résultats""", filename=self.file_name, callback=self._download_callback, ) def define_paths(self): data_path = Path("../data") if not data_path.exists(): data_path = Path("./data") if not data_path.exists(): data_path = Path("../../data") raw_data = data_path / "raw/" external_data = data_path / "external/" interim_data = data_path / "interim/" cont_iris = external_data / "france-geojson" / "contours-iris.geojson" indice_frag = processed_data / "MERGE_data_clean.csv" return interim_data, cont_iris, indice_frag def define_searchable_element(self): self.seachable_localisation = list( self.df_merged.index.get_level_values(self.level_0_column_names).unique() ) def define_indices_params(self): """ Create all indices parameters -> Will become a TreeCheckBox or Checkbox """ self.g_params = [] for k, widget_opts in TREEVIEW_CHECK_BOX.items(): # Voir si description ne peut être passée widgets_params = self.create_checkbox_type_widget_params(widget_opts) self.g_params.append(pn.Param(self.param[k], widgets={k: widgets_params})) def _download_callback(self): """ A FileDownload callback will return a file-like object which can be serialized and sent to the client. 
""" self.file_name = "Export_%s.csv" % self.point_ref self.download.filename = self.file_name sio = io.StringIO() self.df_score.drop("geometry", axis=1).to_csv(sio, index=False) sio.seek(0) return sio def get_params(self): paramater_names = [par[0] for par in self.get_param_values()] return pn.Param( self.param, parameters=[par for par in paramater_names if par != "df_merged"], ) def set_dataframes_level(self): real_name_level = [] for col in self.df_merged.columns: if col in CATEGORIES_INDICES.keys(): real_name_level.append((col, CATEGORIES_INDICES[col])) else: real_name_level.append((col, col)) self.df_merged.columns = pd.MultiIndex.from_tuples( real_name_level, names=["variable", "nom"] ) def set_dataframes_indexes(self): indexes = list( set( list(MAP_COL_WIDGETS["level_0"].values()) + list(MAP_COL_WIDGETS["level_1"].values()) ) ) self.df_merged.set_index(indexes, inplace=True) @pn.depends("localisation", "point_ref", watch=True) def set_entity_levels(self): """Set the entity levels and point values for this entity. """ self.level_0_column, self.level_1_column = ( MAP_COL_WIDGETS["level_0"]["index"], MAP_COL_WIDGETS["level_1"][self.point_ref], ) self.level_0_column_names = MAP_COL_WIDGETS["level_0"]["names"] self.level_0_value = self.localisation @pn.depends( "tout_axes", "interfaces_num", "infos_num", "comp_admin", "comp_usage_num", watch=True, ) def get_selected_indice_by_level(self): """get the indices of the selected column Args: self ([type]): [description] Returns: [type]: [description] """ param_values = {k: v for k, v in self.param.get_param_values()} selected_col = [] for axe, indices in param_values.items(): if axe in TREEVIEW_CHECK_BOX.keys() and indices: for indice in indices: try: selected_col += [CATEGORIES_INDICES_REV[indice]] except: pass self.selected_indices_level_0 = list(set(selected_col)) self.selected_indices_level_1 = list(set(selected_col)) return self.selected_indices_level_0, self.selected_indices_level_1 def create_checkbox_type_widget_params(self, widget_opts): """Create dict of widget type and checkbox params . 
Args: widget_opts ([type]): [description] Returns: [type]: [description] """ if len(widget_opts.items()) > 3: select_options = [ val["nom"] for opt, val in widget_opts.items() if opt not in ["nom", "desc"] ] descriptions = [ val["desc"] for opt, val in widget_opts.items() if opt not in ["nom", "desc"] ] widget_type = TreeViewCheckBox widgets_params = { "type": widget_type, "select_options": select_options, "select_all": widget_opts["nom"], "desc": descriptions, } else: descriptions = widget_opts["desc"] widget_type = Checkbox widgets_params = { "name": widget_opts["nom"], "type": widget_type, "value": True, "desc": descriptions, } return widgets_params def set_real_name(df): real_name_level = [] for col in df.columns: if col in CATEGORIES_INDICES.keys(): real_name_level.append((col, CATEGORIES_INDICES[col])) else: real_name_level.append((col, col)) return real_name_level def info_localisation(self): info_loc = {} index = self.df_merged.xs( self.localisation, level=self.level_0_column_names, drop_level=False ).index ids = index.unique().to_numpy()[0] names = index.names for k, v in zip(names, ids): info_loc[k] = v return info_loc def get_indices_properties(self): indices_properties = {} import copy tree = copy.deepcopy(TREEVIEW_CHECK_BOX) for indic_dict in tree.values(): indic_dict.pop("nom", None) indic_dict.pop("desc", None) indices_properties.update(indic_dict) return indices_properties @pn.depends( "localisation", "point_ref", "tout_axes", "interfaces_num", "infos_num", "comp_admin", "comp_usage_num", watch=True, ) def score_calculation(self): indices_properties = self.get_indices_properties() selected_indices = self.selected_indices_level_0 df = self.df_merged.copy().droplevel("nom", axis=1) info_loc = self.info_localisation() if selected_indices != []: selected_indices_aggfunc = { k: indices_properties[k]["aggfunc"] for k in selected_indices } # map_info = [self.level_0_column_names] vdims = map_info + selected_indices # Aggregation selon la fonction specifié (mean, median) # au niveau level_1_column sur les indice selectionne selected_indices_aggfunc score_agg_niveau = ( df.xs( info_loc[self.level_1_column], level=self.level_1_column, drop_level=False, ) .groupby(self.level_1_column) .agg(selected_indices_aggfunc) ) # Division par l'aggregation sur la zone level_1_column (pondération) score_niveau = ( df.xs( info_loc[self.level_1_column], level=self.level_1_column, drop_level=False, )[selected_indices].div(score_agg_niveau) * 100 ) # Dissolution (i.e. agregation geographique) au niveau de découpage souhaité level_0_column df = df.xs( info_loc[self.level_1_column], level=self.level_1_column, drop_level=False, ).dissolve( by=[self.level_0_column, self.level_0_column_names], aggfunc=selected_indices_aggfunc, ) # Score sur les indices merge sur l'index pour récupérer la geometry. 
# _BRUT : initial # _SCORE : Score de l'indice sur le découpage level_0_column divisé par la fonction d'aggragation au level_1_column scores = df.merge( score_niveau, on=[self.level_0_column, self.level_0_column_names], suffixes=("_BRUT", "_SCORE"), ).drop_duplicates() # Drop duplicate pour supprimer les doublons (zone homogène) # Calcul des scores sur chaque axes et au total number_axes = 0 for axe, indices in AXES_INDICES.items(): selected_in_axes = [ k + "_SCORE" for k in indices.keys() if k in selected_indices ] if selected_in_axes != []: scores.loc[:, axe] = scores[selected_in_axes].mean(axis=1) number_axes += 1 else: scores.loc[:, axe] = 0 # Score total scores.loc[:, "tout_axes"] = scores[list(AXES_INDICES.keys())].sum(axis=1) if number_axes != 0: scores.loc[:, "tout_axes"] /= number_axes # self.df_score = df.merge( scores, on=[self.level_0_column, self.level_0_column_names, "geometry"] ).drop_duplicates() # Suppression des doublons sur les communes découpées en IRIS else: df = df.xs( info_loc[self.level_1_column], level=self.level_1_column, drop_level=False, ).dissolve( by=[self.level_0_column, self.level_0_column_names], # aggfunc='first', ) for axe, indices in AXES_INDICES.items(): df.loc[:, axe] = 0 df.loc[:, "tout_axes"] = 0 self.df_score = df
class OutputControl(param.Parameterized):
    output_control_option = param.ObjectSelector(
        default='Specify output frequency (OC)',
        objects=['Specify output frequency (OC)', 'Specify autobuild (SERIES AWRITE)'],
        doc='Output control option to specify output time using a time series (OC) '
            'or an autobuild time series (OS).',
        precedence=1,
    )
    oc_time_series_id = param.Integer(
        default=0, bounds=(0, None),
        doc='OC: Time Series ID for output times.', precedence=2)
    output_flow_strings = param.DataFrame(
        default=None,
        doc='FLX: CARD S_ID, '
            'CARD - FLX, '
            'S_ID - String ID for mid string or edge string for which flow is to be output.',
    )
    print_adaptive_mesh = param.Boolean(
        default=False, doc='PC ADP: Adaptive mesh printing active.', precedence=4)
    print_numerical_fish_surrogate = param.Boolean(
        default=False,
        doc='PC ELM: Print numerical fish surrogate information in TecPlot format.',
        precedence=5)
    screen_output_residual = param.Boolean(
        default=False, doc='SOUT RESID: output the residual to the screen.', precedence=6)
    screen_output_all = param.Boolean(
        default=False, doc='SOUT ALL: output all information to the screen.', precedence=7)
    screen_output_mass_error = param.Boolean(
        default=False, doc='SOUT MERROR: Screen output mass error active.', precedence=8)
    screen_output_worst_nonlinear_node = param.Boolean(
        default=False, doc='SOUT NLNODE: output the id of the worst nonlinear node.',
        precedence=9)
    screen_output_worst_linear_node = param.Boolean(
        default=False, doc='SOUT LNODE: output the id of the worst linear node.',
        precedence=10)
    file_output_wind = param.Boolean(
        default=False, doc='FOUT WIND: output wind to a file.', precedence=11)
    file_output_wave = param.Boolean(
        default=False, doc='FOUT WAVE: output wave to a file.', precedence=12)
    file_output_adapted_grid = param.Boolean(
        default=False, doc='FOUT ADAPT GRID: write adapted grid to a file.', precedence=13)
    file_output_adapted_solution = param.Boolean(
        default=False, doc='FOUT ADAPT SW: write the adapted grid and solution to a file.',
        precedence=14)
    file_output_adapted_transport = param.Boolean(
        default=False,
        doc='FOUT ADAPT CON: write the adapted transport files (does not include sediment).',
        precedence=15)
    file_output_sediment = param.Boolean(
        default=False, doc='FOUT SED: write the adapted sediment files.', precedence=16)

    def panel(self):
        return pn.Pane(self.param, show_name=False)
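# A hedged usage sketch (assumes `pn` is the imported panel module): the scalar
# parameters render as widgets and the DataFrame parameter as a table widget.
oc = OutputControl(screen_output_residual=True)
pn.serve(oc.panel())  # or oc.panel().show() from a notebook session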
class HpcJobMonitor(HpcConfigurable): uit_client = param.ClassSelector(Client) jobs = param.List() update = param.Action(lambda self: self.update_statuses()) statuses = param.DataFrame() selected_job = param.ObjectSelector(label='Job') active_sub_job = param.ObjectSelector(label='Iteration') log = param.ObjectSelector(objects=[], label='Log File') custom_logs = param.List(default=[]) num_log_lines = param.Integer(default=100, label='n') file_viewer = param.ClassSelector(FileViewer) ready = param.Boolean() next_btn = param.Action(lambda self: self.next(), label='Next') def __init__(self, **params): super().__init__(**params) self.tabs = [ ('Status', self.status_panel), ('Logs', self.logs_panel), ('Files', self.file_browser_panel), ] def next(self): self.ready = True @param.output(finished_job_ids=list) def finished_jobs(self): return self.statuses[self.statuses['status'] == 'F']['job_id'].tolist() @param.depends('jobs', watch=True) def update_selected_job(self): self.param.selected_job.names = {j.job_id: j for j in self.jobs} self.param.selected_job.objects = self.jobs self.selected_job = self.jobs[0] if self.jobs else None @param.depends('selected_job', watch=True) def update_statuses(self): self.statuses = None sub_jobs = self.selected_job.sub_jobs self.statuses = PbsJob.update_statuses(sub_jobs, as_df=True) objects = [j for j in sub_jobs if j.status != 'Q'] self.param.active_sub_job.names = {j.job_id: j for j in objects} self.param.active_sub_job.objects = objects if objects: self.active_sub_job = objects[0] @param.depends('active_sub_job', watch=True) def udpate_log(self): self.param.log.objects = ['stdout', 'stderr'] + [ self.active_sub_job.resolve_path(p) for p in self.custom_logs ] self.log = 'stdout' @param.depends('active_sub_job') def out_log(self): return self.get_log(lambda job: job.get_stdout_log()) @param.depends('active_sub_job') def err_log(self): return self.get_log(lambda job: job.get_stderr_log()) @param.depends('active_sub_job') def x_log(self, log_file): try: return self.get_log(lambda job: job.get_custom_log( log_file, num_lines=self.num_log_lines)) except RuntimeError as e: log.exception(e) def get_log(self, func): job = self.active_sub_job if job is not None: log_contents = func(job) return pn.pane.Str(log_contents, width=800) @param.depends('statuses') def statuses_panel(self): statuses = pn.panel(self.statuses, width=1300) \ if self.statuses is not None \ else pn.pane.GIF(resource_filename('panel', 'assets/spinner.gif')) return statuses @param.depends('selected_job') def status_panel(self): if self.selected_job: return pn.Column( self.statuses_panel, pn.Param(self.param.update, widgets={ 'update': { 'button_type': 'primary', 'width': 100 } }), ) else: return pn.pane.HTML('<h2>No jobs are available</h2>') @param.depends('active_sub_job', 'log') def log_pane(self): if self.log == 'stdout': return self.out_log() elif self.log == 'stderr': return self.err_log() else: return self.x_log(self.log) @param.depends('jobs') def logs_panel(self): return pn.Column( pn.Param(self, parameters=['active_sub_job', 'log'], show_name=False, width=300), self.log_pane, ) @param.depends('uit_client', watch=True) def configure_file_viewer(self): self.file_viewer = FileViewer(uit_client=self.uit_client) self.file_viewer.configure_file_selector() @param.depends('selected_job') def file_browser_panel(self): viewer = self.file_viewer.panel if self.file_viewer else pn.Spacer() self.file_viewer.file_path = str(self.selected_job.working_dir) return pn.Column( viewer, name='Files', 
width_policy='max', ) def panel(self): return pn.Column( '# Job Status', pn.Row( pn.panel(self.param.selected_job, width_policy='max'), pn.Param(self.param.next_btn, widgets={ 'next_btn': { 'button_type': 'success', 'width': 100 } }), ), pn.layout.Tabs(*self.tabs, ), )
class ObservationsExplorer(param.Parameterized): """Param interface for inspecting observations""" observation_df = param.DataFrame( doc='The DataFrame for the observations.', precedence=-1 # Don't show widget ) images_df = param.DataFrame( doc='The DataFrame for the images from the selected observations.', precedence=-1 # Don't show widget ) show_recent = param.Boolean(label='Show recent observations', doc='Show recent observations', default=True) search_name = param.String( label='Coordinates for object', doc='Field name for coordinate lookup', ) coords = param.XYCoordinates(label='RA/Dec Coords [deg]', doc='RA/Dec Coords [degrees]', default=(0, 0)) radius = param.Number(label='Search radius [deg]', doc='Search radius [degrees]', default=15., bounds=(0, 180), softbounds=(0, 25)) time = param.DateRange( label='Date Range', default=(pendulum.parse('2016-01-01').replace(tzinfo=None), now), bounds=(pendulum.parse('2016-01-01').replace(tzinfo=None), now)) min_num_images = param.Integer(doc='Minimum number of images.', default=1, bounds=(1, 50), softbounds=(1, 10)) unit_id = param.ListSelector( doc='Unit IDs', label='Unit IDs', ) def __init__(self, **kwargs): super().__init__(**kwargs) logger.debug(f'Getting recent stats from {BASE_URL}') self._observations_path = download_file(f'{BASE_URL}', cache='update', show_progress=False, pkgname='panoptes') self._observations_df = pd.read_csv( self._observations_path).convert_dtypes() # Setup up widgets # Set some default for the params now that we have data. units = sorted(self._observations_df.unit_id.unique()) units.insert(0, 'The Whole World! 🌎') self.param.unit_id.objects = units self.unit_id = [units[0]] # Create the source objects. self.update_dataset() @param.depends('coords', 'radius', 'time', 'min_num_images', 'unit_id', 'search_name') def update_dataset(self): if self.show_recent: # Get just the recent result on initial load df = search_observations(ra=180, dec=0, radius=180, start_date=now.subtract(months=1), end_date=now, min_num_images=1, source=self._observations_df).sort_values( by=['time', 'unit_id', 'camera_id'], ascending=False) else: # If using the default unit_ids option, then search for all. unit_ids = self.unit_id if unit_ids == self.param.unit_id.objects[0:1]: unit_ids = self.param.unit_id.objects[1:] if self.search_name != '': coords = SkyCoord.from_name(self.search_name) self.coords = (round(coords.ra.value, 3), round(coords.dec.value, 3)) # Search for the observations given the current params. df = search_observations(ra=self.coords[0], dec=self.coords[1], radius=self.radius, start_date=self.time[0], end_date=self.time[1], min_num_images=self.min_num_images, unit_id=unit_ids).sort_values( by=['time', 'unit_id', 'camera_id'], ascending=False) df.time = pd.to_datetime(df.time) cds = ColumnDataSource(data=df, name='observations_source') def obs_row_selected(attrname, old_row_index, new_row_index): # We only lookup one even if they select multiple rows. 
newest_index = new_row_index[-1] row = df.iloc[newest_index] print(f'Looking up sequence_id={row.sequence_id}') self.images_df = get_metadata(sequence_id=row.sequence_id) if self.images_df is not None: self.images_df = self.images_df.dropna() cds.selected.on_change('indices', obs_row_selected) return cds @param.depends("images_df") def selected_title(self): try: sequence_id = self.images_df.sequence_id.iloc[0] except AttributeError: sequence_id = '' return pn.panel(f'<h5>{sequence_id}</h5>') @param.depends('images_df') def image_table(self): columns = [('time', 'Time [UTC]')] try: images_table = self.images_df.hvplot.table(columns=columns).opts( width=250, height=100, title=f'Images ({len(self.images_df)})', ) except AttributeError: images_table = self.images_df return images_table @param.depends('images_df') def image_preview(self): try: image_url = self.images_df.public_url.dropna().iloc[0].replace( '.fits.fz', '.jpg') return pn.pane.HTML(f''' <div class="media" style="width: 300px; height: 200px"> <a href="{image_url}" target="_blank"> <img src="{image_url}" class="card-img-top" alt="Observation Image"> </a> </div> ''') except AttributeError: return '' @param.depends('observation_df') def fits_file_list_to_csv_cb(self): """ Generates a CSV file from current image list.""" df = self.images_df.public_url.dropna() sio = StringIO() df.to_csv(sio, index=False, header=False) sio.seek(0) return sio def table_download_button(self): """ A button for downloading the images CSV.""" try: sequence_id = self.images_df.sequence_id.iloc[0] return pn.widgets.FileDownload( callback=self.fits_file_list_to_csv_cb, filename=f'fits-list-{sequence_id}.txt', label='Download FITS List (.txt)', ) except AttributeError: return '' def sources_download_button(self): try: sequence_id = self.images_df.sequence_id.iloc[0] parquet_url = f'{OBSERVATIONS_BASE_URL}/{sequence_id}-sources.parquet' source_btn = pn.widgets.Button( name='Download sources list (.parquet)', ) source_btn.js_on_click(args=dict(url=parquet_url), code=''' window.open(url, '_blank') ''') return source_btn except AttributeError: return '' def table(self): columns = [ TableColumn( field="unit_id", title="Unit ID", width=60, ), TableColumn( field="camera_id", title="Camera ID", width=60, ), TableColumn( field="time", title="Time [UTC]", formatter=DateFormatter(format='%Y-%m-%d %H:%M'), width=130, ), TableColumn( field="field_name", title="Field Name", width=240, ), TableColumn( field="ra", title="RA [deg]", formatter=NumberFormatter(format="0.000"), width=70, ), TableColumn( field="dec", title="Dec [deg]", formatter=NumberFormatter(format="0.000"), width=70, ), TableColumn( field="num_images", title="Images", width=40, ), TableColumn( field="status", title="Status", width=75, ), TableColumn( field="exptime", title="Exptime [sec]", formatter=NumberFormatter(format="0.00"), width=60, ), TableColumn( field="total_minutes_exptime", title="Total Minutes", formatter=NumberFormatter(format="0.0"), width=60, ), ] cds = self.update_dataset() data_table = DataTable( source=cds, name='observations_table', columns=columns, index_position=None, min_width=1100, fit_columns=True, sizing_mode='stretch_both', ) return data_table
class Model(param.Parameterized):
    meta = param.Dict(default={'Analyst': "N/A", 'Element': "N/A"})
    g_inputs = param.ClassSelector(GlobalInputs, GlobalInputs(), instantiate=False)
    u_inputs = param.Dict(default={'uncertainty': ng.NormalRandom(mu=1, sigma=.25)})
    uncertainty = param.Number(1)
    simulate = param.Action(lambda self: self.run_simulation(100))
    results = param.DataFrame(
        default=pd.DataFrame(columns=['Element', 'APPN', 'BaseYear', 'FY', 'Value']),
        doc='''Results container shared by all estimates.
        This value should be updated using the generic _calc method''',
        columns=set(['Element', 'APPN', 'BaseYear', 'FY', 'Value']),
        precedence=.1)
    sim_results = param.DataFrame(precedence=.1)

    def __init__(self, **params):
        super().__init__(**params)
        # Automatically set results to a calculation given defaults
        self._calc()

    def _calc(self):
        print(self.name, "Not Implemented")
        self.results = self.results

    @param.depends('results', watch=True)
    def _update_results(self):
        self.results['Value'] = self.results['Value'] * self.uncertainty

    def _prepare_sim(self):
        if self.u_inputs is not None:
            self.param.set_param(**self.u_inputs)

    def _end_sim(self):
        if self.u_inputs is not None:
            for key, val in self.u_inputs.items():
                self.param.set_param(**{key: self.param[key].default})
        self._calc()

    def run_simulation(self, trials=100, clear_previous_sim=True,
                       agg_results=True, agg_columns=['APPN', 'FY']):
        self._prepare_sim()
        if clear_previous_sim:
            self.sim_results = pd.DataFrame()
        for i in range(trials):
            self._prepare_sim()
            self._calc()
            if agg_results:
                self.sim_results = self.sim_results.append(
                    self.results.groupby(by=agg_columns)['Value']
                    .sum().reset_index().assign(Trial=i))
            else:
                self.sim_results = self.sim_results.append(self.results.assign(Trial=i))
        self._end_sim()

    def run_simulation_parallel(self, trials=100, agg_results=True,
                                agg_columns=['APPN', 'FY']):
        import multiprocessing
        with multiprocessing.Pool() as pool:
            pool.map(self.run_simulation, range(len(self.models)))

    def build_panel_app(self):
        self.app = pn.Pane(self)

    def build_app(self):
        try:
            import panel  # noqa: F401 - only checking that panel is importable
            self.build_panel_app()
        except ImportError:
            try:
                import ipywidgets  # noqa: F401
            except ImportError:
                print("No dashboard apps available. Try downloading panel or ipywidgets")
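# A hedged sketch of how a concrete estimate might plug into Model (the names
# below are illustrative, not from the original source): a subclass overrides
# _calc and assigns a DataFrame with the required columns, which also triggers
# the _update_results watcher.
class LaborEstimate(Model):
    hours = param.Number(100)
    rate = param.Number(85.0)

    def _calc(self):
        self.results = pd.DataFrame([{
            'Element': 'Labor', 'APPN': 'OM', 'BaseYear': 2020,
            'FY': 2021, 'Value': self.hours * self.rate,
        }])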
class OrthoAnnotationDashBoard(OrthoSegmentationDashBoard): annot_channel = param.Parameter(doc='id of annotation channel') z_freehand_editor = param.Parameter(FreehandEditor()) y_freehand_editor = param.Parameter(FreehandEditor()) x_freehand_editor = param.Parameter(FreehandEditor()) old_subdf = param.DataFrame() def _make_dataset(self, key, img): if key == self.annot_channel: annot_dataset = EditableHvDataset(img=img, label=self.index_to_str(key), spacing=self.spacing) # force reset drawing tool axis self.z_freehand_editor = FreehandEditor( dataset=annot_dataset, slicer=self.ortho_viewer.z_viewer) self.y_freehand_editor = FreehandEditor( dataset=annot_dataset, slicer=self.ortho_viewer.y_viewer) self.x_freehand_editor = FreehandEditor( dataset=annot_dataset, slicer=self.ortho_viewer.x_viewer, swap_axes=True) return annot_dataset else: return HvDataset(img=img, label=self.index_to_str(key), spacing=self.spacing) # NOTE overriding base class --> watch=True not needed (else triggers double update) @param.depends('_dynamic_update_counter') def _dynamic_img_update(self): self.save_annot() for hv_ds, img in zip(self.hv_datasets, self.loaded_objects.values()): hv_ds.img = img def dmap(self): if not self.segmentation_viewer.channel_viewers or self._has_multiselect_changed: selected_channel_config = { key: self.channel_config[key] for key in self.loaded_objects.keys() } self.segmentation_viewer = SegmentationViewer.from_channel_config( selected_channel_config, composite_channels=self.composite_channels, overlay_channels=self.overlay_channels) self._widget_update_counter += 1 self.hv_datasets = [ self._make_dataset(key, img) for key, img in self.loaded_objects.items() ] dmaps = [hv_ds.dmap() for hv_ds in self.hv_datasets] dmaps = [self.ortho_viewer(dmap) for dmap in dmaps] # invert slices and channels dmaps = list(zip(*dmaps)) # add crosshair overlay + drawingtool overlay, bug if adding to an existing overlay # NOTE: workaround to overlay drawingtool. does not work if overlayed after Overlay + collate # similar to reported holoviews bug. 
tap stream attached to a dynamic map does not update # https://github.com/holoviz/holoviews/issues/3533 cross = self.ortho_viewer.get_crosshair() freehands = [(self.z_freehand_editor.path_plot, ), (self.x_freehand_editor.path_plot, ), (self.y_freehand_editor.path_plot, )] dmaps = [ dmap + cr + fh for dmap, cr, fh in zip(dmaps, cross, freehands) ] dmaps = [self.segmentation_viewer(dmap) for dmap in dmaps] @param.depends(self.z_freehand_editor.param.draw_in_3D, watch=True) def _sync_freehands_3D(draw_in_3D): self.x_freehand_editor.draw_in_3D = draw_in_3D self.y_freehand_editor.draw_in_3D = draw_in_3D @param.depends(self.z_freehand_editor.param.tool_width, watch=True) def _sync_freehands_toolsize(tool_width): self.x_freehand_editor.tool_width = tool_width self.y_freehand_editor.tool_width = tool_width return dmaps def save_annot(self, event=None): npimg = self.z_freehand_editor.dataset.img.astype(np.int16) if npimg.shape != (2, 2) and self.old_subdf is not None: single_index = list( set(self.old_subdf.index.names) - set(self.multi_select_levels)) row = self.old_subdf.reset_index(single_index).dc[ self.annot_channel] row.dc.write(npimg, compress=9, overwrite=True) def discard_changes(self, event=None): single_index = list( set(self.old_subdf.index.names) - set(self.multi_select_levels)) row = self.old_subdf.reset_index(single_index).dc[self.annot_channel] img = row.dc.read()[0] self.z_freehand_editor.dataset.img = img @param.depends('subdf', watch=True) def _backup_subdf(self): self.old_subdf = self.subdf @param.depends('_widget_update_counter') def widgets(self): wg = super().widgets() save_button = pn.widgets.Button(name='save') save_button.on_click(self.save_annot) discard_button = pn.widgets.Button(name='discard changes') discard_button.on_click(self.discard_changes) edit_wg = self.z_freehand_editor.dataset.widgets() edit_wg.append(self.z_freehand_editor.param.tool_width) edit_wg.append(self.z_freehand_editor.param.draw_in_3D) edit_wg.append(save_button) edit_wg.append(discard_button) return pn.Column(self.io_widgets, self.segmentation_viewer.widgets, edit_wg)
class GlobalPowerPlantDatabaseApp(param.Parameterized): data = param.DataFrame(precedence=-1) opacity = param.Number(default=0.8, step=0.05, bounds=(0, 1)) pitch = param.Number(default=0, bounds=(0, 90)) zoom = param.Integer(default=1, bounds=(1, 22)) view_state = param.ObjectSelector(default=VIEW_STATES["World"], objects=VIEW_STATES) def __init__(self, nrows: Optional[int] = None, **params): if "data" not in params: if nrows: params["data"] = self._get_pp_data(nrows=nrows) else: params["data"] = self._get_pp_data() super(GlobalPowerPlantDatabaseApp, self).__init__(**params) self._view_state = pdk.ViewState( latitude=52.2323, longitude=-1.415, zoom=self.zoom, min_zoom=self.param.zoom.bounds[0], max_zoom=self.param.zoom.bounds[1], ) self._scatter = pdk.Layer( "ScatterplotLayer", data=self.data, get_position=["longitude", "latitude"], get_fill_color="[color_r, color_g, color_b, color_a]", get_radius="capacity_mw*10", pickable=True, opacity=self.opacity, filled=True, wireframe=True, ) self._deck = pdk.Deck( map_style="mapbox://styles/mapbox/light-v9", initial_view_state=self._view_state, layers=[self._scatter], tooltip=True, mapbox_key=MAPBOX_KEY, ) self.pane = pn.pane.DeckGL(self._deck, sizing_mode="stretch_width", height=700) self.param.watch(self._update, ["data", "opacity", "pitch", "zoom"]) @staticmethod def _get_pp_data(nrows: Optional[int] = None): pp_data = pd.read_csv(POWER_PLANT_PATH, nrows=nrows) pp_data["primary_fuel_color"] = pp_data.primary_fuel.map(FUEL_COLORS) pp_data["primary_fuel_color"] = pp_data["primary_fuel_color"].fillna( "gray") pp_data["color_r"] = pp_data["primary_fuel_color"].map(COLORS_R) pp_data["color_g"] = pp_data["primary_fuel_color"].map(COLORS_G) pp_data["color_b"] = pp_data["primary_fuel_color"].map(COLORS_B) pp_data["color_a"] = 140 # "name", "primary_fuel", "capacity_mw", pp_data = pp_data[[ "latitude", "longitude", "name", "capacity_mw", "color_r", "color_g", "color_b", "color_a", ]] return pp_data @pn.depends("pane.hover_state", "data") def _info_pane(self): index = self.pane.hover_state.get("index", -1) if index == -1: index = slice(0, 0) return self.data.iloc[index][["name", "capacity_mw"]] @pn.depends("view_state", watch=True) def _update_view_state_from_selection(self): self._view_state.latitude = self.view_state.latitude self._view_state.longitude = self.view_state.longitude self._view_state.zoom = self.view_state.zoom self.pane.param.trigger("object") print(self._view_state) @pn.depends("pane.view_State", watch=True) def _update(self): print("update") state = self.pane.view_state self._view_state.longitude = state["longitude"] self._view_state.latitude = state["latitude"] def _update(self, event): print(event.name) if event.name == "data": self._scatter.data = self.data if event.name == "opacity": self._scatter.opacity = self.opacity if event.name == "zoom": self._view_state.zoom = self.zoom if event.name == "pitch": self._view_state.pitch = self.pitch self.pane.param.trigger("object") def _view_state_pane(self): return pn.Param( self, parameters=["view_state"], show_name=False, widgets={"view_state": pn.widgets.RadioButtonGroup}, default_layout=pn.Column, ) def _settings_pane(self): return pn.Param( self, parameters=["opacity", "pitch", "zoom"], show_name=False, widgets={"view_state": pn.widgets.RadioButtonGroup}, ) def view(self): # self._info_pane, does not work return pn.Row( pn.Column(self._view_state_pane, self.pane), pn.Column(self._settings_pane, width=300, sizing_mode="fixed"), )
class TraceParam(param.Parameterized): select_column = param.Parameter() data = param.DataFrame(precedence=-1) listeEntier = param.Selector(default=[]) listeDecimal = param.Selector(default=[]) listeBooleen = param.Selector(default=[]) listeTexte = param.Selector(default=[]) listeDate = param.Selector(default=[]) listeObjet = param.Selector(default=[]) x = param.String(default='x', doc="Colonne à afficher sur l'axe X.") y = param.String(default='frequence', doc="Colonne à afficher sur l'axe Y.") opts = param.Dict(default={}, doc="Options a appliquer sur le graphique.") def __init__(self, **params): self.progress = ProgressExtMod() self.completed = self.progress.completed self.viz = params.get("viz") self.trace = params.get("trace") self.data = self.getUpdatedData(self.trace.dataFrame) self.overlayElement = None self.overlay = None self.idOverlayElement = None self.groupeName = None self.labelName = None super(TraceParam, self).__init__(data=self.data, viz=self.viz) @param.depends("progress.completed") def viewProgress(self): print("TraceParam ", self, " id:", id(self)) return pn.Column(self.progress.view) def view(self): if not self.overlay: table = pn.pane.Markdown("## Pas de donnée chargée") else: #dataCentroid = GeomUtil.convertGeomToCentroid(self.trace,self.data) #dataCentroid['x']= dataCentroid["geometry"].x #dataCentroid['y'] = dataCentroid["geometry"].y #longitude = Dimension('x', label= 'Longitude') #latitude = Dimension('y', label='Latitude') data = self.data if isinstance(self.data, gpd.GeoDataFrame): data = GeomUtil.transformGdfToDf(self.trace, data) table = hv.Table(data) if len(table) == len(self.overlayElement): pass #DataLink(table,self.overlayElement) return pn.Row(table) def panel(self): panel = pn.Row(self.viz.getConfigTracePanel) return panel def populateListeType(self, dataFrame): self.listeEntier = list(dataFrame.select_dtypes('int64').columns) self.listeDecimal = list(dataFrame.select_dtypes('float64').columns) self.listeObjet = list(dataFrame.select_dtypes('object').columns) self.listeTexte = list(dataFrame.select_dtypes('string').columns) def setOverlay(self, overlay): self.overlay = overlay if isinstance(overlay, Overlay) or isinstance(overlay, NdOverlay): for k, v in self.overlay.items(): self.overlayElement = v self.idOverlayElement = v.id self.groupeName = v.group self.labelName = v.label def getUpdatedData(self, dataFrame): if self.viz.type not in GEOMPLOT and not dataFrame.empty: # on vire la geom si on est pas dans une Viz GEOMPLOT dataFrame = GeomUtil.transformGdfToDf(self.trace, dataFrame) self.populateListeType(dataFrame) return dataFrame def settingWidgetSelectColumn(self): columnList = list(self.data) panelWidgetSelectCol = pn.widgets.Select( name="Sélection d'une colonne", options=columnList) widgetColumnSelector = pn.Param( self.param, parameters=['select_column'], widgets={'select_column': panelWidgetSelectCol}, sizing_mode='stretch_width') return widgetColumnSelector @param.depends('select_column', watch=True) def onChangeSelectColumn(self): pass
class AnnotationDashBoard(SegmentationDashBoard): annot_channel = param.Parameter(doc='id of annotation channel') freehand_editor = param.Parameter(FreehandEditor()) old_subdf = param.DataFrame() def _make_dataset(self, key, img): if key == self.annot_channel: annot_dataset = EditableHvDataset(img=img, label=self.index_to_str(key)) # force reset drawing tool axis self.freehand_editor = FreehandEditor(dataset=annot_dataset, slicer=self.slicer) return annot_dataset else: return HvDataset(img=img, label=self.index_to_str(key)) # NOTE overriding base class --> watch=True not needed (else triggers double update) @param.depends('_dynamic_update_counter') def _dynamic_img_update(self): self.save_annot() for hv_ds, img in zip(self.hv_datasets, self.loaded_objects.values()): hv_ds.img = img @param.depends('_complete_update_counter') def dmap(self): self.save_annot() if not self.segmentation_viewer.channel_viewers or self._has_multiselect_changed: selected_channel_config = { key: self.channel_config[key] for key in self.loaded_objects.keys() } self.segmentation_viewer = SegmentationViewer.from_channel_config( selected_channel_config, composite_channels=self.composite_channels, overlay_channels=self.overlay_channels) self._widget_update_counter += 1 self.hv_datasets = [ self._make_dataset(key, img) for key, img in self.loaded_objects.items() ] dmaps = [hv_ds.dmap() for hv_ds in self.hv_datasets] # apply slicer if 3d image if next(iter(self.loaded_objects.values())).ndim > 2: dmaps = [self.slicer(dmap) for dmap in dmaps] # NOTE: workaround to overlay drawingtool. does not work if overlayed after Overlay + collate # similar to reported holoviews bug. tap stream attached to a dynamic map does not update # https://github.com/holoviz/holoviews/issues/3533 dmaps.append(self.freehand_editor.path_plot) dmap = self.segmentation_viewer(dmaps) # Note # dmap * self.freehand_editor.path_plot does not work (no drawing tool available) # self.freehand_editor.path_plot * dmap works but path is drawn behind the image return dmap def save_annot(self, event=None): npimg = self.freehand_editor.dataset.img.astype(np.int16) if npimg.shape != (2, 2) and self.old_subdf is not None: single_index = list( set(self.old_subdf.index.names) - set(self.multi_select_levels)) row = self.old_subdf.reset_index(single_index).dc[ self.annot_channel] row.dc.write(npimg, compress=9, overwrite=True) def discard_changes(self, event=None): single_index = list( set(self.old_subdf.index.names) - set(self.multi_select_levels)) row = self.old_subdf.reset_index(single_index).dc[self.annot_channel] img = row.dc.read()[0] self.freehand_editor.dataset.img = img @param.depends('subdf', watch=True) def _backup_subdf(self): self.old_subdf = self.subdf def widgets(self): wg = super().widgets() save_button = pn.widgets.Button(name='save') save_button.on_click(self.save_annot) discard_button = pn.widgets.Button(name='discard changes') discard_button.on_click(self.discard_changes) edit_wg = self.freehand_editor.widgets() edit_wg.append(save_button) edit_wg.append(discard_button) return pn.Column(wg, edit_wg)
class ReactiveDashboard(param.Parameterized): title = pn.pane.Markdown("# Booze Cruise YYC") # Add a widget that picks the environment and bucket number_dest = param.Integer(len(DEFAULT_DEST), label="Select number of destinations", bounds=(0, 15)) waypoints_per_batch = param.Integer( 10, label="Waypoints per batch in Google Maps URL", bounds=(1, 12)) progress_bar = pnw.misc.Progress( active=False, bar_color="light", value=None, width_policy="max", sizing_mode="stretch_width", ) date_custom_map: Dict = {} get_best_route_action = pnw.Button(name="Optimize Route", button_type="primary") get_batch_destinations = pnw.Button(name="Import Destinations", button_type="primary") destinations_pane = param.Parameter(default=destinations_pane_default) destinations_wlist = param.List(default=destinations_wlist_default) destinations_latlongs = param.List(default=[(0, 0), (0, 0)], precedence=-0.5) gmaps_urls = param.List(default=['', ''], precedence=-0.5) destinations_addresses = param.List(default=[(0, 0), (0, 0)], precedence=-0.5) all_dates_forecast = default_altair() default_plot = pn.Pane(default_altair()) start_location = param.String(label='Departure Point') end_location = param.String(label='Destination Point') batch_import_str = pnw.TextAreaInput( name='Batch import', placeholder= 'Add locations here by e.g. copy-pasting from a spreadsheet', width=300, height=450, sizing_mode='scale_both') is_start_equal_end = param.Boolean( default=True, label='My final destination same as Departure Point') start_latlong = param.Tuple(default=(0, 0), precedence=-0.5) end_latlong = param.Tuple(default=(0, 0), precedence=-0.5) df_label = param.DataFrame(precedence=-0.5, default=pd.DataFrame()) df_all_pts = param.DataFrame(precedence=-0.5, default=pd.DataFrame()) # Placeholder for tabs: tabs = pn.Tabs(('Batch Location Import', pn.Row())) tmp_buffer = 'Temporary buffer' @param.depends("number_dest", watch=True) def change_destinations_number(self): new_destinations = create_destination_inputs( n=self.number_dest, prev_destinations=self.destinations_wlist) self.destinations_pane, self.destinations_wlist = ( new_destinations[0], new_destinations[1], ) self.tabs.active = 0 return self.destinations_pane def geocode_dest_list_latlong(self, event, destinations_list): self.progress_bar.bar_color = 'info' self.progress_bar.active = True logger_bc.info(event) destinations_str = [_pull_value_wlist(x) for x in destinations_list] logger_bc.info(f"Geocoding the destinations list: {destinations_str}") destinations_jsons = [ _geocode_destination_here(x) for x in destinations_str ] latlongs = [ _pull_lat_long_here(x, n_entry=0) for x in destinations_jsons ] addresses = [ _pull_address_here(x, n_entry=0) for x in destinations_jsons ] logger_bc.info(latlongs) logger_bc.info(addresses) # latlongs = [(random.randint(i, 20), random.randint(i, 40)) for i in range(len(destinations_list))] self.destinations_latlongs = latlongs self.destinations_addresses = addresses logger_bc.info(self.destinations_latlongs) logger_bc.info(self.destinations_addresses) self.progress_bar.bar_color = 'light' self.progress_bar.active = False @param.depends('destinations_latlongs') def show_latlongs(self): destinations_str = [ _pull_value_wlist(x) for x in self.destinations_wlist ] x = f' Length = {len(self.destinations_wlist)}, vals = {destinations_str}' x += f' Latlongs = {len(self.destinations_latlongs)}, vals = {self.destinations_addresses}' res_md = pn.pane.Markdown(x) return res_md def find_best_route(self, event, latlong_list, start_point: Tuple = (0, 0), 
end_point: Tuple = (0, 0)): ''' Find optimal route using TomTom routing service :param start_point: :param end_point: :param event: :param latlong_list: :return: ''' self.progress_bar.bar_color = 'info' self.progress_bar.active = True latlongs = [start_point] + latlong_list + [end_point] latlong_concat = concat_latlongs(latlongs) url_locations = f'{base_url_tomtom}/{latlong_concat}/json' params = { 'key': API_KEY_TOMTOM, 'travelMode': 'car', 'computeBestOrder': 'true', 'traffic': 'true', 'instructionsType': 'text', 'computeTravelTimeFor': 'all', } response = requests.get(url_locations, params=params) response_json = response.json() latlongs_original_optimal = rearrange_waypoints(response_json) sorted_addresses = self.get_ordered_addresses( latlongs_original_optimal) sorted_addresses_with_terminals = [ self.start_location ] + sorted_addresses + [self.end_location] _, urls = construct_gmaps_urls(sorted_addresses_with_terminals, waypoints_batch_size=10) self.gmaps_urls = urls # Prepare dataframes to feed Bokeh with self.df_label = create_label_df(start_point, end_point, latlongs_original_optimal, sorted_addresses=sorted_addresses, start_location=self.start_location, end_location=self.end_location) self.df_all_pts = create_legs_df(response_json) self.progress_bar.bar_color = 'light' self.progress_bar.active = False @param.depends('df_all_pts') def plot_bokeh(self): if self.df_all_pts.shape[0] > 0: print('Plotting bokeh') p = create_bokeh_figure(df_all_pts=self.df_all_pts, df_label=self.df_label) else: p = figure() return p def get_ordered_addresses(self, ordered_latlongs): """ Sort geocoded addresses into optimal order """ def closest_node(node, nodes): nodes = np.asarray(nodes) deltas = nodes - node dist_2 = np.einsum('ij,ij->i', deltas, deltas) return np.argmin(dist_2) sort_vector = [ closest_node(x, self.destinations_latlongs) for x in ordered_latlongs ] sorted_addresses = [ self.destinations_addresses[x]['label'] for x in sort_vector ] return sorted_addresses @param.depends('gmaps_urls') def show_urls(self): base_url_string = """ ### The route links for navigation in Google Maps: URL """ urls_links_md = [ f'**[Group {i}]({u})**' for i, u in enumerate(self.gmaps_urls) ] url_string = '/n/n'.join(urls_links_md) base_url_string = base_url_string.replace('URL', url_string) res_md = pn.pane.Markdown(base_url_string) print(res_md) return res_md def optimize_route(self, event): print(f'start_loc: {self.start_location}') start_latlong = _pull_lat_long_here( _geocode_destination_here(self.start_location)) if self.is_start_equal_end: end_latlong = start_latlong self.end_latlong = start_latlong self.end_location = self.start_location else: end_latlong = _pull_lat_long_here( _geocode_destination_here(self.end_location)) self.start_latlong = start_latlong self.end_latlong = end_latlong self.geocode_dest_list_latlong( event, destinations_list=self.destinations_wlist) self.find_best_route(event, latlong_list=self.destinations_latlongs, start_point=start_latlong, end_point=end_latlong) def destinations_from_import_str(self, event): self.progress_bar.bar_color = 'info' self.progress_bar.active = True destinations_new = self.batch_import_str.value.split('\n') self.destinations_pane, self.destinations_wlist = create_destination_inputs( n=len(destinations_new), prev_destinations=None, init_vals=destinations_new) self.number_dest = len(destinations_new) self.progress_bar.bar_color = 'light' self.progress_bar.active = False @param.depends('is_start_equal_end') def start_end_widget(self): if 
self.is_start_equal_end: self.end_location = self.start_location self.end_latlong = self.start_latlong return pn.Column(self.param.start_location, self.param.is_start_equal_end) else: return pn.Column(self.param.start_location, self.param.is_start_equal_end, self.param.end_location) def panel(self): # Attach a callback to geocoding & optimal route search self.get_best_route_action.on_click(lambda x: self.optimize_route(x)) # Attach a callback to batch import: self.batch_import_str.link(self.batch_import_str, callbacks={'value': clean_space_callback}) self.batch_import_str.value = '' # Attach a callback to Import Destinations button so the destinations pasted propagate into the Destinations list & sidebar self.get_batch_destinations.on_click( lambda x: self.destinations_from_import_str(x)) # Setup the sidebar: widgets_sliders = pn.Column(self.param.number_dest, self.param.waypoints_per_batch) widgets_start_end = self.start_end_widget buttons_ = pn.Column(self.get_best_route_action) progress_bar = pn.Pane(self.progress_bar, sizing_mode="stretch_width", width_policy="max") # Set up tabs tab_bokeh = pn.Column(pn.Column(self.plot_bokeh), self.show_urls, sizing_mode="stretch_width", width_policy="max") tab_import = pn.Row(self.batch_import_str, self.get_batch_destinations) self.tabs = pn.Tabs(('Optimal Route Map', tab_bokeh), ('Batch Location Import', tab_import)) result = pn.Row( pn.Column( self.title, widgets_sliders, progress_bar, widgets_start_end, buttons_, self.change_destinations_number, ), self.tabs, sizing_mode="stretch_width", ) return result
class trainer(param.Parameterized):
    display_df = param.DataFrame(default=pd.DataFrame())
    results = param.Boolean(default=False)
    X = param.Array(default=None)
    result_string = param.String(default='')

    def __init__(self, **params):
        super().__init__(**params)
        self.name_of_page = 'Test and Train'
        self.test_slider = pn.widgets.IntSlider(name='Test Percentage',
                                                start=0, end=100, step=10, value=20)
        self.tt_button = pn.widgets.Button(name='Train and Test', button_type='primary')
        self.tt_button.on_click(self.train_test)
        self.tt_model = pn.widgets.Select(name='Select',
                                          options=['Random Forest Classifier'])

    def train_test(self, event):
        # get values from sentiment
        self.display_df = convert_sentiment_values(self.display_df)
        y = self.display_df['label']
        # get train test sets
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, y, test_size=self.test_slider.value / 100, random_state=0)
        if self.tt_model.value == 'Random Forest Classifier':
            sentiment_classifier = RandomForestClassifier(n_estimators=1000, random_state=0)
            sentiment_classifier.fit(X_train, y_train)
            y_pred = sentiment_classifier.predict(X_test)
            self.y_test = y_test
            self.y_pred = y_pred
            self.analyze()

    def analyze(self):
        self.cm = confusion_matrix(self.y_test, self.y_pred)
        self.cr = classification_report(self.y_test, self.y_pred)
        self.acc_score = accuracy_score(self.y_test, self.y_pred)
        splits = self.cr.split('\n')
        cml = self.cm.tolist()
        self.result_string = f"""
### Classification Report
<pre>
{splits[0]}
{splits[1]}
{splits[2]}
{splits[3]}
{splits[4]}
{splits[5]}
{splits[6]}
{splits[7]}
{splits[8]}
</pre>
### Confusion Matrix
<pre>
{cml[0]}
{cml[1]}
</pre>
### Accuracy Score
<pre>
{round(self.acc_score, 4)}
</pre>
"""
        self.results = True

    def options_page(self, help_text):
        return pn.WidgetBox(help_text, self.tt_model, self.test_slider, self.tt_button,
                            height=375, width=300)

    @pn.depends('results')
    def df_pane(self):
        if not self.results:
            self.result_pane = self.display_df
        else:
            self.result_pane = pn.pane.Markdown(f"""
{self.result_string}
""", width=500, height=350)
        return pn.WidgetBox(self.result_pane, height=375, width=450)

    def panel(self):
        help_text = (
            "Your text will now be trained and tested using a selected model. You may "
            "choose a percentage of your data to reserve for testing; the rest will be used for "
            "training. For example, if I reserve 20%, the remaining 80% will be used for training "
            "and the 20% will be used to determine how well the trained model does assigning a "
            "sentiment label to the testing text. Currently, the only model available is the "
            "scikit-learn Random Forest Classifier model.")
        return pn.Row(
            pn.Column(
                pn.pane.Markdown('## Train and Test'),
                self.options_page(help_text),
            ),
            pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            ))
class WordEmbedder(base_page): spark_df = param.ClassSelector( class_= sdf ) display_df = param.DataFrame(default = pd.DataFrame()) df = param.DataFrame() X = param.Array(default = None) def __init__(self, **params): super().__init__(**params) # self.spark_df = spark_df self.param.name_of_page.default = 'Word Embedding' self.we_model = pn.widgets.Select(name='Select', options=['SKLearn Count Vectorizer', 'Glove', 'Bert']) self.we_button = pn.widgets.Button(name='Transform', button_type='primary') self.we_button.on_click(self.transform) def options_page(self): return pn.WidgetBox(self.we_model, self.we_button, height = 300, width = 300 ) def transform(self, event): print('embedding') if self.we_model.value == 'Glove': print('glove') from sparknlp.annotator import WordEmbeddingsModel word_embeddings=WordEmbeddingsModel.pretrained() word_embeddings.setInputCols(['document','stem']) word_embeddings.setOutputCol('embeddings') self.spark_df = word_embeddings.transform(self.spark_df) embeddings_df = get_all_lines(self.spark_df, 'embeddings.embeddings', col = 'embeddings') elif self.we_model.value == 'SKLearn Count Vectorizer': from sklearn.feature_extraction.text import CountVectorizer print('join lines') corpus = join_lines(self.display_df) print('doing vectorizer') vectorizer = CountVectorizer(max_features=1500) print('vectorizing 2') X = vectorizer.fit_transform(corpus).toarray() cnt = self.spark_df.count() print('getting sentiment from spark df') labels = self.spark_df.select('sentiment').take(cnt) for n in range(cnt): labels[n] = labels[n][0] print('done getting sentiment, creating dataframe') xlist = [] for n in range(len(X)): xlist.append(list(X[n])) self.X = X embeddings_df = pd.DataFrame({'embeddings': xlist, 'sentiment': labels}) else: print('bert') from sparknlp.annotator import BertEmbeddings bertEmbeddings = BertEmbeddings.pretrained() bertEmbeddings.setInputCols(['document','stem']) bertEmbeddings.setOutputCol('embeddings') embeddings_df=bertEmbeddings.transform(self.spark_df) self.spark_df = embeddings_df embeddings_df = get_all_lines(self.spark_df, 'embeddings.embeddings', col = 'embeddings') self.display_df = embeddings_df self.continue_button.disabled = False @param.output('X', 'display_df') def output(self): return self.X, self.display_df
class Test(param.Parameterized): df = param.DataFrame(default=valid_df, columns={'a', 'd'})
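This Test class exercises param.DataFrame's column constraint in its set form: as I read the param API, a set of names means the assigned DataFrame must contain those columns, and assignment fails otherwise. A minimal, self-contained sketch of that behaviour (the frames below are made up; the valid_df fixture from the original is not reproduced here):

import pandas as pd
import param

class RequiredCols(param.Parameterized):
    # the DataFrame must contain columns 'a' and 'd'
    df = param.DataFrame(default=pd.DataFrame({"a": [1], "d": [2]}), columns={"a", "d"})

r = RequiredCols()
r.df = pd.DataFrame({"a": [1], "d": [2]})      # accepted: required columns present
try:
    r.df = pd.DataFrame({"a": [1], "b": [2]})  # rejected: 'd' is missing
except ValueError as err:
    print(err)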
class RiskRewardCalculation(param.Parameterized): """A model of a Risk Reward Calculation""" days_to_delivery_start: int = param.Integer(DEFAULT_DAYS_TO_DELIVERY_START, bounds=DAYS_TO_DELIVERY_BOUNDS) days_to_delivery_end: int = param.Integer(DEFAULT_DAYS_TO_DELIVERY_END, bounds=DAYS_TO_DELIVERY_BOUNDS) aggregation: str = param.ObjectSelector( default=DEFAULT_RISK_REWARD_AGGREGATION, objects=RISK_REWARD_AGGREGATIONS) spreads: pd.DataFrame = param.DataFrame() analysis = param.DataFrame() payoff_up = param.Number() payoff_down = param.Number() risk_reward = param.Number() @param.depends( "days_to_delivery_start", "days_to_delivery_end", "aggregation", "spreads", watch=True, ) def _update(self): print("_update") spreads_filter = (self.spreads["days_to_delivery"] == self.days_to_delivery_start) | ( self.spreads["days_to_delivery"] == self.days_to_delivery_end) spreads_on_days_to_delivery = self.spreads[spreads_filter] analysis = spreads_on_days_to_delivery.pivot( columns="days_to_delivery", values="value", index="spread", ) analysis["change"] = (analysis[self.days_to_delivery_end] - analysis[self.days_to_delivery_start]) analysis = analysis.dropna() up_filter = analysis["change"] > 0 down_filter = analysis["change"] < 0 up_data = analysis[up_filter] down_data = analysis[down_filter] if self.aggregation == "mean": payoff_up = up_data.change.mean() payoff_down = down_data.change.mean() elif self.aggregation == "sum": payoff_up = up_data.change.sum() payoff_down = down_data.change.sum() if payoff_up and payoff_down: risk_reward = -payoff_up / payoff_down else: payoff_up = 0.0 payoff_down = 0.0 risk_reward = 0.0 print(analysis.round(1).index) self.analysis = analysis.round(1) self.payoff_up = round(payoff_up, 1) self.payoff_down = round(payoff_down, 1) self.risk_reward = round(risk_reward, 1)
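RiskRewardCalculation._update is easier to follow on a toy frame. The sketch below replays the same pivot-and-aggregate steps outside the class, with made-up numbers (not real market data), assuming days_to_delivery_start=10, days_to_delivery_end=1 and the "mean" aggregation:

import pandas as pd

spreads = pd.DataFrame({
    "spread":           ["A", "A", "B", "B", "C", "C"],
    "days_to_delivery": [10,   1,  10,   1,  10,   1],
    "value":            [5.0, 7.0, 4.0, 3.0, 2.0, 2.5],
})

start, end = 10, 1
on_days = spreads[spreads["days_to_delivery"].isin([start, end])]
analysis = on_days.pivot(index="spread", columns="days_to_delivery", values="value")
analysis["change"] = analysis[end] - analysis[start]    # A: +2.0, B: -1.0, C: +0.5

payoff_up = analysis.loc[analysis["change"] > 0, "change"].mean()    # (2.0 + 0.5) / 2 = 1.25
payoff_down = analysis.loc[analysis["change"] < 0, "change"].mean()  # -1.0
risk_reward = -payoff_up / payoff_down                               # 1.25
print(analysis.round(1), round(risk_reward, 1))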
class PandasProfilingApp(param.Parameterized): """The PandasProfilingApp showcases how to integrate the Pandas Profiling Report with Panel""" csv_url = param.String(label="CSV URL") dataframe = param.DataFrame() report = param.ClassSelector(class_=ProfileReport) html_report = param.String() update_report = param.Action(label="UPDATE REPORT") random_report = param.Action(label="RANDOM REPORT") progress = param.Parameter() html_report_pane = param.ClassSelector(class_=pn.pane.HTML) view = param.ClassSelector(class_=pn.layout.Reactive) config = param.ClassSelector(class_=Config, instantiate=False) def __init__(self, **params): self._csv_urls_cycle = cycle(CSV_URLS) params["config"] = Config() params["update_report"] = self._update_report params["random_report"] = self._random_report params["progress"], params["html_report_pane"], params[ "view"] = self._get_view(params["config"]) super().__init__(**params) self._set_random_csv_url() def _update_report(self, _=None): self.progress.active = True self._generate_report() self.html_report_pane.object = HTML_LOADING_REPORT html_report = html.escape(self.html_report) self.html_report_pane.object = ( f"""<iframe srcdoc="{html_report}" frameborder="0" allowfullscreen></iframe>""" ) self.progress.active = False self.csv_url = self.csv_url def _random_report(self, _=None): self.progress.active = True self._set_random_csv_url() self._update_report() def _get_view(self, config): style = pn.pane.HTML(STYLE, width=0, height=0, margin=0, sizing_mode="fixed") description = pn.pane.Markdown(__doc__) app_bar = pn.Row( pn.pane.PNG( LOGO_URL, embed=False, height=50, width=62, sizing_mode="fixed", margin=(10, 0, 10, 25), ), pn.pane.Markdown( "# Pandas Profiling Report", sizing_mode="stretch_width", margin=(None, None, None, 25), align="center", ), sizing_mode="stretch_width", margin=(25, 5, 0, 5), css_classes=["app-bar"], background=GREEN, ) progress = pn.widgets.Progress(bar_color="secondary", width=335, sizing_mode="fixed", margin=(0, 5, 10, 5)) progress.active = False widgets = { "csv_url": { "sizing_mode": "stretch_width", }, "update_report": { "align": "end", "width": 150, "sizing_mode": "fixed" }, "random_report": { "button_type": "success", "align": "end", "width": 150, "sizing_mode": "fixed", "css_classes": ["id-random-report-button"], }, } top_selections = pn.Param( self, parameters=["csv_url", "update_report", "random_report"], widgets=widgets, default_layout=pn.Row, show_name=False, sizing_mode="stretch_width", ) html_report_pane = pn.pane.HTML(EMPTY_HTML_REPORT, height=900, sizing_mode="stretch_both") report_tab = pn.Column( top_selections, html_report_pane, sizing_mode="stretch_width", name="Report", ) config_tab = pn.Param(config, sizing_mode="stretch_width", name="Configuration", show_name=False) tabs = pn.Tabs( report_tab, config_tab, ) _view = pn.Column( style, description, app_bar, pn.Row(pn.layout.HSpacer(), progress, sizing_mode="stretch_width"), tabs, pn.layout.HSpacer(height=400), # Gives better scrolling sizing_mode="stretch_width", ) return progress, html_report_pane, _view def _generate_report(self): print(self.csv_url, self.config.title, self.config.minimal) self.html_report_pane.object = HTML_LOADING_DATA self.dataframe = self._get_dataframe(self.csv_url) self.html_report_pane.object = HTML_CREATING_PROFILER self.report = self._get_profile_report(self.csv_url, self.config.title, self.config.minimal) self.html_report_pane.object = HTML_GENERATING_REPORT self.html_report = self._get_html_report(self.csv_url, self.config.title, 
self.config.minimal) @staticmethod @lru_cache(maxsize=128) def _get_dataframe(url): return pd.read_csv(url, nrows=MAX_ROWS) @lru_cache(maxsize=128) def _get_profile_report(self, url, title, minimal): print(url, title, minimal) return ProfileReport(self.dataframe, minimal=minimal, title=title) @lru_cache(maxsize=128) def _get_html_report(self, url, title, minimal): print(url, title, minimal) return self.report.to_html() def _set_random_csv_url(self): self.csv_url = next(self._csv_urls_cycle) def __str__(self): return "Pandas Profiler App" def __repr__(self): return self.__str__()
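A hedged usage sketch for PandasProfilingApp: assuming the module-level names it references (CSV_URLS, Config, the HTML_*/STYLE constants, LOGO_URL, MAX_ROWS) are defined as in the original app, it is instantiated and served roughly like this:

import panel as pn

pn.extension()            # load Panel's frontend resources

app = PandasProfilingApp()
app.view.servable()       # for `panel serve app.py`
# from a plain script or notebook, pn.serve(app.view) works as well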
class DeepLearningConfig(WorkflowParams, DatasetParams, OutputParams, OptimizerParams, TrainerParams, GenericConfig): """ A class that holds all settings that are shared across segmentation models and regression/classification models. """ _model_category: ModelCategory = param.ClassSelector( class_=ModelCategory, doc="The high-level model category described by this config.") num_dataload_workers: int = param.Integer( 2, bounds=(0, None), doc="The number of data loading workers (processes). When set to 0," "data loading is running in the same process (no process startup " "cost, hence good for use in unit testing. However, it " "does not give the same result as running with 1 worker process)") shuffle: bool = param.Boolean( True, doc="If true, the dataset will be shuffled randomly during training.") train_batch_size: int = param.Integer( 4, bounds=(0, None), doc="The number of crops that make up one minibatch during training.") use_model_parallel: bool = param.Boolean( False, doc="If true, neural network model is partitioned across all " "available GPUs to fit in a large model. It shall not be used " "together with data parallel.") pin_memory: bool = param.Boolean( True, doc="Value of pin_memory argument to DataLoader") restrict_subjects: Optional[str] = \ param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). " "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where " "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for " "test). If any of i, j or j are missing or are negative, do not modify the corresponding " "set. Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and " "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added " "to that set.", allow_None=True) _dataset_data_frame: Optional[DataFrame] = \ param.DataFrame(default=None, doc="The dataframe that contains the dataset for the model. This is usually read from disk " "from dataset.csv") avoid_process_spawn_in_data_loaders: bool = \ param.Boolean(is_windows(), doc="If True, use a data loader logic that avoid spawning new processes at the " "start of each epoch. This speeds up training on both Windows and Linux, but" "on Linux, inference is currently disabled as the data loaders hang. " "If False, use the default data loader logic that starts new processes for " "each epoch.") max_batch_grad_cam: int = param.Integer( default=0, doc="Max number of validation batches for which " "to save gradCam images. By default " "visualizations are saved for all images " "in the validation set") label_smoothing_eps: float = param.Number( 0.0, bounds=(0.0, 1.0), doc="Target smoothing value for label smoothing") log_to_parent_run: bool = param.Boolean( default=False, doc="If true, hyperdrive child runs will log their metrics" "to their parent run.") use_imbalanced_sampler_for_training: bool = param.Boolean( default=False, doc="If True, use an imbalanced sampler during training.") drop_last_batch_in_training: bool = param.Boolean( default=False, doc="If True, drop the last incomplete batch during" "training. If all batches are complete, no batch gets " "dropped. 
If False, keep all batches.") log_summaries_to_files: bool = param.Boolean( default=True, doc= "If True, model summaries are logged to files in logs/model_summaries; " "if False, to stdout or driver log") mean_teacher_alpha: float = param.Number( bounds=(0, 1), allow_None=True, default=None, doc="If this value is set, the mean teacher model will be computed. " "Currently only supported for scalar models. In this case, we only " "report metrics and cross-validation results for " "the mean teacher model. Likewise the model used for inference " "is the mean teacher model. The student model is only used for " "training. Alpha is the momentum term for weight updates of the mean " "teacher model. After each training step the mean teacher model " "weights are updated using mean_teacher_" "weight = alpha * (mean_teacher_weight) " " + (1-alpha) * (current_student_weights). ") #: Name of the csv file providing information on the dataset to be used. dataset_csv: str = param.String( DATASET_CSV_FILE_NAME, doc= "Name of the CSV file providing information on the dataset to be used. " "For segmentation models, this file must contain at least the fields: `subject`, `channel`, `filePath`." ) def __init__(self, **params: Any) -> None: self._model_name = type(self).__name__ # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here. self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None self.recovery_start_epoch = 0 super().__init__(throw_if_unknown_param=True, **params) logging.info("Creating the default output folder structure.") self.create_filesystem(fixed_paths.repository_root_directory()) # Disable the PL progress bar because all InnerEye models have their own console output self.pl_progress_bar_refresh_rate = 0 self.extra_downloaded_run_id: Optional[Any] = None def validate(self) -> None: """ Validates the parameters stored in the present object. """ WorkflowParams.validate(self) OptimizerParams.validate(self) if self.azure_dataset_id is None and self.local_dataset is None: raise ValueError( "Either of local_dataset or azure_dataset_id must be set.") @property def model_category(self) -> ModelCategory: """ Gets the high-level model category that this configuration objects represents (segmentation or scalar output). """ return self._model_category @property def is_segmentation_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation. """ return self.model_category == ModelCategory.Segmentation @property def is_scalar_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar i.e. for Classification or Regression models. """ return self.model_category.is_scalar @property def compute_grad_cam(self) -> bool: return self.max_batch_grad_cam > 0 @property def dataset_data_frame(self) -> Optional[DataFrame]: """ Gets the pandas data frame that the model uses. :return: """ return self._dataset_data_frame @dataset_data_frame.setter def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None: """ Sets the pandas data frame that the model uses. :param data_frame: The data frame to set. """ self._dataset_data_frame = data_frame def get_train_epochs(self) -> List[int]: """ Returns the epochs for which training will be performed. 
:return: """ return list(range(self.recovery_start_epoch + 1, self.num_epochs + 1)) def get_total_number_of_training_epochs(self) -> int: """ Returns the number of epochs for which a model will be trained. :return: """ return len(self.get_train_epochs()) def get_total_number_of_validation_epochs(self) -> int: """ Returns the number of epochs for which a model will be validated. :return: """ return self.get_total_number_of_training_epochs() @property def compute_mean_teacher_model(self) -> bool: """ Returns True if the mean teacher model should be computed. """ return self.mean_teacher_alpha is not None def __str__(self) -> str: """Returns a string describing the present object, as a list of key: value strings.""" arguments_str = "\nArguments:\n" # Avoid callable params, the bindings that are printed out can be humongous. # Avoid dataframes skip_params = { name for name, value in self.param.params().items() if isinstance(value, (param.Callable, param.DataFrame)) } for key, value in self.param.get_param_values(): if key not in skip_params: arguments_str += f"\t{key:40}: {value}\n" return arguments_str def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]: """ By default, uses torch.load to read and return the state dict from the checkpoint file, and does no modification of the checkpoint file. Overloading this function: When weights_url or local_weights_path is set, the file downloaded may not be in the exact format expected by the model's load_state_dict() - for example, pretrained Imagenet weights for networks may have mismatched layer names in different implementations. In such cases, you can overload this function to extract the state dict from the checkpoint. NOTE: The model checkpoint will be loaded using the torch function load_state_dict() with argument strict=False, so extra care needs to be taken to check that the state dict is valid. Check the logs for warnings related to missing and unexpected keys. See https://pytorch.org/tutorials/beginner/saving_loading_models.html#warmstarting-model-using-parameters -from-a-different-model for an explanation on why strict=False is useful when loading parameters from other models. :param path_to_checkpoint: Path to the checkpoint file. :return: Dictionary with model and optimizer state dicts. The dict should have at least the following keys: 1. Key ModelAndInfo.MODEL_STATE_DICT_KEY and value set to the model state dict. 2. Key ModelAndInfo.EPOCH_KEY and value set to the checkpoint epoch. Other (optional) entries corresponding to keys ModelAndInfo.OPTIMIZER_STATE_DICT_KEY and ModelAndInfo.MEAN_TEACHER_STATE_DICT_KEY are also supported. """ return load_checkpoint(path_to_checkpoint=path_to_checkpoint, use_gpu=self.use_gpu)
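The restrict_subjects mini-format documented in the DeepLearningConfig class above ('i,j,k', a blank slot meaning "leave that set alone", '+' meaning "absorb what the other sets discard") is compact enough that a small parser makes it concrete. This is only an illustration of the documented 'i,j,k' form, not the InnerEye implementation:

def parse_restrict_subjects(spec: str):
    """Return [train, val, test] limits: an int, '+' (absorb leftovers), or None (leave as is)."""
    limits = []
    for part in (spec.split(",") + ["", ""])[:3]:
        part = part.strip()
        if part == "+":
            limits.append("+")
        elif not part or int(part) < 0:
            limits.append(None)    # missing or negative: do not modify this set
        else:
            limits.append(int(part))
    return limits

print(parse_restrict_subjects("20,,5"))   # [20, None, 5], per the docstring example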
class Dashboard(param.Parameterized): # Setup model_options = { 'Linear Regression': LinearRegression(), **{f'Poly Degree {i}': PolyRegression(i) for i in range(2, 16, 2)} } relationsships = {'linear': 'linear', 'sine wave': 'sine_wave'} # Widgets for controling simulation n = param.Integer(default=100, bounds=(20, MAX_N), step=20) Noise_Amplitude = param.Number(default=1, bounds=(0, 10)) noise = param.ObjectSelector(default='normal', objects=['normal', 'constant']) Underlying_Relation = param.ObjectSelector( default=relationsships['linear'], objects=relationsships) # Widgets for modeling estimator = param.ObjectSelector( default=model_options['Linear Regression'], objects=model_options) N_Folds = param.Integer(default=10, bounds=(5, MAX_N_FOLDS)) Shuffle_Folds = param.Boolean(False) # Widgets for changing visuals Show_Unselected_Folds = param.Boolean(True) Select_Fold = param.ObjectSelector( default='all', objects={ 'all': 'all', **{ f'fold:{fold}': f'fold:{fold}' for fold in range(N_Folds.default) } }) # interactive changes on data data = param.DataFrame(data_generator(n.default, noise.default, Noise_Amplitude.default, Underlying_Relation.default), precedence=-1) data_extra = param.DataFrame(data_generator(N_EXTERNAL_TEST, noise.default, Noise_Amplitude.default, Underlying_Relation.default), precedence=-1) data_splitted = param.DataFrame(KFold_split(data.default, data_extra.default, N_Folds.default, False), precedence=-1) data_plot = param.DataFrame(fit_transform(data_splitted.default, estimator.default), precedence=-1) @param.depends('n', 'noise', 'Noise_Amplitude', 'Underlying_Relation', watch=True) def update_data_creation(self): self.data = data_generator(self.n, self.noise, self.Noise_Amplitude, self.Underlying_Relation) self.data_extra = data_generator(N_EXTERNAL_TEST, self.noise, self.Noise_Amplitude, self.Underlying_Relation) @param.depends('data', 'data_extra', 'N_Folds', 'Shuffle_Folds', watch=True) def update_split(self): # So that never try more folds than samples => would run into an error in sklearn if self.N_Folds > self.n: self.N_Folds = self.n if self.N_Folds > self.param['N_Folds'].bounds[1]: self.N_Folds = self.param['N_Folds'].bounds[1] self.param['N_Folds'].bounds = (5, min(MAX_N_FOLDS, self.n)) self.data_splitted = KFold_split(self.data, self.data_extra, self.N_Folds, self.Shuffle_Folds) @param.depends('data_splitted', 'estimator', watch=True) def update_estimator(self): self.data_plot = fit_transform(self.data_splitted, self.estimator) @param.depends('data_plot', 'Show_Unselected_Folds', 'Select_Fold') def view(self): ds = hv.Dataset(self.data_plot, kdims=['x', 'data_split', 'in_train_set', 'y'], vdims=['y_pred']) self.param['Select_Fold'].objects = { 'all': 'all', **{f'fold:{fold}': f'fold:{fold}' for fold in range(self.N_Folds)} } scatter = ds.apply(create_scatter, fold=self.Select_Fold) lines = ds.apply(create_line, fold=self.Select_Fold, show_unselected=self.Show_Unselected_Folds) dist_plot = ds.apply(create_dist_plot, fold=self.Select_Fold) return pn.Column(pn.Row((scatter * lines), dist_plot))
class Test(param.Parameterized): df = param.DataFrame(default=invalid_df, columns=(None, 2))
class AnnotationMission(param.Parameterized): # (input) parameters cpr = param.Parameter() signals = param.Parameter() titles = param.Parameter() mission_id = param.Parameter() annotations = param.DataFrame(default=df_default) next_classification = param.ObjectSelector(default=CLASSIFICATIONS[0], objects=CLASSIFICATIONS) pending_start = param.Number(default=None) ############################ ## ANNOTATIONS PLOT ############################ # Display the interactive elements, like annotations as colorful ranges over the plot def plot_annotations(self, **kwargs): flag = [ str(i) == str(self.mission_id) for i in self.annotations.mission_id.values ] rows = self.annotations[flag].iterrows() plots = [] # We remember the first double-click and draw a vertical line if we expect another click to happen if self.pending_start: plots.append(hv.VLine(self.pending_start).opts(line_dash="dashed")) plots.extend([ hv.VSpan(r["start_clock_ms"], r["end_clock_ms"]).opts( color=color_dict.get(r["classification"], "yellow")) #* #hv.Text((r["start_clock_ms"]+r["end_clock_ms"])/2,0.9,str(r["classification"])).opts(color="red") for ix, r in rows ]) return hv.Overlay(plots) ############################ ## ANNOTATIONS REFRESH ############################ def refresh_annotations(self): if hasattr(self, "_plot_update_stream"): self._plot_update_stream.event() ############################ ## SIGNALS PLOT ############################ # Plot and datashade the ecg signal def plot_signal(self, **kwargs): curves = [] curves.append(hv.Curve(self.cpr)) curves.append(hv.Curve(self.signals[0])) curves.append( hv.Curve(self.signals[1], label=self.titles[1]).opts(opt)) curves.append( hv.Curve(self.signals[2], label=self.titles[2]).opts(opt)) return curves ############################ ## PLOT ############################ def plot(self): signal_curves = self.plot_signal() # This is the clicking behaviour. self._plot_update_stream = hv.streams.Counter() def on_tap(x, y): # We have two modes, either there is no annotation pending, # so we remember the first click, or we record the annotation and reset the pending state. 
if not self.pending_start: self.pending_start = x else: values = (self.pending_start, x) start, end = min(values), max(values) self.annotations = self.annotations.append(pd.DataFrame({ "mission_id": [self.mission_id], "annotation_time": [datetime.now()], "start_clock_ms": [start], "end_clock_ms": [end], "classification": [self.next_classification], }), ignore_index=True) self.pending_start = None self.refresh_annotations() tap0 = hv.streams.DoubleTap(source=signal_curves[1]) tap1 = hv.streams.DoubleTap(source=signal_curves[2]) tap2 = hv.streams.DoubleTap(source=signal_curves[3]) @tap0.add_subscriber def on_tap0(x, y): on_tap(x, y) @tap1.add_subscriber def on_tap1(x, y): on_tap(x, y) @tap2.add_subscriber def on_tap2(x, y): on_tap(x, y) ## annotation dynamic map annotations_dmap = hv.DynamicMap(self.plot_annotations, streams=[self._plot_update_stream]) ## ECG and CPR plot overlay ecg_opt = hv.opts.Overlay(title='ECG and CPR') ecg_curve = hv.Overlay([ datashade(signal_curves[1], cmap=["grey", "black"]).opts(opt), annotations_dmap, ]) ecg_annot = hv.Overlay([ ecg_curve, signal_curves[0].opts(color="red"), ]).opts(ecg_opt) ## output plot I output_plots = [] output_plots.append(ecg_annot) ## output plot II output_plots.append( hv.Overlay([ datashade(signal_curves[2], cmap=["grey", "black"]).opts(opt), annotations_dmap, ]).opts(opt)) ## output plot III output_plots.append( hv.Overlay([ datashade(signal_curves[3], cmap=["grey", "black"]).opts(opt), annotations_dmap, ]).opts(opt)) return tuple(output_plots) ############################ ## ANNOTATION REMOVE ############################ # These are the handlers for the "detail table" def on_remove_annotation(self, ix): self.annotations = self.annotations.drop(ix) self.refresh_annotations() ############################ ## ANNOTATION CHANGE ############################ def on_change_annotation(self, ix, value): self.annotations.loc[ix, "classification"] = value # This line is needed to notify param of the inplace updated annotations dataframe self.annotations = self.annotations self.refresh_annotations() ############################ ## ANNOTATION SAVE ############################ @param.depends("annotations") def action_save_annotations(self): try: self.annotations.to_csv(pth_df, mode='w') except: self.annotations.to_csv(pth_df) save_annotations = param.Action(action_save_annotations, doc="Save Changes", label="Save Mission Changes") ############################ ## CONTROL PANEL ############################ # This is the detail table below where you can change the annotation made, or remove it. @param.depends("annotations") def plot_annotation_details(self): elements = [] for i, (ix, r) in enumerate( self.annotations # Sorting the dataframe here is necessary, # otherwise we would number the ranges by their insertion, not by their time. 
.sort_values("start_clock_ms").iterrows()): if str(r["mission_id"]) == str(self.mission_id): select = pn.widgets.RadioButtonGroup( name="Select classification", options=CLASSIFICATIONS, value=r["classification"], inline=True, ) remove = pn.widgets.Button( name="remove", width=40, ) clock_ms = int(float(r['start_clock_ms']) / 1000) tstamp = datetime.fromtimestamp(clock_ms).strftime("%H:%M:%S") select.param.watch( partial( lambda ix, event: self.on_change_annotation( ix, event.new), ix), "value") remove.param.watch( partial(lambda ix, event: self.on_remove_annotation(ix), ix), "clicks") elements.extend([ pn.widgets.StaticText(name=f"@ {tstamp} ", value=""), remove, select ]) return pn.GridBox(*elements, ncols=3, width=1200) def render(self): return pn.Column( pn.pane.Markdown( '### Start annotating by double clicking into the plot. This will mark the start of a range. Double click again to mark the end of the range.', style={ 'font-family': "serif", 'color': "#ff0000" }), pn.Row( pn.pane.Markdown('### Classification for next annotation:', style={'font-family': "serif"}), pn.Param( self.param.next_classification, widgets={ "next_classification": pn.widgets.RadioButtonGroup( options=CLASSIFICATIONS, ) #style={'font-size':'10pt'},css_classes=["widget-button"]) }), pn.Spacer(background='white', width=100, height=10), #self.param.remove_last_annotation, self.param.save_annotations, ), *(self.plot()), pn.pane.Markdown( f"### List of annotations for mission {self.mission_id}", style={'font-family': "serif"}), self.plot_annotation_details, )
class Test(param.Parameterized): df = param.DataFrame(default=valid_df, rows=(None, 3))
class WaveformWatcher(param.Parameterized): DIMS = [ ["cs1", "cs2"], ["z", "r"], ["e_light", 'e_charge'], ["e_light", 'e_ces'], ["drift_time", "n_peaks"], ] dates = param.DateRange(default=(dt.datetime(2016, 11, 10), dt.datetime.utcnow()), bounds=(dt.datetime(2016, 11, 10), dt.datetime.utcnow())) runs = param.List(default=[]) sources = param.List(default=[], ) linked_selection = param.Parameter() selection_spaces = param.List(default=DIMS) events = param.DataFrame(default=pd.DataFrame()) def __init__(self, **params): super().__init__(**params) self.linked_selection = hv.selection.link_selections.instance() @param.depends("selection_spaces", watch=True) def event_selection(self): if not self.selection_spaces: return hv.Points(dset, ["cs1", "cs2"]).opts(color="blue") colors = hv.Cycle('Category10').values plots = [ hv.Points(dset, dims).opts(color=c) for c, dims in zip(colors, self.selection_spaces) ] layout = hv.Layout(plots).cols(6) lsp = hv.selection.link_selections.instance() self.linked_selection = lsp layout = self.linked_selection(layout) return layout @param.depends("linked_selection.selection_expr") def selection(self): table = hv.Table(dset).opts(width=1550) if self.linked_selection and self.linked_selection.selection_expr: selected = table[self.linked_selection.selection_expr.apply(table)] self.events = selected.data return selected self.events = table.data return table def panel(self): date_picker = self.param.dates runs_picker = pn.widgets.MultiChoice(value=["181028_0045"], name="Runs", options=["181028_0045"], solid=False, width=1000) runs_picker.link(self, value="runs") source_picker = pn.widgets.CheckButtonGroup( value=["None"], name="Source", options=["None", "AmBe", "NG", "Rn220"]) source_picker.link(self, value="source") selection_spaces = pn.widgets.CheckButtonGroup( value=self.DIMS, name="Selection spaces", options={f"{x} vs {y}": [x, y] for x, y in self.DIMS}, width=1000) selection_spaces.link(self, value="selection_spaces") return pn.Column( pn.layout.Divider(), pn.pane.Markdown( "## First allow the user to load events by date range/run_id/source" ), date_picker, runs_picker, pn.pane.Markdown(" Source"), source_picker, pn.layout.Divider(), pn.pane.Markdown( "## Allow user to choose the selection spaces of interest e.g. cut spaces, energy etc." ), selection_spaces, pn.pane.Markdown( "## Plot events in selection spaces of interest for user to apply selections." ), pn.panel(self.event_selection), pn.layout.Divider(), pn.pane.Markdown("## Preview selected events with properties"), self.selection, width=1600, )
class Test(param.Parameterized): df = param.DataFrame(default=invalid_df, rows=(5, 7))
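The rows argument here is a (min, max) bounds check on the number of rows, with None leaving one side open (as in the rows=(None, 3) example earlier). A small sketch of what a violation looks like, with made-up frames:

import pandas as pd
import param

class Bounded(param.Parameterized):
    # between 5 and 7 rows are required
    df = param.DataFrame(default=pd.DataFrame({"a": list(range(5))}), rows=(5, 7))

b = Bounded()
try:
    b.df = pd.DataFrame({"a": list(range(10))})   # 10 rows fall outside the (5, 7) bound
except ValueError as err:
    print(err)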
class StatusTab(TabView): title = param.String(default='Status') statuses = param.DataFrame(precedence=0.1) update = param.Action(lambda self: self.update_statuses(), precedence=0.2) terminate_btn = param.Action(lambda self: None, label='Terminate', precedence=0.3) yes_btn = param.Action(lambda self: self.terminate_job(), label='Yes', precedence=0.4) cancel_btn = param.Action(lambda self: None, label='Cancel', precedence=0.5) disable_update = param.Boolean() @param.depends('parent.selected_job', watch=True) def update_statuses(self): if self.selected_job is not None: if self.disable_update: qstat = self.selected_job.qstat if qstat is None: statuses = None elif self.is_array: statuses = pd.DataFrame.from_dict(qstat).T else: statuses = pd.DataFrame(qstat, index=[0]) else: jobs = [self.selected_job] if self.is_array: jobs += self.selected_job.sub_jobs statuses = PbsJob.update_statuses(jobs, as_df=True) self.update_terminate_btn() if statuses is not None: statuses.set_index('job_id', inplace=True) self.statuses = statuses def terminate_job(self): self.selected_job.terminate() time.sleep(10) self.update_statuses() def update_terminate_btn(self): self.param.terminate_btn.constant = self.selected_job.status not in ( 'Q', 'R', 'B') @param.depends('statuses') def statuses_panel(self): statuses_table = pn.widgets.DataFrame.from_param(self.param.statuses, width=1300) \ if self.statuses is not None else pn.pane.Alert('No status information available.', alert_type='info') if self.disable_update: buttons = None else: update_btn = pn.widgets.Button.from_param(self.param.update, button_type='primary', width=100) terminate_btn = pn.widgets.Button.from_param( self.param.terminate_btn, button_type='danger', width=100) yes_btn = pn.widgets.Button.from_param(self.param.yes_btn, button_type='danger', width=100) cancel_btn = pn.widgets.Button.from_param(self.param.cancel_btn, button_type='success', width=100) yes_btn.visible = False cancel_btn.visible = False msg = pn.indicators.String( value= 'Are you sure you want to terminate the job. 
This cannot be undone.', css_classes=['bk', 'alert', 'alert-danger'], default_color='inherit', font_size='inherit', visible=False, ) terminate_confirmation = pn.Column( msg, pn.Row(yes_btn, cancel_btn, margin=20), background='#ffffff', ) args = { 'update_btn': update_btn, 'terminate_btn': terminate_btn, 'statuses_table': statuses_table, 'msg': msg, 'yes_btn': yes_btn, 'cancel_btn': cancel_btn, 'term_col': terminate_confirmation } terminate_code = 'update_btn.disabled=true; terminate_btn.visible=false; ' \ 'msg.visible=true; yes_btn.visible=true; cancel_btn.visible=true; ' \ 'term_col.css_classes=["panel-widget-box"]' cancel_code = 'update_btn.disabled=false; terminate_btn.visible=true; ' \ 'msg.visible=false; yes_btn.visible=false; cancel_btn.visible=false; term_col.css_classes=[]' terminate_btn.js_on_click(args=args, code=terminate_code) cancel_btn.js_on_click(args=args, code=cancel_code) code = 'btn.css_classes.push("pn-loading", "arcs"); btn.properties.css_classes.change.emit(); ' \ 'other_btn.disabled=true; ' \ 'statuses_table.css_classes.push("pn-loading", "arcs"); statuses_table.properties.css_classes.change.emit();' update_btn.js_on_click( args={ 'btn': update_btn, 'other_btn': terminate_btn, 'statuses_table': statuses_table }, code=code, ) yes_btn.js_on_click( args={ 'btn': terminate_btn, 'other_btn': update_btn, 'statuses_table': statuses_table }, code=code, ) buttons = pn.Row(update_btn, terminate_btn, terminate_confirmation) return pn.Column( statuses_table, buttons, sizing_mode='stretch_width', ) @param.depends('parent.selected_job') def panel(self): if self.selected_job: return self.statuses_panel else: return pn.pane.HTML('<h2>No jobs are available</h2>')
class Test(param.Parameterized): df = param.DataFrame(valid_df)
class DeepLearningConfig(GenericConfig, CudaAwareConfig): """ A class that holds all settings that are shared across segmentation models and regression/classification models. """ _model_category: ModelCategory = param.ClassSelector( class_=ModelCategory, doc="The high-level model category described by this config.") _model_name: str = param.String( None, doc="The human readable name of the model (for example, Liver). This is " "usually set from the class name.") random_seed: int = param.Integer( 42, doc="The seed to use for all random number generators.") azure_dataset_id: str = param.String( doc= "If provided, the ID of the dataset to use. This dataset must exist as a " "folder of the same name in the 'datasets' " "container in the datasets storage account.") local_dataset: Optional[Path] = param.ClassSelector( class_=Path, default=None, allow_None=True, doc="The path of the dataset to use, when training is running " "outside Azure.") num_dataload_workers: int = param.Integer( 8, bounds=(0, None), doc="The number of data loading workers (processes). When set to 0," "data loading is running in the same process (no process startup " "cost, hence good for use in unit testing. However, it " "does not give the same result as running with 1 worker process)") shuffle: bool = param.Boolean( True, doc="If true, the dataset will be shuffled randomly during training.") num_epochs: int = param.Integer(100, bounds=(1, None), doc="Number of epochs to train.") start_epoch: int = param.Integer( 0, bounds=(0, None), doc="The first epoch to train. Set to 0 to start a new " "training. Set to a value larger than zero for starting" " from a checkpoint.") l_rate: float = param.Number(1e-4, doc="The initial learning rate", bounds=(0, None)) _min_l_rate: float = param.Number( 0.0, doc= "The minimum learning rate for the Polynomial and Cosine schedulers.", bounds=(0.0, None)) l_rate_scheduler: LRSchedulerType = param.ClassSelector( default=LRSchedulerType.Polynomial, class_=LRSchedulerType, instantiate=False, doc="Learning rate decay method (Cosine, Polynomial, " "Step, MultiStep or Exponential)") l_rate_exponential_gamma: float = param.Number( 0.9, doc="Controls the rate of decay for the Exponential " "LR scheduler.") l_rate_step_gamma: float = param.Number( 0.1, doc="Controls the rate of decay for the " "Step LR scheduler.") l_rate_step_step_size: int = param.Integer( 50, bounds=(0, None), doc="The step size for Step LR scheduler") l_rate_multi_step_gamma: float = param.Number( 0.1, doc="Controls the rate of decay for the " "MultiStep LR scheduler.") l_rate_multi_step_milestones: Optional[List[int]] = param.List( None, bounds=(1, None), allow_None=True, class_=int, doc="The milestones for MultiStep decay.") l_rate_polynomial_gamma: float = param.Number( 1e-4, doc="Controls the rate of decay for the " "Polynomial LR scheduler.") l_rate_warmup: LRWarmUpType = param.ClassSelector( default=LRWarmUpType.NoWarmUp, class_=LRWarmUpType, instantiate=False, doc="The type of learning rate warm up to use. " "Can be NoWarmUp (default) or Linear.") l_rate_warmup_epochs: int = param.Integer( 0, bounds=(0, None), doc="Number of warmup epochs (linear warmup) before the " "scheduler starts decaying the learning rate. 
" "For example, if you are using MultiStepLR with " "milestones [50, 100, 200] and warmup epochs = 100, warmup " "will last for 100 epochs and the first decay of LR " "will happen on epoch 150") optimizer_type: OptimizerType = param.ClassSelector( default=OptimizerType.Adam, class_=OptimizerType, instantiate=False, doc="The optimizer_type to use") opt_eps: float = param.Number( 1e-4, doc="The epsilon parameter of RMSprop or Adam") rms_alpha: float = param.Number(0.9, doc="The alpha parameter of RMSprop") adam_betas: TupleFloat2 = param.NumericTuple( (0.9, 0.999), length=2, doc="The betas parameter of Adam, default is (0.9, 0.999)") momentum: float = param.Number( 0.6, doc="The momentum parameter of the optimizers") weight_decay: float = param.Number( 1e-4, doc="The weight decay used to control L2 regularization") save_start_epoch: int = param.Integer( 100, bounds=(0, None), doc="Save epoch checkpoints only when epoch is " "larger or equal to this value.") save_step_epochs: int = param.Integer( 50, bounds=(0, None), doc="Save epoch checkpoints when epoch number is a " "multiple of save_step_epochs") train_batch_size: int = param.Integer( 4, bounds=(0, None), doc="The number of crops that make up one minibatch during training.") detect_anomaly: bool = param.Boolean( False, doc="If true, test gradients for anomalies (NaN or Inf) during " "training.") use_mixed_precision: bool = param.Boolean( False, doc="If true, mixed precision training is activated during " "training.") use_model_parallel: bool = param.Boolean( False, doc="If true, neural network model is partitioned across all " "available GPUs to fit in a large model. It shall not be used " "together with data parallel.") test_diff_epochs: Optional[int] = param.Integer( None, doc="Number of different epochs of the same model to test", allow_None=True) test_step_epochs: Optional[int] = param.Integer( None, doc="How many epochs to move for each test", allow_None=True) test_start_epoch: Optional[int] = param.Integer( None, doc="The first epoch on which testing should run.", allow_None=True) monitoring_interval_seconds: int = param.Integer( 0, doc="Seconds delay between logging GPU/CPU resource " "statistics. If 0 or less, do not log any resource " "statistics.") number_of_cross_validation_splits: int = param.Integer( 0, bounds=(0, None), doc="Number of cross validation splits for k-fold cross " "validation") cross_validation_split_index: int = param.Integer( DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None), doc="The index of the cross validation fold this model is " "associated with when performing k-fold cross validation") file_system_config: DeepLearningFileSystemConfig = param.ClassSelector( default=DeepLearningFileSystemConfig(), class_=DeepLearningFileSystemConfig, instantiate=False, doc="File system related configs") pin_memory: bool = param.Boolean( True, doc="Value of pin_memory argument to DataLoader") _overrides: Dict[str, Any] = param.Dict( instantiate=True, doc="Model config properties that were overridden from the commandline" ) restrict_subjects: Optional[str] = \ param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). " "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where " "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for " "test). If any of i, j or j are missing or are negative, do not modify the corresponding " "set. 
Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and " "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added " "to that set.", allow_None=True) perform_training_set_inference: bool = \ param.Boolean(False, doc="If False (default), run full image inference on validation and test set after training. If " "True, also run full image inference on the training set") perform_validation_and_test_set_inference: bool = \ param.Boolean(True, doc="If True (default), run full image inference on validation and test set after training.") _metrics_data_frame_loggers: MetricsDataframeLoggers = param.ClassSelector( default=None, class_=MetricsDataframeLoggers, instantiate=False, doc="Data frame loggers for this model " "config") _dataset_data_frame: Optional[DataFrame] = \ param.DataFrame(default=None, doc="The dataframe that contains the dataset for the model. This is usually read from disk " "from dataset.csv") _use_gpu: Optional[bool] = param.Boolean( None, doc="If true, a CUDA capable GPU with at least 1 device is " "available. If None, the use_gpu property has not yet been called.") avoid_process_spawn_in_data_loaders: bool = \ param.Boolean(is_windows(), doc="If True, use a data loader logic that avoid spawning new processes at the " "start of each epoch. This speeds up training on both Windows and Linux, but" "on Linux, inference is currently disabled as the data loaders hang. " "If False, use the default data loader logic that starts new processes for " "each epoch.") # The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and # "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux # can reduce the chance of stuck jobs. multiprocessing_start_method: MultiprocessingStartMethod = \ param.ClassSelector(class_=MultiprocessingStartMethod, default=(MultiprocessingStartMethod.spawn if is_windows() else MultiprocessingStartMethod.fork), doc="Method to be used to start child processes in pytorch. Should be one of forkserver, " "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. " "Set to forkserver as a possible remedy for stuck jobs.") output_to: Optional[str] = \ param.String(default=None, doc="If provided, the run outputs will be written to the given folder. If not provided, outputs " "will go into a subfolder of the project root folder.") max_batch_grad_cam: int = param.Integer( default=0, doc="Max number of validation batches for which " "to save gradCam images. By default " "visualizations are saved for all images " "in the validation set") label_smoothing_eps: float = param.Number( 0.0, bounds=(0.0, 1.0), doc="Target smoothing value for label smoothing") log_to_parent_run: bool = param.Boolean( default=False, doc="If true, hyperdrive child runs will log their metrics" "to their parent run.") use_imbalanced_sampler_for_training: bool = param.Boolean( default=False, doc="If True, use an imbalanced sampler during training.") drop_last_batch_in_training: bool = param.Boolean( default=False, doc="If True, drop the last incomplete batch during" "training. If all batches are complete, no batch gets " "dropped. 
If False, keep all batches.") log_summaries_to_files: bool = param.Boolean( default=True, doc= "If True, model summaries are logged to files in logs/model_summaries; " "if False, to stdout or driver log") mean_teacher_alpha: float = param.Number( bounds=(0, 1), allow_None=True, default=None, doc="If this value is set, the mean teacher model will be computed. " "Currently only supported for scalar models. In this case, we only " "report metrics and cross-validation results for " "the mean teacher model. Likewise the model used for inference " "is the mean teacher model. The student model is only used for " "training. Alpha is the momentum term for weight updates of the mean " "teacher model. After each training step the mean teacher model " "weights are updated using mean_teacher_" "weight = alpha * (mean_teacher_weight) " " + (1-alpha) * (current_student_weights). ") def __init__(self, **params: Any) -> None: self._model_name = type(self).__name__ # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here. self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None super().__init__(throw_if_unknown_param=True, **params) logging.info("Creating the default output folder structure.") self.create_filesystem(fixed_paths.repository_root_directory()) def validate(self) -> None: """ Validates the parameters stored in the present object. """ if len(self.adam_betas) < 2: raise ValueError( "The adam_betas parameter should be the coefficients used for computing running averages of " "gradient and its square") if self.azure_dataset_id is None and self.local_dataset is None: raise ValueError( "Either of local_dataset or azure_dataset_id must be set.") if self.number_of_cross_validation_splits == 1: raise ValueError( f"At least two splits required to perform cross validation found " f"number_of_cross_validation_splits={self.number_of_cross_validation_splits}" ) if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index: raise ValueError( f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, " f"which is invalid for CV with {self.number_of_cross_validation_splits} splits." ) elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1: raise ValueError( f"Cross validation split index must be -1 for a non cross validation run, " f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} " f"and cross_validation_split_index={self.cross_validation_split_index}" ) if self.l_rate_scheduler == LRSchedulerType.MultiStep: if not self.l_rate_multi_step_milestones: raise ValueError( "Must specify l_rate_multi_step_milestones to use LR scheduler MultiStep" ) if sorted(set(self.l_rate_multi_step_milestones) ) != self.l_rate_multi_step_milestones: raise ValueError( "l_rate_multi_step_milestones must be a strictly increasing list" ) if self.l_rate_multi_step_milestones[0] <= 0: raise ValueError( "l_rate_multi_step_milestones cannot be negative or 0.") @property def model_name(self) -> str: """ Gets the human readable name of the model (e.g., Liver). This is usually set from the class name. :return: A model name as a string. """ return self._model_name @property def model_category(self) -> ModelCategory: """ Gets the high-level model category that this configuration objects represents (segmentation or scalar output). 
""" return self._model_category @property def is_segmentation_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation. """ return self.model_category == ModelCategory.Segmentation @property def is_scalar_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar i.e. for Classification or Regression models. """ return self.model_category.is_scalar @property def compute_grad_cam(self) -> bool: return self.max_batch_grad_cam > 0 @property def min_l_rate(self) -> float: return self._min_l_rate @min_l_rate.setter def min_l_rate(self, value: float) -> None: if value > self.l_rate: raise ValueError( "l_rate must be >= min_l_rate, found: {}, {}".format( self.l_rate, value)) self._min_l_rate = value @property def outputs_folder(self) -> Path: """Gets the full path in which the model outputs should be stored.""" return self.file_system_config.outputs_folder @property def logs_folder(self) -> Path: """Gets the full path in which the model logs should be stored.""" return self.file_system_config.logs_folder @property def checkpoint_folder(self) -> str: """Gets the full path in which the model checkpoints should be stored during training.""" return str(self.outputs_folder / CHECKPOINT_FOLDER) @property def visualization_folder(self) -> Path: """Gets the full path in which the visualizations notebooks should be saved during training.""" return self.outputs_folder / VISUALIZATION_FOLDER @property def perform_cross_validation(self) -> bool: """ True if cross validation will be be performed as part of the training procedure. :return: """ return self.number_of_cross_validation_splits > 1 @property def overrides(self) -> Optional[Dict[str, Any]]: return self._overrides @property def dataset_data_frame(self) -> Optional[DataFrame]: """ Gets the pandas data frame that the model uses. :return: """ return self._dataset_data_frame @dataset_data_frame.setter def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None: """ Sets the pandas data frame that the model uses. :param data_frame: The data frame to set. """ self._dataset_data_frame = data_frame @property def metrics_data_frame_loggers(self) -> MetricsDataframeLoggers: """ Gets the metrics data frame loggers for this config. :return: """ return self._metrics_data_frame_loggers def set_output_to(self, output_to: PathOrString) -> None: """ Adjusts the file system settings in the present object such that all outputs are written to the given folder. :param output_to: The absolute path to a folder that should contain the outputs. """ if isinstance(output_to, Path): output_to = str(output_to) self.output_to = output_to self.create_filesystem() def create_filesystem( self, project_root: Path = fixed_paths.repository_root_directory() ) -> None: """ Creates new file system settings (outputs folder, logs folder) based on the information stored in the present object. If any of the folders do not yet exist, they are created. :param project_root: The root folder for the codebase that triggers the training run. 
""" self.file_system_config = DeepLearningFileSystemConfig.create( project_root=project_root, model_name=self.model_name, is_offline_run=self.is_offline_run, output_to=self.output_to) def create_dataframe_loggers(self) -> None: """ Initializes the metrics loggers that are stored in self._metrics_data_frame_loggers :return: """ self._metrics_data_frame_loggers = MetricsDataframeLoggers( outputs_folder=self.outputs_folder) def should_load_checkpoint_for_training(self) -> bool: """Returns true if start epoch > 0, that is, if an existing checkpoint is used to continue training.""" return self.start_epoch > 0 def should_save_epoch(self, epoch: int) -> bool: """Returns True if the present epoch should be saved, as per the save_start_epoch and save_step_epochs settings. Epoch writing starts with the first epoch that is >= save_start_epoch, and that is evenly divisible by save_step_epochs. A checkpoint is always written for the last epoch (num_epochs), such that it is easy to overwrite num_epochs on the commandline without having to change the test parameters at the same time. :param epoch: The current epoch. The first epoch is assumed to be 1.""" should_save_epoch = epoch >= self.save_start_epoch \ and epoch % self.save_step_epochs == 0 is_last_epoch = epoch == self.num_epochs return should_save_epoch or is_last_epoch def get_train_epochs(self) -> List[int]: """ Returns the epochs for which training will be performed. :return: """ return list(range(self.start_epoch + 1, self.num_epochs + 1)) def get_total_number_of_training_epochs(self) -> int: """ Returns the number of epochs for which a model will be trained. :return: """ return len(self.get_train_epochs()) def get_total_number_of_save_epochs(self) -> int: """ Returns the number of epochs for which a model checkpoint will be saved. :return: """ return len( list(filter(self.should_save_epoch, self.get_train_epochs()))) def get_total_number_of_validation_epochs(self) -> int: """ Returns the number of epochs for which a model will be validated. :return: """ return self.get_total_number_of_training_epochs() def get_test_epochs(self) -> List[int]: """ Returns the list of epochs for which the model should be evaluated on full images in the test set. These are all epochs starting at self.test_start_epoch, in intervals of self.n_steps_epoch. The last training epoch is always included. If either of the self.test_* fields is missing (set to None), only the last training epoch is returned. :return: """ test_epochs = {self.num_epochs} if self.test_diff_epochs is not None and self.test_start_epoch is not None and \ self.test_step_epochs is not None: for j in range(self.test_diff_epochs): epoch = self.test_start_epoch + self.test_step_epochs * j if epoch > self.num_epochs: break test_epochs.add(epoch) return sorted(test_epochs) def get_path_to_checkpoint(self, epoch: int) -> Path: """ Returns full path to a checkpoint given an epoch :param epoch: the epoch number :param for_mean_teacher_model: if True looking returns path to the mean teacher checkpoint. Else returns the path to the (main / student) model checkpoint. :return: path to a checkpoint given an epoch """ return create_checkpoint_path( path=fixed_paths.repository_root_directory() / self.checkpoint_folder, epoch=epoch) def get_effective_random_seed(self) -> int: """ Returns the random seed set as part of this configuration. 
If the configuration corresponds to a cross validation split, then the cross validation fold index will be added to the set random seed in order to return the effective random seed. :return: """ seed = self.random_seed if self.perform_cross_validation: # offset the random seed based on the cross validation split index so each # fold has a different initial random state. seed += self.cross_validation_split_index return seed @property # type: ignore def use_gpu(self) -> bool: # type: ignore """ Returns True if a CUDA capable GPU is present and should be used, False otherwise. """ if self._use_gpu is None: # Use a local import here because we don't want the whole file to depend on pytorch. from InnerEye.ML.utils.ml_util import is_gpu_available self._use_gpu = is_gpu_available() return self._use_gpu @use_gpu.setter def use_gpu(self, value: bool) -> None: """ Sets the flag that controls the use of the GPU. Raises a ValueError if the value is True, but no GPU is present. """ if value: # Use a local import here because we don't want the whole file to depend on pytorch. from InnerEye.ML.utils.ml_util import is_gpu_available if not is_gpu_available(): raise ValueError( "Can't set use_gpu to True if there is not CUDA capable GPU present." ) self._use_gpu = value @property def use_data_parallel(self) -> bool: """ Data parallel is used if GPUs are usable and the number of CUDA devices are greater than 1. :return: """ _devices = self.get_cuda_devices() return _devices is not None and len(_devices) > 1 def write_args_file(self, root: Optional[Path] = None) -> None: """ Writes the current config to disk. The file is written either to the given folder, or if omitted, to the default outputs folder. """ dst = (root or self.outputs_folder) / ARGS_TXT dst.write_text(data=str(self)) def should_wait_for_other_cross_val_child_runs(self) -> bool: """ Returns True if the current run is an online run and is the 0th cross validation split. In this case, this will be the run that will wait for all other child runs to finish in order to aggregate their results. :return: """ return ( not self.is_offline_run) and self.cross_validation_split_index == 0 @property def is_offline_run(self) -> bool: """ Returns True if the run is executing outside AzureML, or False if inside AzureML. """ return is_offline_run_context(RUN_CONTEXT) @property def compute_mean_teacher_model(self) -> bool: """ Returns True if the mean teacher model should be computed. """ return self.mean_teacher_alpha is not None def __str__(self) -> str: """Returns a string describing the present object, as a list of key == value pairs.""" arguments_str = "\nArguments:\n" property_dict = vars(self) keys = sorted(property_dict) for key in keys: arguments_str += "\t{:18}: {}\n".format(key, property_dict[key]) return arguments_str
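How test_start_epoch, test_step_epochs, test_diff_epochs and num_epochs combine in get_test_epochs (defined in the class above) is easiest to see with concrete numbers; the snippet below just replays the method's logic with illustrative values, not the config defaults:

num_epochs, start, step, diff = 100, 20, 30, 3

test_epochs = {num_epochs}           # the last training epoch is always tested
for j in range(diff):
    epoch = start + step * j
    if epoch > num_epochs:
        break
    test_epochs.add(epoch)

print(sorted(test_epochs))           # [20, 50, 80, 100]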
class Test(param.Parameterized): df = param.DataFrame(default=empty)
class Test(param.Parameterized): test = param.DataFrame(default=valid_df, columns=['b', 'a', 'c'])
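Passing columns as a list, as in this Test class, also lets param check the column order via the ordered flag; the set form shown earlier only checks that the names are present. A hedged sketch with ordered spelled out explicitly and made-up frames:

import pandas as pd
import param

class OrderedCols(param.Parameterized):
    df = param.DataFrame(
        default=pd.DataFrame({"b": [1], "a": [2], "c": [3]}),
        columns=["b", "a", "c"],
        ordered=True,   # names and their order must both match
    )

o = OrderedCols()
try:
    o.df = pd.DataFrame({"a": [2], "b": [1], "c": [3]})   # same names, wrong order
except ValueError as err:
    print(err)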
class PanelDeck(param.Parameterized): """ PanelDeck class for panel.pane.DeckGL + multi_select(Boolean) parameter """ x = param.String("x") data = param.DataFrame() colors = param.DataFrame() indices = set() multi_select = param.Boolean(False, doc="multi-select") callback = param.Callable() spec = param.Dict() default_color = param.List([211, 211, 211, 50]) sizing_mode = param.String("stretch_both") height = param.Integer(400) width = param.Integer(400) tooltip_include_cols = param.List( [], doc="list of columns to include in tooltip") def get_tooltip_html(self): """ get tooltip info from dataframe columns, if not already present """ html_str = "" tooltip_columns = (list( set(self.data.columns) - set(["index", "coordinates"] + list(self.colors.columns))) if len( self.tooltip_include_cols) == 0 else self.tooltip_include_cols) for i in tooltip_columns: html_str += f"<b> {i} </b>: {{{i}}} <br>" return html_str def __init__(self, **params): """ initialize pydeck object, and set a listener on self.data """ super(PanelDeck, self).__init__(**params) self._view_state = pdk.ViewState(**self.spec["initialViewState"], bearing=0.45) self._layers = pdk.Layer("PolygonLayer", data=self.data, **self.spec["layers"][0]) self._tooltip = {"html": self.get_tooltip_html()} self._deck = pdk.Deck( mapbox_key=self.spec["mapboxApiAccessToken"], views=[ pdk.View( type="MapView", controller=True, height="100%", width="100%", ) ], layers=[self._layers], initial_view_state=self._view_state, tooltip=self._tooltip, ) if self.spec["map_style"]: self._deck.map_style = self.spec["map_style"] self.pane = pn.pane.DeckGL( self._deck, sizing_mode=self.sizing_mode, height=self.height, css_classes=["deck-chart"], ) self.param.watch(self._update, ["data"]) def selected_points(self): """ returns a list of currently selected column_x values as a list """ return self.data[self.x].loc[self.indices].tolist() @pn.depends("pane.click_state") def click_event(self): """ callback for click events, highlights the selected indices (single_select/multi_select) and sets the color of unselected indices to default_color """ index = self.pane.click_state.get("index", -1) old_indices = list(self.indices) if index == -1: index = slice(0, 0) self.indices = set() self.data[self.colors.columns] = self.colors else: if self.multi_select: if index not in self.indices: self.indices.add(index) else: self.indices.remove(index) else: if index in self.indices: self.indices.clear() else: self.indices.clear() self.indices.add(index) temp_colors = self.colors.copy() if len(self.indices) > 0: temp_colors.loc[set(self.data.index) - self.indices, self.colors.columns] = self.default_color self.data[self.colors.columns] = temp_colors self._layers.data = self.data self.pane.param.trigger("object") self.callback( self.data[self.x].loc[old_indices].tolist(), self.data[self.x].loc[list(self.indices)].tolist(), ) def _update(self, event): """ trigger deck_gl pane when layer data is updated """ if event.name == "data": self._layers.data = self.data self.pane.param.trigger("object") def view(self): """ view object """ x = pn.Column( self.param.multi_select, sizing_mode=self.sizing_mode, css_classes=["multi-select"], ) return pn.Column( x, self.click_event, self.pane, width=self.width, height=self.height, sizing_mode=self.sizing_mode, )
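A hedged usage sketch for PanelDeck: the spec dict has to carry at least the keys the constructor reads ("initialViewState", "layers", "mapboxApiAccessToken", "map_style"), data needs the column named by x plus a "coordinates" column and the colour columns, and colors is the DataFrame of colour columns that click_event swaps in and out. The values below are placeholders for illustration, not a working map:

import pandas as pd
import panel as pn

pn.extension("deckgl")

data = pd.DataFrame({
    "zone":        ["a", "b"],
    "coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0]],
                    [[1, 0], [1, 1], [2, 1], [2, 0]]],
    "color":       [[255, 0, 0, 80], [0, 0, 255, 80]],
})
colors = data[["color"]]

spec = {
    "mapboxApiAccessToken": "<your-mapbox-token>",   # placeholder
    "map_style": "mapbox://styles/mapbox/dark-v9",
    "initialViewState": {"latitude": 0.5, "longitude": 0.5, "zoom": 8},
    "layers": [{"get_polygon": "coordinates", "get_fill_color": "color", "pickable": True}],
}

deck = PanelDeck(x="zone", data=data, colors=colors, spec=spec,
                 callback=lambda previous, current: print("selected:", current))
layout = deck.view()
layout.servable()    # for `panel serve app.py`; use pn.serve(layout) from a script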