示例#1
0
class topographic_grid(xy_grid):
    """
    Plot one topographic grid per Sheet from its stored preference maps.

    Every Sheet in the simulation whose views hold both the X and the Y
    preference SheetView (XPreference and YPreference by default) is
    plotted by delegating to the parent grid command.

    To plot sheet_views other than XPreference and YPreference, pass
    their names as ``xsheet_view_name`` and ``ysheet_view_name``.
    """

    xsheet_view_name = param.String(default='XPreference', doc="""
        Name of the SheetView holding the X position locations.""")

    ysheet_view_name = param.String(default='YPreference', doc="""
        Name of the SheetView holding the Y position locations.""")

    # The coordinates are read from the sheet views, so the array
    # parameters inherited from the base class are disabled and hidden.
    x = param.Array(constant=True, precedence=-1)
    y = param.Array(constant=True, precedence=-1)

    def __call__(self, **params):
        """
        Plot a grid for each Sheet that has both preference views.

        Sheets lacking either view are skipped.  Nothing is returned.
        """
        p = ParamOverrides(self, params)
        x_view = p.xsheet_view_name
        y_view = p.ysheet_view_name

        for sheet in topo.sim.objects(Sheet).values():
            # Guard clause: both preference maps must be available.
            if x_view not in sheet.views.Maps or y_view not in sheet.views.Maps:
                continue

            x_data = sheet.views.Maps[x_view].last.data
            y_data = sheet.views.Maps[y_view].last.data

            suffix = "_" + sheet.name
            heading = ('Topographic mapping to ' + sheet.name
                       + ' at time ' + topo.sim.timestr())
            super(topographic_grid, self).__call__(
                x=x_data, y=y_data, title=heading, filename_suffix=suffix)
示例#2
0
class xy_grid(PylabPlotCommand):
    """
    By default, plot the x and y coordinate preferences as a grid.

    Each row of the ``x``/``y`` arrays is drawn as one line and each
    column as another, so the two arrays together describe a (possibly
    deformed) grid.
    """

    axis = param.Parameter(default=[-0.5, 0.5, -0.5, 0.5],
                           doc="""
        Four-element list of the plot bounds, i.e. [xmin, xmax, ymin, ymax]."""
                           )

    skip = param.Integer(default=1,
                         bounds=[1, None],
                         softbounds=[1, 10],
                         doc="""
        Plot every skipth line in each direction.
        E.g. skip=4 means to keep only every fourth horizontal line
        and every fourth vertical line, except that the first and last
        are always included. The default is to include all data points.""")

    x = param.Array(doc="Numpy array of x positions in the grid.")

    y = param.Array(doc="Numpy array of y positions in the grid.")

    def __call__(self, **params):
        """
        Draw the grid on a new 5x5 figure and return that figure.

        Keyword arguments override the parameters of this command for
        this call only (via ParamOverrides).
        """
        p = ParamOverrides(self, params)
        fig = plt.figure(figsize=(5, 5))

        # This one-liner works in Octave, but in matplotlib it
        # results in lines that are all connected across rows and columns,
        # so here we plot each line separately:
        #   plt.plot(x,y,"k-",transpose(x),transpose(y),"k-")
        # Here, the "k-" means plot in black using solid lines;
        # see matplotlib for more info.

        # Temporarily make pyplot non-interactive while the many lines
        # are added; the previous mode is restored in the ``finally``
        # clause so that a failure while plotting cannot leave pyplot
        # stuck in non-interactive mode.
        isint = plt.isinteractive()
        plt.ioff()
        try:
            x_cols = np.transpose(p.x)
            y_cols = np.transpose(p.y)

            for r, c in zip(p.y[::p.skip], p.x[::p.skip]):
                plt.plot(c, r, "k-")
            for r, c in zip(y_cols[::p.skip], x_cols[::p.skip]):
                plt.plot(c, r, "k-")

            # Force the last line in each direction to be drawn, to
            # avoid leaving the outermost cells open when skipping.
            if p.skip != 1:
                plt.plot(p.x[-1], p.y[-1], "k-")
                plt.plot(x_cols[-1], y_cols[-1], "k-")

            plt.xlabel('x')
            plt.ylabel('y')
            # Currently sets the input range arbitrarily; should presumably
            # figure out what the actual possible range is for this
            # simulation (which would presumably be the maximum size of
            # any GeneratorSheet?).
            plt.axis(p.axis)
        finally:
            if isint:
                plt.ion()

        # NOTE(review): _generate_figure is not defined in this class;
        # presumably inherited from PylabPlotCommand -- confirm.
        self._generate_figure(p)
        return fig
示例#3
0
def params_from_kwargs(**kwargs):
    """
    Build a dictionary of parameters from keyword arguments.

    A value that is already a parameter is used unchanged.  Any other
    value becomes the default of a parameter whose type is inferred from
    the value (bool, int, float, str, dict, tuple, list or numpy array);
    values of any other type are wrapped in a generic param.Parameter.
    """
    def promote(value):
        # Existing parameters pass straight through.
        if isinstance(value, param.Parameter):
            return value
        # bool is tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            return param.Boolean(default=value)
        if isinstance(value, int):
            return param.Integer(default=value)
        if isinstance(value, float):
            return param.Number(default=value)
        if isinstance(value, str):
            return param.String(default=value)
        if isinstance(value, dict):
            return param.Dict(default=value)
        if isinstance(value, tuple):
            return param.Tuple(default=value)
        if isinstance(value, list):
            return param.List(default=value)
        if isinstance(value, np.ndarray):
            return param.Array(default=value)
        return param.Parameter(default=value)

    return {key: promote(value) for key, value in kwargs.items()}
示例#4
0
文件: audio.py 项目: ivanov/imagen
class AudioFile(TimeSeries):
    """
    Requires an audio file in any format accepted by audiolab (wav, aiff, flac).

    The file named by ``filename`` is read once at construction time and
    stored in ``time_series`` (rescaled to [0.0, 1.0]) together with its
    ``sample_rate``.
    """

    # Filled in from the audio file, hence hidden (negative precedence).
    time_series = param.Array(precedence=(-1))
    sample_rate = param.Number(precedence=(-1))

    filename = param.Filename(default='sounds/complex/daisy.wav',
                              doc="""
        File path (can be relative to Param's base path) to an audio file. 
        The audio can be in any format accepted by audiolab, e.g. WAV, AIFF, or FLAC."""
                              )

    precision = param.Parameter(default=float64,
                                doc="""
        The float precision to use for loaded audio files.""")

    def __init__(self, **params):
        super(AudioFile, self).__init__(**params)
        self._load_audio_file()

    def _load_audio_file(self):
        """
        Read every frame of ``filename`` into ``time_series`` and record
        the file's sample rate.
        """
        source = audiolab.Sndfile(self.filename, 'r')
        try:
            # audiolab scales the range by the bit depth automatically so
            # the dynamic range is now [-1.0, 1.0]; we rescale it to the
            # range [0.0, 1.0].
            self.time_series = (
                source.read_frames(source.nframes, dtype=self.precision) + 1) / 2
            self.sample_rate = source.samplerate
        finally:
            # Release the file handle even if reading fails.
            source.close()
示例#5
0
class HvDataset(param.Parameterized):
    '''Converts a numpy image to holoviews Dataset dynamic map.

    The image, label and spacing are parameters; assigning a new ``img``
    re-broadcasts the spacing and bumps an internal counter that the
    dataset builder depends on.
    '''

    img = param.Array(np.zeros((2, 2), dtype=np.uint8),
                      doc='numpy iamge array',
                      precedence=-1)
    label = param.String('channel',
                         doc='label for the generated hv.Dataset',
                         precedence=-1)
    spacing = param.Parameter((1, ), doc='pixel/voxel size', precedence=-1)

    # Incremented every time ``img`` changes; used as a cheap change signal.
    _update_counter = param.Integer(0, precedence=-1)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Expand the spacing to one entry per image dimension up front.
        self._broadcast_spacing()

    # NOTE(review): depends() is given no dependencies here -- confirm
    # whether the decorator is needed at all.
    @param.depends()
    def _broadcast_spacing(self):
        '''Replace ``spacing`` by a tuple with ``img.ndim`` entries.'''
        self.spacing = tuple(
            np.broadcast_to(np.array(self.spacing), self.img.ndim).tolist())

    @param.depends('img', watch=True)
    def _update_img(self):
        '''Runs when ``img`` is set: refresh the spacing and bump the counter.'''
        self._broadcast_spacing()
        self._update_counter += 1

    # NOTE dynamic map with dependency directly on array is less responsive (hash computation overhead?)
    @param.depends('_update_counter', 'label')
    def _build_dataset(self):
        '''Return the result of image_to_hvds for the current image, label and spacing.'''
        return image_to_hvds(self.img, self.label, self.spacing)

    @param.depends('spacing')
    def dmap(self):
        '''Return a DynamicMap (cache_size=1) driven by ``_build_dataset``.'''
        return hv.DynamicMap(self._build_dataset, cache_size=1)
示例#6
0
class TestSet(param.Parameterized):
    """
    Parameterized fixture declaring one parameter of many different types.

    The numpy- and pandas-backed parameters are replaced by ``None`` when
    the module-level ``np`` / ``pd`` names are ``None``.
    """

    # Names of the parameters grouped by the property the tests care about.
    numpy_params = ['r']
    pandas_params = ['s', 't', 'u']
    conditionally_unsafe = ['f', 'o']

    a = param.Integer(
        default=5,
        doc='Example doc',
        bounds=(2, 30),
        inclusive_bounds=(True, False),
    )
    b = param.Number(default=4.3, allow_None=True)
    c = param.String(default='foo')
    d = param.Boolean(default=False)
    e = param.List(default=[1, 2, 3], class_=int)
    f = param.List(default=[1, 2, 3])
    g = param.Date(default=datetime.datetime.now())
    h = param.Tuple(default=(1, 2, 3), length=3)
    i = param.NumericTuple(default=(1, 2, 3, 4))
    j = param.XYCoordinates(default=(32.1, 51.5))
    k = param.Integer(default=1)
    l = param.Range(default=(1.1, 2.3), bounds=(1, 3))
    m = param.String(default='baz', allow_None=True)
    n = param.ObjectSelector(
        default=3,
        objects=[3, 'foo'],
        allow_None=False,
    )
    o = param.ObjectSelector(
        default=simple_list,
        objects=[simple_list],
        allow_None=False,
    )
    p = param.ListSelector(default=[1, 4, 5], objects=[1, 2, 3, 4, 5, 6])
    q = param.CalendarDate(default=datetime.date.today())

    # Array parameter, only declared when ``np`` is available.
    if np is None:
        r = None
    else:
        r = param.Array(default=ndarray)

    # DataFrame parameters, only declared when ``pd`` is available.
    if pd is None:
        s = None
        t = None
        u = None
    else:
        s = param.DataFrame(default=df1, columns=2)
        t = param.DataFrame(
            default=pd.DataFrame({'A': [1, 2, 3], 'B': [1.1, 2.2, 3.3]}),
            columns=(1, 4),
            rows=(2, 5),
        )
        u = param.DataFrame(default=df2, columns=['A', 'B'])

    v = param.Dict(default={'1': 2})
示例#7
0
class Lasso(LinkedStream):
    """
    Linked stream carrying a lasso selection made in 2D space.

    The selection is exposed through ``geometry``, a two-column array
    of coordinates.
    """

    geometry = param.Array(
        doc="""
        The coordinates of the lasso geometry as a two-column array.""",
        constant=True)
 class _BigDumbParams(param.Parameterized):
     """Parameterized class declaring one parameter of each of many param types.

     Almost every parameter is declared with ``allow_None=True`` and a
     concrete default value.
     """

     action = param.Action(default_action, allow_None=True)
     array = param.Array(np.array([1.0, 2.0]))
     boolean = param.Boolean(True, allow_None=True)
     callable = param.Callable(default_action, allow_None=True)
     class_selector = param.ClassSelector(int, is_instance=False, allow_None=True)
     color = param.Color("#FFFFFF", allow_None=True)
     # Composite of two of the parameters declared above.
     composite = param.Composite(["action", "array"], allow_None=True)
     # Fall back to declaring the DataFrame without allow_None when the
     # constructor raises TypeError.
     # NOTE(review): presumably some param versions reject allow_None for
     # DataFrame -- confirm.
     try:
         data_frame = param.DataFrame(
             pd.DataFrame({"A": 1.0, "B": np.arange(5)}), allow_None=True
         )
     except TypeError:
         data_frame = param.DataFrame(pd.DataFrame({"A": 1.0, "B": np.arange(5)}))
     # The default date is evaluated once, when the class body executes.
     date = param.Date(datetime.now(), allow_None=True)
     date_range = param.DateRange((datetime.min, datetime.max), allow_None=True)
     dict_ = param.Dict({"foo": "bar"}, allow_None=True, doc="dict means dictionary")
     dynamic = param.Dynamic(default=default_action, allow_None=True)
     # File-based parameters all point at, or glob within, FILE_DIR_DIR.
     file_selector = param.FileSelector(
         os.path.join(FILE_DIR_DIR, "LICENSE"),
         path=os.path.join(FILE_DIR_DIR, "*"),
         allow_None=True,
     )
     filename = param.Filename(
         os.path.join(FILE_DIR_DIR, "LICENSE"), allow_None=True
     )
     foldername = param.Foldername(os.path.join(FILE_DIR_DIR), allow_None=True)
     hook_list = param.HookList(
         [CallableObject(), CallableObject()], class_=CallableObject, allow_None=True
     )
     integer = param.Integer(10, allow_None=True)
     list_ = param.List([1, 2, 3], allow_None=True, class_=int)
     list_selector = param.ListSelector([2, 2], objects=[1, 2, 3], allow_None=True)
     magnitude = param.Magnitude(0.5, allow_None=True)
     multi_file_selector = param.MultiFileSelector(
         [],
         path=os.path.join(FILE_DIR_DIR, "*"),
         allow_None=True,
         check_on_set=True,
     )
     number = param.Number(-10.0, allow_None=True, doc="here is a number")
     numeric_tuple = param.NumericTuple((5.0, 10.0), allow_None=True)
     # Selector whose objects are given as a name -> value mapping.
     object_selector = param.ObjectSelector(
         False, objects={"False": False, "True": 1}, allow_None=True
     )
     path = param.Path(os.path.join(FILE_DIR_DIR, "LICENSE"), allow_None=True)
     range_ = param.Range((-1.0, 2.0), allow_None=True)
     series = param.Series(pd.Series(range(5)), allow_None=True)
     string = param.String("foo", allow_None=True, doc="this is a string")
     tuple_ = param.Tuple((3, 4, "fi"), allow_None=True)
     x_y_coordinates = param.XYCoordinates((1.0, 2.0), allow_None=True)
示例#9
0
    def define(cls, name, **kwargs):
        """
        Quickly declare a new Stream class from keyword arguments.

        Intended for interactive use such as notebooks; importable
        source code should keep using regular parameterized class
        definitions.

        ``name`` becomes the name of the new class and every keyword
        becomes one of its parameters.  A value that is already a
        parameter is used as given; for any other value a constant
        parameter of the inferred type is declared with the value as
        its default.

        Inferred types: bool, int, float, str, dict, tuple, list and
        numpy arrays; anything else becomes a generic param.Parameter.
        """
        def declare(value):
            # Ready-made parameters are used untouched.
            if isinstance(value, param.Parameter):
                return value
            options = dict(default=value, constant=True)
            # bool is tested before int because bool is a subclass of int.
            if isinstance(value, bool):
                return param.Boolean(**options)
            if isinstance(value, int):
                return param.Integer(**options)
            if isinstance(value, float):
                return param.Number(**options)
            if isinstance(value, str):
                return param.String(**options)
            if isinstance(value, dict):
                return param.Dict(**options)
            if isinstance(value, tuple):
                return param.Tuple(**options)
            if isinstance(value, list):
                return param.List(**options)
            if isinstance(value, np.ndarray):
                return param.Array(**options)
            return param.Parameter(**options)

        namespace = {'name': param.String(default=name)}
        for key, value in kwargs.items():
            namespace[key] = declare(value)

        # Create the Stream subclass dynamically from the namespace.
        return type(name, (Stream,), namespace)
示例#10
0
class TestSet(param.Parameterized):
    """
    Parameterized fixture with one array and one DataFrame parameter.

    Each attribute is ``None`` instead of a parameter when the
    corresponding module-level name (``np`` / ``pd``) is ``None``.
    """

    if np is None:
        array = None
    else:
        array = param.Array(default=ndarray)

    if pd is None:
        data_frame = None
    else:
        data_frame = param.DataFrame(default=df)
示例#11
0
 class Z(param.Parameterized):
     """Parameterized class with a single array parameter ``z``."""

     z = param.Array(default=numpy.array([1]))
示例#12
0
class OrthoSegmentationDashBoard(BaseImageDashBoard):
    '''Dashboard to view 3D, multi-channel images as color composite.

    Combines an OrthoViewer (three orthogonal slices) with a
    SegmentationViewer built from the channel configuration.

    NOTE(review): loaded_objects, io_widgets, index_to_str,
    _has_multiselect_changed, _dynamic_update_counter and
    _complete_update_counter are not defined in this class; presumably
    they come from BaseImageDashBoard -- confirm.
    '''

    channel_config = param.Dict({}, doc='dictionnary configuring each channel')
    composite_channels = param.List(
        doc='ids of channels to be displayed as color composite')
    overlay_channels = param.List(
        doc='ids of channels to be displayed as overlay on top of the composite'
    )
    segmentation_viewer = param.Parameter(SegmentationViewer())
    # One HvDataset per loaded image, rebuilt by dmap().
    hv_datasets = param.List()
    ortho_viewer = param.Parameter(OrthoViewer(add_crosshairs=False))
    spacing = param.Parameter((1, ), doc='pixel/voxel size', precedence=-1)
    # Passed to OrthoViewer as target_position when the panel is rebuilt.
    init_position = param.Array(np.array([-1, -1, -1]))

    # Current (z, y, x) slice position divided by spacing and rounded to int.
    last_clicked_position = param.Array(np.array([]))

    # Bumped whenever the segmentation viewer is rebuilt, to refresh widgets().
    _widget_update_counter = param.Integer(0)

    @param.depends('ortho_viewer.z_viewer.slice_id',
                   'ortho_viewer.y_viewer.slice_id',
                   'ortho_viewer.x_viewer.slice_id',
                   watch=True)
    def watch_position(self):
        '''Update last_clicked_position whenever any slice id changes.'''
        z = self.ortho_viewer.z_viewer.slice_id
        y = self.ortho_viewer.y_viewer.slice_id
        x = self.ortho_viewer.x_viewer.slice_id

        # Divide the slice ids by the spacing, then round to integers.
        pos = np.array(np.array((z, y, x)) / self.spacing)
        self.last_clicked_position = np.round(pos).astype(int)

    @param.depends('_dynamic_update_counter', watch=True)
    def _dynamic_img_update(self):
        '''Push the currently loaded images into the existing HvDatasets.'''
        # Pairs datasets and images by position; zip stops at the shorter one.
        for hv_ds, img in zip(self.hv_datasets, self.loaded_objects.values()):
            hv_ds.img = img

    def dmap(self):
        '''Build and return the list of per-view dynamic maps.

        Rebuilds the segmentation viewer when it has no channel viewers
        yet or when the multiselect changed, recreates the HvDatasets
        from the loaded objects and chains them through the ortho viewer,
        the crosshairs and the segmentation viewer.
        '''

        if not self.segmentation_viewer.channel_viewers or self._has_multiselect_changed:
            # Restrict the channel configuration to the loaded objects.
            selected_channel_config = {
                key: self.channel_config[key]
                for key in self.loaded_objects.keys()
            }
            self.segmentation_viewer = SegmentationViewer.from_channel_config(
                selected_channel_config,
                composite_channels=self.composite_channels,
                overlay_channels=self.overlay_channels)
            self._widget_update_counter += 1

        self.hv_datasets = [
            HvDataset(img=img,
                      label=self.index_to_str(key),
                      spacing=self.spacing)
            for key, img in self.loaded_objects.items()
        ]
        dmaps = [hv_ds.dmap() for hv_ds in self.hv_datasets]

        dmaps = [self.ortho_viewer(dmap) for dmap in dmaps]

        # invert slices and channels
        dmaps = list(zip(*dmaps))

        # add crosshair overlay, bug if adding to an existing overlay
        cross = self.ortho_viewer.get_crosshair()
        # Both operands are tuples here, so ``+`` concatenates them.
        dmaps = [dmap + cr for dmap, cr in zip(dmaps, cross)]

        dmaps = [self.segmentation_viewer(dmap) for dmap in dmaps]

        return dmaps

    @param.depends('_widget_update_counter')
    def widgets(self):
        '''Return a column with the io widgets above the segmentation widgets.'''

        return pn.Column(self.io_widgets, self.segmentation_viewer.widgets)

    @param.depends('_complete_update_counter')
    def _rebuild_panel(self):
        '''Create a fresh OrthoViewer and return its panel with the widgets added.'''
        self.ortho_viewer = OrthoViewer(add_crosshairs=False,
                                        target_position=self.init_position)

        panel = self.ortho_viewer.panel(self.dmap())

        # add the composite viewer above the orthoview widget (navigation checkbox)
        panel[1][1] = pn.Column(self.widgets(), panel[1][1])

        return panel

    def panel(self):
        '''Return a row wrapping ``_rebuild_panel``.'''
        return pn.Row(self._rebuild_panel)
示例#13
0
class trainer(param.Parameterized):
    """
    'Test and Train' page: fit a classifier on ``X`` against the labels
    in ``display_df`` and display the evaluation results.

    Parameters
    ----------
    display_df : DataFrame shown before training; must provide a
        ``label`` column once passed through convert_sentiment_values.
    results : set to True once evaluation results are available.
    X : feature array passed to train_test_split.
    result_string : Markdown text of the latest evaluation.
    """

    display_df = param.DataFrame(default=pd.DataFrame())

    results = param.Boolean(default=False)

    X = param.Array(default=None)

    result_string = param.String(default='')

    def __init__(self, **params):
        super().__init__(**params)
        self.name_of_page = 'Test and Train'

        self.test_slider = pn.widgets.IntSlider(name='Test Percentage',
                                                start=0,
                                                end=100,
                                                step=10,
                                                value=20)

        self.tt_button = pn.widgets.Button(name='Train and Test',
                                           button_type='primary')
        self.tt_button.on_click(self.train_test)

        self.tt_model = pn.widgets.Select(
            name='Select', options=['Random Forrest Classifier'])

    def train_test(self, event):
        """
        Button callback: split the data, fit the selected model, predict
        on the test split and hand the result to ``analyze``.

        Raises
        ------
        ValueError
            If the selected model name is not supported.
        """
        # get values from sentiment.
        self.display_df = convert_sentiment_values(self.display_df)

        y = self.display_df['label']

        # get train test sets; the slider holds a percentage
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, y, test_size=self.test_slider.value / 100, random_state=0)

        model_name = self.tt_model.value
        if model_name == 'Random Forrest Classifier':
            sentiment_classifier = RandomForestClassifier(n_estimators=1000,
                                                          random_state=0)
        else:
            # Fail with a clear message instead of an UnboundLocalError
            # on ``y_pred`` further down.
            raise ValueError('Unsupported model: %r' % (model_name,))

        sentiment_classifier.fit(X_train, y_train)
        y_pred = sentiment_classifier.predict(X_test)

        self.y_test = y_test
        self.y_pred = y_pred
        self.analyze()

    def analyze(self):
        """
        Compute confusion matrix, classification report and accuracy for
        the stored test labels and predictions, and render them into
        ``result_string``.
        """
        self.cm = confusion_matrix(self.y_test, self.y_pred)
        self.cr = classification_report(self.y_test, self.y_pred)
        self.acc_score = accuracy_score(self.y_test, self.y_pred)

        # Indent every report line and every matrix row like the
        # surrounding template.  All lines/rows are emitted, so the
        # output is complete for any number of classes rather than
        # assuming a fixed report length and a 2x2 matrix.
        line_break = '\n            '
        report_block = self.cr.replace('\n', line_break)
        matrix_block = line_break.join(str(row) for row in self.cm.tolist())

        self.result_string = f"""
            ### Classification Report
            <pre>
            {report_block}
            </pre>
            ### Confusion Matrix
            <pre>
            {matrix_block}

            </pre>

            ### Accuracy Score
            <pre>
            {round(self.acc_score, 4)}
            </pre>
            """

        self.results = True

    def options_page(self, help_text):
        """Return the widget box holding the help text and the controls."""
        return pn.WidgetBox(help_text,
                            self.tt_model,
                            self.test_slider,
                            self.tt_button,
                            height=375,
                            width=300)

    @pn.depends('results')
    def df_pane(self):
        """Return a box showing the data frame, or the results once available."""
        if not self.results:
            self.result_pane = self.display_df

        else:
            self.result_pane = pn.pane.Markdown(f"""
                {self.result_string}
                """,
                                                width=500,
                                                height=350)

        return pn.WidgetBox(self.result_pane, height=375, width=450)

    def panel(self):
        """Return the full page: options column next to the result column."""
        help_text = (
            "Your text will now be trained and tested using a selected model.  You may "
            "choose a percentage of your data to reserve for testing, the rest will be used for "
            "training.  For example, if I reserve 20%, the rest of the 80% will be used for training "
            "and the 20% will be used to determine how well the trained model does assigning a "
            "sentiment label to the testing text.  Currently, the only model available is the sklearn "
            "Random Forrest Classifier model.")

        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Train and Test'),
                self.options_page(help_text),
            ), pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            ))
class WordEmbedder(base_page):
    """
    'Word Embedding' page: turn the text into embeddings with the model
    chosen in the select widget.

    Parameters
    ----------
    spark_df : Spark data frame that the Glove and Bert models transform
        and from which the sentiment labels are read.
    display_df : pandas data frame shown on the page; replaced by the
        embeddings after ``transform``.
    df : additional pandas data frame (not used in this class).
    X : feature array produced by the count vectorizer.
    """

    spark_df = param.ClassSelector(
        class_= sdf
    )

    display_df = param.DataFrame(default = pd.DataFrame())

    df = param.DataFrame()
    X = param.Array(default = None)

    def __init__(self, **params):
        super().__init__(**params)

        # NOTE(review): name_of_page is not declared here; presumably it
        # is a parameter of base_page -- confirm.
        self.param.name_of_page.default = 'Word Embedding'
        self.we_model = pn.widgets.Select(name='Select', options=['SKLearn Count Vectorizer', 'Glove', 'Bert'])

        self.we_button = pn.widgets.Button(name='Transform', button_type='primary')
        self.we_button.on_click(self.transform)

    def options_page(self):
        """Return the widget box with the model selector and the button."""
        return pn.WidgetBox(self.we_model,
                            self.we_button,
                height = 300,
                width = 300

        )

    def transform(self, event):
        """
        Button callback: compute embeddings with the selected model and
        store them in ``display_df``.

        Exactly one model branch runs per call.  Only the count
        vectorizer branch sets ``X``.
        """
        print('embedding')

        model_name = self.we_model.value

        if model_name == 'Glove':
            print('glove')
            from sparknlp.annotator import WordEmbeddingsModel
            word_embeddings=WordEmbeddingsModel.pretrained()
            word_embeddings.setInputCols(['document','stem'])
            word_embeddings.setOutputCol('embeddings')

            self.spark_df = word_embeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df, 'embeddings.embeddings', col = 'embeddings')

        # ``elif`` (not ``if``): otherwise selecting 'Glove' would also
        # fall into the final ``else`` branch and run Bert on top.
        elif model_name == 'SKLearn Count Vectorizer':
            from sklearn.feature_extraction.text import CountVectorizer
            print('join lines')
            corpus = join_lines(self.display_df)
            print('doing vectorizer')
            vectorizer = CountVectorizer(max_features=1500)
            print('vectorizing 2')
            X = vectorizer.fit_transform(corpus).toarray()

            cnt = self.spark_df.count()
            print('getting sentiment from spark df')
            # Keep only the first field of each returned row.
            labels = [row[0] for row in self.spark_df.select('sentiment').take(cnt)]
            print('done getting sentiment, creating dataframe')
            xlist = [list(row) for row in X]
            self.X = X
            embeddings_df = pd.DataFrame({'embeddings': xlist, 'sentiment': labels})

        else:
            print('bert')
            from sparknlp.annotator import BertEmbeddings
            bertEmbeddings = BertEmbeddings.pretrained()

            bertEmbeddings.setInputCols(['document','stem'])
            bertEmbeddings.setOutputCol('embeddings')

            self.spark_df = bertEmbeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df, 'embeddings.embeddings', col = 'embeddings')

        self.display_df = embeddings_df
        # NOTE(review): continue_button is not created in this class;
        # presumably provided by base_page -- confirm.
        self.continue_button.disabled = False


    @param.output('X', 'display_df')
    def output(self):
        """Return the outputs of this page: ``(X, display_df)``."""
        return self.X, self.display_df
        
        
        
示例#15
0
class OrthoViewer(BaseViewer):
    '''Slices a 3D dataset along x,y and z axes and synchronizes the views.

    Holds one SliceViewer per axis and one SingleTap stream per view; a
    tap on a view updates ``target_position``, which in turn moves the
    three slice viewers.
    '''

    # Toggles tap navigation; also shown in the panel layout.
    navigaton_on = param.Boolean(True)
    z_viewer = param.Parameter(SliceViewer(axis='z'))
    x_viewer = param.Parameter(SliceViewer(axis='x'))
    y_viewer = param.Parameter(SliceViewer(axis='y'))

    # One tap stream per 2D view.
    xy_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)
    zy_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)
    xz_tap = param.Parameter(hv.streams.SingleTap(transient=True),
                             instantiate=True)

    # Position to move to, indexed as [z, y, x] by the code below.
    target_position = param.Array(np.array([-1, -1, -1]))
    # Guard flag that prevents _update_target_position from re-entering.
    _updating_position = param.Boolean(False)

    add_crosshairs = param.Boolean(True)

    @param.depends()
    def _invert_axes(self, elem):
        '''Return ``elem`` reindexed with its key dimensions reversed.'''
        # NOTE should use opts(invert_axes) instead but for some reason
        # it fails after zooming or panning
        return elem.reindex(elem.kdims[::-1])

    def get_crosshair(self):
        '''Create the six crosshair lines at the current widget values.

        The lines are stored as attributes (xy_v, xy_h, zy_v, zy_h, xz_v,
        xz_h) and returned as a list of three (vertical, horizontal)
        pairs, one per view.
        '''
        self.xy_v = hv.VLine(self.x_viewer._widget.value,
                             kdims=['x', 'y'],
                             label='xyV',
                             group='orthoview')
        self.xy_h = hv.HLine(self.y_viewer._widget.value,
                             kdims=['x', 'y'],
                             label='xyH',
                             group='orthoview')

        self.zy_v = hv.VLine(self.z_viewer._widget.value,
                             kdims=['za', 'y'],
                             label='zyV',
                             group='orthoview')
        self.zy_h = hv.HLine(self.y_viewer._widget.value,
                             kdims=['za', 'y'],
                             label='zyH',
                             group='orthoview')

        self.xz_v = hv.VLine(self.x_viewer._widget.value,
                             kdims=['x', 'zb'],
                             label='xzV',
                             group='orthoview')
        self.xz_h = hv.HLine(self.z_viewer._widget.value,
                             kdims=['x', 'zb'],
                             label='xzH',
                             group='orthoview')

        return [(self.xy_v, self.xy_h), (self.zy_v, self.zy_h),
                (self.xz_v, self.xz_h)]

    def _link_crosshairs(self):
        '''Set each crosshair line to its slice id and js-link it to its slider.

        Requires the line attributes created by get_crosshair().
        '''
        self.xy_v.data = self.x_viewer.slice_id
        self.xy_h.data = self.y_viewer.slice_id
        self.zy_v.data = self.z_viewer.slice_id
        self.zy_h.data = self.y_viewer.slice_id
        self.xz_v.data = self.x_viewer.slice_id
        self.xz_h.data = self.z_viewer.slice_id

        self._jslink_discrete_slider(self.x_viewer._widget, self.xy_v)
        self._jslink_discrete_slider(self.y_viewer._widget, self.xy_h)
        self._jslink_discrete_slider(self.z_viewer._widget, self.zy_v)
        self._jslink_discrete_slider(self.y_viewer._widget, self.zy_h)
        self._jslink_discrete_slider(self.x_viewer._widget, self.xz_v)
        self._jslink_discrete_slider(self.z_viewer._widget, self.xz_h)

    def _jslink_discrete_slider(self, widget, line):
        '''hack to jslink pn.widgets.DiscreteSlider to vertical/horizontal lines.
        links the underlying IntSlider and index list of available values'''

        # The widget's values are embedded in the JS snippet as a literal
        # and indexed by the slider value.
        code = '''
                    var vals = {};  
                    glyph.location = vals[source.value]
                '''.format(str(widget.values))

        return widget._slider.jslink(line, code={'value': code})

    @param.depends()
    def _update_dynamic_values(self, xy, zy, xz):
        '''render dummy plots to force updating the sliders, getting plot size, etc.'''
        self.frame_y_size = hv.render(xy).frame_height
        hv.render(zy)  # init slicer
        self.frame_z_size = hv.render(xz).frame_height

    def _call(self, dmap):
        '''Slice ``dmap`` along each axis and return the (xy, zy, xz) views.

        Also attaches the tap streams to the views and initialises the
        slice viewers from ``target_position``.
        '''
        dmap_xy = self.z_viewer(dmap)
        # The z dimension is renamed per view ('za' / 'zb').
        dmap_zy = self.x_viewer(dmap).redim(z='za').apply(self._invert_axes)
        dmap_xz = self.y_viewer(dmap).redim(z='zb')

        self._init_tap_navigator(dmap_xy, dmap_zy, dmap_xz)

        self.z_viewer.slice_init = self.target_position[0]
        self.y_viewer.slice_init = self.target_position[1]
        self.x_viewer.slice_init = self.target_position[2]

        return (dmap_xy, dmap_zy, dmap_xz)

    @param.depends('target_position', watch=True)
    def _update_target_position(self):
        '''Move the three slice viewers to ``target_position``.'''
        # NOTE(review): the flag is not reset if a moveto() call raises.
        if not self._updating_position:
            self._updating_position = True

            self.z_viewer.moveto(self.target_position[0])
            self.y_viewer.moveto(self.target_position[1])
            self.x_viewer.moveto(self.target_position[2])

            self._updating_position = False

    @param.depends('xy_tap.x', 'xy_tap.y', watch=True)
    def _update_xy_sliders(self):
        '''On a tap in the xy view, keep z and take y, x from the tap.'''
        if self.navigaton_on and self.xy_tap.x is not None and self.xy_tap.y is not None:
            self.target_position = np.array(
                [self.z_viewer.slice_id, self.xy_tap.y, self.xy_tap.x])

    @param.depends('zy_tap.x', 'zy_tap.y', watch=True)
    def _update_zy_sliders(self):
        '''On a tap in the zy view, keep x and take z, y from the tap.'''
        if self.navigaton_on and self.zy_tap.x is not None and self.zy_tap.y is not None:
            self.target_position = np.array(
                [self.zy_tap.x, self.zy_tap.y, self.x_viewer.slice_id])

    @param.depends('xz_tap.x', 'xz_tap.y', watch=True)
    def _update_xz_sliders(self):
        '''On a tap in the xz view, keep y and take z, x from the tap.'''
        if self.navigaton_on and self.xz_tap.x is not None and self.xz_tap.y is not None:
            self.target_position = np.array(
                [self.xz_tap.y, self.y_viewer.slice_id, self.xz_tap.x])

    def _init_tap_navigator(self, xy, zy, xz):
        '''Attach each tap stream to its view.'''
        self.xy_tap.source = xy
        self.zy_tap.source = zy
        self.xz_tap.source = xz

    def panel(self, dmaps):
        '''Lay out the three views: xy and zy on top, xz and the
        navigation toggle below.

        ``dmaps`` must unpack into the (xy, zy, xz) views.
        '''
        xy, zy, xz = dmaps

        self._update_dynamic_values(xy, zy, xz)

        # Size the zy view from the rendered heights of the xz and xy views.
        zy.opts(
            opts.Image(frame_width=self.frame_z_size,
                       frame_height=self.frame_y_size),
            opts.RGB(frame_width=self.frame_z_size,
                     frame_height=self.frame_y_size),
        )

        if self.add_crosshairs:
            self.get_crosshair()
            panel_xy = self.z_viewer.panel(
                (xy * self.xy_h * self.xy_v).relabel(group='orthoview'))
            panel_zy = self.x_viewer.panel(
                (zy * self.zy_h * self.zy_v).relabel(group='orthoview'))
            panel_xz = self.y_viewer.panel(
                (xz * self.xz_h * self.xz_v).relabel(group='orthoview'))
        else:
            panel_xy = self.z_viewer.panel(xy.relabel(group='orthoview'))
            panel_zy = self.x_viewer.panel(zy.relabel(group='orthoview'))
            panel_xz = self.y_viewer.panel(xz.relabel(group='orthoview'))

        # NOTE(review): runs even when add_crosshairs is False, in which
        # case the line attributes must already exist from an earlier
        # get_crosshair() call -- confirm all callers guarantee that.
        self._link_crosshairs()

        return pn.Column(pn.Row(panel_xy, panel_zy),
                         pn.Row(panel_xz, self.param.navigaton_on))
示例#16
0
class PreProcessor(param.Parameterized):
    """Text pre-processing stage built from panel widgets.

    The user uploads a text file (and optionally a stop-word list), picks
    tokenizer, stemmer and embedding options on separate tabs, and clicks
    'Continue'.  The loaded text is then tokenized, filtered, stemmed and
    vectorized; the result is stored in ``X`` / ``display_df`` and ``ready``
    is set to True.
    """

    # df will be the variable holding the dataframe of text
    df = param.DataFrame()
    # title to display for each tab
    name_of_page = param.String(default='Name of page')
    # dataframe to display.
    display_df = param.DataFrame(default=pd.DataFrame())
    # stopword_df is the dataframe containing the stop words
    stopword_df = param.DataFrame(default=pd.DataFrame())

    # words to drop from the tokenized text
    stopwords = param.List(default=[])
    # embedding matrix produced by continue_ready
    X = param.Array(default=None)

    ready = param.Boolean(
        default=False,
        doc='trigger for moving to the next page',
    )

    def __init__(self, **params):
        """Create the widgets used by the individual tab pages."""
        super().__init__(**params)

        # button for the pre-processing page
        self.continue_button = pn.widgets.Button(name='Continue',
                                                 width=100,
                                                 button_type='primary')
        self.continue_button.on_click(self.continue_ready)

        # load text widgets
        self.header_checkbox = pn.widgets.Checkbox(name='Header included in file')
        self.load_file = pn.widgets.FileInput()
        self.load_file.link(self.df, callbacks={'value': self.load_df})

        # tokenize widgets; raw string so the backslash reaches the regex
        # engine instead of being read as a (deprecated) string escape
        self.search_pattern_input = pn.widgets.TextInput(name='Search Pattern',
                                                         value=r'\w+',
                                                         width=100)

        # remove stop words widgets
        self.load_words_button = pn.widgets.FileInput()
        self.load_words_button.link(self.stopwords,
                                    callbacks={'value': self.load_stopwords})

        # stem widgets
        self.stem_choice = pn.widgets.Select(name='Select',
                                             options=['Porter', 'Snowball'])

        # embedding widgets
        self.we_model = pn.widgets.Select(name='Select',
                                          options=['SKLearn Count Vectorizer'])

    @param.output('X', 'display_df')
    def output(self):
        """Return the embedding matrix and the dataframe currently displayed."""
        return self.X, self.display_df

    @param.depends('display_df')
    def df_pane(self):
        """Return a fixed-size box showing ``display_df``."""
        return pn.WidgetBox(self.display_df,
                            height=300,
                            width=400)

    # load text page functions
    # -------------------------------------------------------------------------
    def load_df(self, df, event):
        """Parse the uploaded file into ``df`` and show it in ``display_df``.

        Registered as the ``value`` callback of the ``load_file`` widget link.

        Args:
            df: link target handed over by the widget link (unused).
            event: change event of the file input (unused; the bytes are
                read from ``self.load_file.value``).
        """
        info = io.BytesIO(self.load_file.value)
        if self.header_checkbox.value:
            self.df = pd.read_csv(info)
        else:
            # One row per line, stored in a single 'text' column.
            # NOTE(review): recent pandas releases appear to reject '\n' as a
            # separator -- confirm against the pandas version in use.
            self.df = pd.read_csv(info, sep='\n', header=None, names=['text'])

        self.display_df = self.df

    def load_text_page(self):
        """Build the 'Load Text' tab: help text, upload widgets and preview."""
        helper_text = (
            "This simple Sentiment Analysis NLP app will allow you to select a few different options "
            "for some preprocessing steps to prepare your text for testing and training. "
            "It will then allow you to choose a model to train, the percentage of data to "
            "preserve for test, while the rest will be used to train the model.  Finally, "
            "some initial metrics will be displayed to determine how well the model did to predict "
            "the testing results."
            " "
            "Please choose a csv file that contains lines of text to analyze.  This text should "
            "have a text column as well as a sentiment column.  If there is a header included in the file, "
            "make sure to check the header checkbox."
        )
        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Load Text:'),
                pn.Column(
                    helper_text,
                    self.header_checkbox,
                    self.load_file,
                ),
            ),
            pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            ),
        )

    # tokenize page options
    # -------------------------------------------------------------------------
    def tokenize_option_page(self):
        """Build the 'Tokenize' tab with the regex search-pattern input."""
        help_text = (
            "Tokenization will break your text into a list of single articles "
            "(ex. ['A', 'cat', 'walked', 'into', 'the', 'house', '.']).  Specify a regular "
            "expression (regex) search pattern to use for splitting the text."
        )
        return pn.Column(
            pn.pane.Markdown('##Tokenize options:'),
            pn.WidgetBox(help_text, self.search_pattern_input,
                         height=300,
                         width=300),
        )

    # remove stopwords page
    # -------------------------------------------------------------------------
    def remove_stopwords_page(self):
        """Build the 'Remove Stopwords' tab: upload widget and word preview."""
        help_text = (
            "Stop words are words that do not add any value to the sentiment of the text. "
            "Removing them may improve your sentiment results.  You may load a list of stop words "
            "to exclude from your text."
        )
        return pn.Row(
            pn.Column(
                pn.pane.Markdown('##Load Stopwords:'),
                pn.WidgetBox(help_text, self.load_words_button,
                             height=300,
                             width=300),
            ),
            pn.Column(
                pn.Spacer(height=52),
                pn.WidgetBox(self.stopword_df,
                             height=300,
                             width=400),
            ),
        )

    def load_stopwords(self, stopwords, event):
        """Load the uploaded stop-word list into ``stopwords``/``stopword_df``.

        Registered as the ``value`` callback of the ``load_words_button``
        widget link.

        Args:
            stopwords: link target handed over by the widget link (unused).
            event: change event of the file input (unused; the bytes are
                read from ``self.load_words_button.value``).
        """
        info = io.BytesIO(self.load_words_button.value)
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only upload stop-word files from a trusted source; a plain-text or
        # JSON word list would be a safer exchange format.
        self.stopwords = pd.read_pickle(info)
        self.stopword_df = pd.DataFrame({'stop words': self.stopwords})

    # stemming page
    # -------------------------------------------------------------------------
    def stemmer_page(self):
        """Build the 'Stem' tab with the stemmer selector."""
        help_text = (
            "Stemming is a normalization step for the words in your text.  Something that is "
            "plural should probably still be clumped together with a singular version of a word, "
            "for example.  Stemming will basically remove the ends of words.  Here you can choose "
            "between a Porter Stemmer or Snowball Stemmer. Porter is a little less aggressive than "
            "Snowball, however, Snowball is considered a slight improvement over Porter."
        )
        return pn.Column(
            pn.pane.Markdown('##Stemmer options:'),
            pn.WidgetBox(help_text, self.stem_choice,
                         height=300,
                         width=300),
        )

    # embedding page
    # -------------------------------------------------------------------------
    def word_embedding_page(self):
        """Build the 'Embed' tab with the embedding-model selector."""
        help_text = (
            "Embedding the process of turning words into numerical vectors. "
            "There have been several algorithms developed to do this, however, currently in this "
            "app, the sklearn count vectorizer is available. This algorithm will return a sparse "
            "matrix represention of all the words in your text."
        )
        return pn.Column(
            pn.pane.Markdown('##Choose embedding model:'),
            pn.WidgetBox(help_text, self.we_model,
                         height=300,
                         width=300),
        )

    # -------------------------------------------------------------------------
    def continue_ready(self, event):
        """Run the pre-processing pipeline on ``display_df`` and flag ``ready``.

        Each entry of the ``text`` column is tokenized with the configured
        regex, stripped of stop words, stemmed and re-joined; the resulting
        corpus is vectorized.  Afterwards ``X`` holds the dense matrix and
        ``display_df`` holds one embedding list per row next to the original
        ``sentiment`` column.

        Args:
            event: click event of the continue button (unused).

        Raises:
            ValueError: if the selected embedding model is not supported.
        """
        # Set up for tokenization
        tokenizer = RegexpTokenizer(self.search_pattern_input.value)

        # Set up for stemming
        if self.stem_choice.value == 'Porter':
            stemmer = PorterStemmer()
        else:
            # SnowballStemmer has a mandatory language argument; calling it
            # without one raises TypeError.  English matches the rest of the
            # app's (English) help text -- adjust if other corpora are used.
            stemmer = SnowballStemmer('english')

        # Set up for embedding
        if self.we_model.value == 'SKLearn Count Vectorizer':
            vectorizer = CountVectorizer(max_features=1000)
        else:
            raise ValueError(
                'Unsupported embedding model: {!r}'.format(self.we_model.value))

        # Build the lookup set once instead of scanning the list per token.
        stopword_set = set(self.stopwords)

        corpus = []
        for sentence in self.display_df['text']:
            # 1. tokenize, 2. drop stop words, 3. stem what is left
            tokens = tokenizer.tokenize(sentence)
            stems = [stemmer.stem(word) for word in tokens
                     if word not in stopword_set]
            # Join the words back together as one string per input row.
            corpus.append(' '.join(stems))

        X = vectorizer.fit_transform(corpus).toarray()
        labels = self.display_df['sentiment']

        self.X = X
        self.display_df = pd.DataFrame({'embeddings': [list(row) for row in X],
                                        'sentiment': labels})

        self.ready = True

    def panel(self):
        """Return the full layout: one tab per step plus the continue button."""
        return pn.Column(
            pn.Tabs(
                ('Load Text', self.load_text_page),
                ('Tokenize', self.tokenize_option_page),
                ('Remove Stopwords', self.remove_stopwords_page),
                ('Stem', self.stemmer_page),
                ('Embed', self.word_embedding_page),
            ),
            self.continue_button,
        )
示例#17
0
 class Z(param.Parameterized):
     """Parameterized class declaring a single array-valued parameter."""

     # Array parameter whose default is a one-element numpy array.
     z = param.Array(default=numpy.array([1]))
示例#18
0
class EditableHvDataset(HvDataset):
    '''Label-image dataset whose array (``img``) can be edited by clicking.'''

    locked_mask = param.Array(
        precedence=-1,
        doc='''mask of region that should not be updated''',
    )
    drawing_label = param.Selector(default=1, objects=[-1, 0, 1])
    editor_switches = param.ObjectSelector(
        default='pick label',
        objects=['-', 'pick label', 'fill label'],
    )
    locking_switches = param.ListSelector(
        default=[],
        objects=['background', 'foreground'],
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # bring the derived state in sync with the initial image
        self.update_locked_mask()
        self.update_drawing_label_list()

    def click_callback(self, coords):
        '''React to a click at ``coords`` according to ``editor_switches``.

        'pick label' makes the clicked label the drawing label; 'fill label'
        overwrites every pixel carrying the clicked label; any other mode
        does nothing.  Raises ValueError when the number of coordinates
        differs from the image's number of dimensions.
        '''
        ndim = self.img.ndim
        if len(coords) != ndim:
            raise ValueError(
                'Supplied coordinates: {} does not match the image dimensions: {}'
                .format(coords, ndim))

        index = tuple(int(round(value)) for value in coords)
        clicked_label = self.img[index]

        mode = self.editor_switches
        if mode == 'pick label':
            self.drawing_label = clicked_label
        elif mode == 'fill label':
            self.write_label(self.img == clicked_label)

    @param.depends('img', 'locking_switches', watch=True)
    def update_locked_mask(self):
        '''Recompute ``locked_mask`` from the image and the active locks.'''
        locked = np.zeros_like(self.img, dtype=bool)
        active_locks = self.locking_switches

        if 'background' in active_locks:
            locked |= self.img == 0

        if 'foreground' in active_locks:
            locked |= self.img > 0

        self.locked_mask = locked

    def write_label(self, mask):
        '''Write ``drawing_label`` into the unlocked pixels selected by ``mask``.'''
        updated = self.img.astype(np.int16)
        writable = mask & ~self.locked_mask
        updated[writable] = self.drawing_label

        # rebinding img (rather than editing in place) notifies the watchers
        self.img = updated

    @param.depends('img', watch=True)
    def update_drawing_label_list(self):
        '''Refresh the selectable drawing labels after the image changed.'''
        # -1 up to one past the current maximum, so that a brand-new object
        # can be annotated with an unused label
        choices = list(range(-1, self.img.max() + 2))
        self.param.drawing_label.objects = choices

        if self.drawing_label not in choices:
            self.drawing_label = -1

    def delete_label(self, event=None):
        '''Replace every pixel holding the current drawing label with -1.'''
        img = self.img
        img[img == self.drawing_label] = -1
        # re-assign the same array to trigger the parameter watchers
        self.img = img

    @param.depends('img')
    def _drawing_label_wg(self):
        return pn.panel(self.param.drawing_label)

    def widgets(self):
        '''Return a widget box with label picker, mode/lock switches and delete button.'''
        delete_button = pn.widgets.Button(name='delete selected label')
        delete_button.on_click(self.delete_label)

        editor_widget_spec = {
            'editor_switches': {'type': pn.widgets.RadioButtonGroup},
        }
        editor_switches_wg = pn.Param(self.param.editor_switches,
                                      show_name=True,
                                      name="on click",
                                      widgets=editor_widget_spec)

        locking_widget_spec = {
            'locking_switches': {'type': pn.widgets.CheckButtonGroup},
        }
        locking_switches_wg = pn.Param(self.param.locking_switches,
                                       show_name=True,
                                       name='lock',
                                       widgets=locking_widget_spec)

        return pn.WidgetBox(self._drawing_label_wg,
                            editor_switches_wg,
                            locking_switches_wg,
                            delete_button)