示例#1
0
    def _load_comments(self, label):
        """
        Add "flag" and "comments" columns to a dataset.

        Both columns start from defaults (empty comment, flag "0") and
        are then overwritten with any values stored in the dataset's
        metadata under "MOSViz_comments" and "MOSViz_flags".

        Parameters
        ----------
        label : str
            The label of the data in
            session.data_collection.

        Returns
        -------
        bool
            False when ``_data_collection_index`` returns -1 for
            ``label``; True once both components have been added.
        """

        # Make sure it's the right data
        # (because subset data is masked)
        position = self._data_collection_index(label)
        if position == -1:
            return False
        data = self.session.data_collection[position]

        # Defaults: one empty comment and one "0" flag per row.
        n_rows = data.shape[0]
        new_comments = np.array(["" for _ in range(n_rows)], dtype=object)
        new_flags = np.array(["0" for _ in range(n_rows)], dtype=object)

        meta = data.meta
        obj_names = data.get_component(
            self.catalog.meta["special_columns"]["source_id"]).labels

        # Each saved mapping is applied on top of its default column. The
        # two loads are wrapped separately, so a failure while reading one
        # is only reported and does not prevent the other from loading.
        saved_columns = (
            ("MOSViz_comments", new_comments, "MOSViz Comment Load Failed: "),
            ("MOSViz_flags", new_flags, "MOSViz Flag Load Failed: "),
        )
        for meta_key, column, failure_message in saved_columns:
            if meta_key not in meta.keys():
                continue
            try:
                saved = meta[meta_key]
                for key in saved.keys():
                    index = self._id_to_index_hash(key, obj_names)
                    # Keys that do not resolve to a row are skipped.
                    if index is not None:
                        column[index] = saved[key]
            except Exception as e:
                print(failure_message, e)

        # Send to DC
        data.add_component(CategoricalComponent(new_flags, "flag"), "flag")
        data.add_component(CategoricalComponent(new_comments, "comments"),
                           "comments")
        return True
示例#2
0
def test_categorical_data():
    """
    Build two small labelled datasets, each holding one categorical and
    one numeric component, and return them as ``(data, data2)``.
    """
    data = Data(label="Test Cat Data 1")
    data2 = Data(label="Teset Cat Data 2")

    # (owner, component name, component) in the order they are attached.
    assignments = [
        (data, 'x1', CategoricalComponent(np.array(['a', 'a', 'b']))),
        (data, 'y1', Component(np.array([1, 2, 3]))),
        (data2, 'x2', CategoricalComponent(np.array(['c', 'a', 'b']))),
        (data2, 'y2', Component(np.array([1, 3, 5]))),
    ]
    for owner, name, component in assignments:
        owner.add_component(component, name)

    return data, data2
示例#3
0
def test_categorical_component():
    """
    A cloned CategoricalComponent keeps its type and its contents.

    The assertions run against both the source component and the clone:
    checking only the source would let a round-trip that corrupts the
    codes, labels or categories go unnoticed.
    """
    c = CategoricalComponent(['a', 'b', 'c', 'a', 'b'], categories=['a', 'b', 'c'])
    c2 = clone(c)
    assert isinstance(c2, CategoricalComponent)
    for component in (c, c2):
        np.testing.assert_array_equal(component.codes, [0, 1, 2, 0, 1])
        np.testing.assert_array_equal(component.labels, ['a', 'b', 'c', 'a', 'b'])
        np.testing.assert_array_equal(component.categories, ['a', 'b', 'c'])
示例#4
0
文件: pandas.py 项目: robintw/glue
def panda_process(indf):
    """
    Build a data set from a table using pandas. This attempts to respect
    categorical data input by letting pandas.read_csv infer the type

    Object and boolean columns are first coerced to numbers; the numeric
    version is kept only when coercion changed the dtype and fewer than
    40% of the values ended up null. Otherwise the column becomes a
    categorical component. All other columns are stored as-is.

    Parameters
    ----------
    indf : DataFrame-like
        Table whose ``items()`` yields ``(name, column)`` pairs.

    Returns
    -------
    Data
        A new data object with one component per column.
    """
    # Local import: the fix below needs ``pd.to_numeric`` and the file's
    # top-level imports are not visible from this block.
    import pandas as pd

    result = Data()
    # ``items()`` replaces ``iteritems()``, which pandas 2.0 removed.
    for name, column in indf.items():
        # The builtin ``object`` / ``bool`` compare equal to the matching
        # dtypes; the ``np.object`` / ``np.bool`` aliases were removed in
        # NumPy 1.24.
        if column.dtype == object or column.dtype == bool:
            # try to salvage numerical data (``convert_objects`` was removed
            # in pandas 1.0; ``to_numeric`` with errors='coerce' likewise
            # turns unparseable entries into NaN)
            coerced = pd.to_numeric(column, errors='coerce')
            if (coerced.dtype !=
                    column.dtype) and coerced.isnull().mean() < 0.4:
                c = Component(coerced.values)
            else:
                # pandas has a 'special' nan implementation and this doesn't
                # play well with np.unique
                c = CategoricalComponent(column.fillna(''))
        else:
            c = Component(column.values)

        # convert header to string - in some cases if the first row contains
        # numbers, these are cast to numerical types, so we want to change that
        # here.
        if not isinstance(name, six.string_types):
            name = str(name)

        # strip off leading #
        name = name.strip()
        if name.startswith('#'):
            name = name[1:].strip()

        result.add_component(c, name)

    return result
示例#5
0
def mosviz_tabular_data(*args, **kwargs):
    """
    Build a data set from a table. We restrict ourselves to tables
    with 1D columns.

    All arguments are passed to
        astropy.table.Table.read(...).

    The table's ``meta`` is copied onto the result. String columns become
    categorical components; every other column goes through
    ``Component.autotyped``.

    Returns
    -------
    Data
        A new data object with one component per table column.
    """

    result = Data()

    table = astropy_table_read(*args, **kwargs)

    result.meta = table.meta

    # Loop through columns and make component list
    for column_name in table.columns:
        c = table[column_name]
        # Columns expose their unit as either ``unit`` or ``units``.
        u = c.unit if hasattr(c, 'unit') else c.units

        if table.masked:
            # fill array for now
            try:
                c = c.filled(fill_value=np.nan)
            except (ValueError, TypeError):  # assigning nan to integer dtype
                c = c.filled(fill_value=-1)

        # dtype kinds 'S' (byte strings) and 'U' (unicode strings) identify
        # text columns without naming ``np.string_``, which NumPy 2.0 removed.
        if c.dtype.kind in ('S', 'U'):
            nc = CategoricalComponent(c, units=u)
        else:
            nc = Component.autotyped(c, units=u)
        result.add_component(nc, column_name)

    return result
示例#6
0
def _load_categorical_component(rec, context):
    """
    Rebuild a CategoricalComponent from a saved record.

    When ``rec`` has a 'log' entry, the component is fetched from the
    object that entry resolves to (via its ``component`` method, using
    ``rec['log_item']``). Otherwise a new CategoricalComponent is built
    from the record's data, categories, jitter method and units.
    """
    if 'log' in rec:
        logged = context.object(rec['log'])
        return logged.component(rec['log_item'])

    # Resolved in the same order as the constructor's keyword arguments.
    restored = {
        'categorical_data': context.object(rec['categorical_data']),
        'categories': context.object(rec['categories']),
        'jitter': context.object(rec['jitter_method']),
        'units': rec['units'],
    }
    return CategoricalComponent(**restored)
示例#7
0
 def setup_method(self, method):
     """
     Create the per-test fixtures: a dataset with a numeric 'y' and a
     categorical 'x' component, a subset of it, a collection holding it,
     and a HistogramClient drawing on FIGURE. The figure's canvas draw
     call is replaced with a MagicMock.
     """
     dataset = Data(y=[-1, -1, -1, -2, -2, -2, -3, -5, -7])
     x_labels = ['a', 'a', 'a', 'b', 'c', 'd', 'd', 'e', 'f']
     dataset.add_component(CategoricalComponent(x_labels), 'x')
     self.data = dataset

     self.subset = dataset.new_subset()

     collection = DataCollection(dataset)
     self.collect = collection

     client = HistogramClient(collection, FIGURE)
     self.client = client
     self.axes = client.axes

     # Installed after the client exists; the stub starts with no calls.
     FIGURE.canvas.draw = MagicMock()
     assert FIGURE.canvas.draw.call_count == 0
示例#8
0
    def test_high_cardinatility_timing(self):
        """
        Setting a categorical attribute with 50000 distinct labels
        (each repeated 5 times) must complete in under 3 seconds.
        """
        card = 50000
        repeats = 5
        labels = list(map(str, range(card)))

        data = Data()
        data.add_component(Component(np.arange(card * repeats)), 'y')
        data.add_component(
            CategoricalComponent(np.repeat([labels], repeats)), 'xcat')
        self.add_data(data)
        xcat_id = data.find_component_id('xcat')

        # Time a single call of the client's attribute setter.
        elapsed = timeit(partial(self.client._set_xydata, 'x', xcat_id),
                         number=1)
        assert elapsed < 3  # this is set for Travis speed
示例#9
0
    def test_ticks_go_back_after_changing(self):
        """ Switching the x attribute to a categorical component and then
        back to a numeric one should leave the axis ticks in the state
        expected for a numeric axis.
        """
        data = Data()
        data.add_component(Component(np.arange(100)), 'y')
        two_category_labels = ['a'] * 50 + ['b'] * 50
        data.add_component(CategoricalComponent(two_category_labels), 'xcat')
        data.add_component(Component(2 * np.arange(100)), 'xcont')

        self.add_data(data=data)

        # First point the client at the categorical x component.
        y_id = data.find_component_id('y')
        self.client.yatt = y_id
        xcat_id = data.find_component_id('xcat')
        self.client.xatt = xcat_id
        self.check_ticks(self.client.axes.xaxis, False, True)
        self.check_ticks(self.client.axes.yaxis, False, False)

        # Then switch x to the continuous component and re-check both axes.
        xcont_id = data.find_component_id('xcont')
        self.client.xatt = xcont_id
        self.check_ticks(self.client.axes.yaxis, False, False)
        self.check_ticks(self.client.axes.xaxis, False, False)
示例#10
0
def pandas_to_glue(df, label='data', name_map=default_name_map):
    """Convert a dataframe to glue.core.data.Data.

    Every column is added under the name returned by ``map_column_names``.
    When adding a column raises TypeError, a categorical component of
    zero-padded row-index strings is added in its place and the original
    values are stored on the component id as ``_unhashable``.

    NOTE(review): ``name_map`` is accepted but never referenced in this
    body; confirm whether ``map_column_names`` is meant to receive it.
    """
    data = Data(label=label)
    for column in df.columns:
        glue_name = map_column_names(column)
        try:
            data.add_component(df[column], glue_name)
        except TypeError:
            # pd.factorize error with int list input to CategoricalComponent
            n_rows = len(df[column])
            placeholders = ['{:09d}'.format(row) for row in range(n_rows)]
            stand_in = CategoricalComponent(placeholders)
            component_id = ComponentID(glue_name)
            component_id._unhashable = np.array(df[column])
            data.add_component(stand_in, component_id)
    return data