示例#1
0
    def test_multi(self):
        """Round-trip several funnel dicts through multi_pour and multi_pump.

        A waterwork made of three add tanks and one clone tank is built, with
        plugs set on some of its slots and tubes so that only ('Add_0', 'a')
        is supplied as a funnel. The same funnel dict is poured three times
        with multi_pour and every resulting tap dict is compared against the
        expected taps. The tap dicts are then pumped back with multi_pump and
        compared against the original funnel dict, after which the stored
        values are cleared and checked to be None.
        """
        with wa.Waterwork() as ww:
            add0_tubes, add0_slots = empty + empty

            add0_slots['b'].set_plug(np.array([3, 4]))
            add0_tubes['a_is_smaller'].set_plug(False)

            add1_tubes, add1_slots = add0_tubes['target'] + empty

            # Plugs may be callables that receive the dict of known values.
            add1_slots['b'].set_plug(
                lambda d: 0.5 * d['Add_0/slots/a'] + np.array([0.5, 1.0]))
            add1_tubes['a_is_smaller'].set_plug(
                lambda d: not d['output_1'].any())

            cl0_tubes, _ = td.clone(a=add0_tubes['smaller_size_array'])

            add2_tubes, _ = td.add(a=cl0_tubes['a'],
                                   b=add1_tubes['smaller_size_array'])

            add2_tubes['target'].set_plug(
                lambda d: d['output_1'] + np.array([1, 2]))

            cl0_tubes['b'].set_name('output_1')

        true_funnel_dict = {('Add_0', 'a'): np.array([1, 2])}
        funnel_dicts = [true_funnel_dict] * 3

        # The tubes that were given plugs above ('Add_0' and 'Add_1'
        # a_is_smaller, 'Add_2' target) are not listed: the key-set assertion
        # below requires the poured tap dict to contain exactly these keys.
        true_tap_dict = {
            "output_1": np.array([3, 4]),
            'Add_1/tubes/target': np.array([5, 8]),
            'Add_2/tubes/smaller_size_array': np.array([1, 2]),
            'Add_2/tubes/a_is_smaller': False,
        }

        tap_dicts = ww.multi_pour(funnel_dicts, key_type='str')
        for tap_dict in tap_dicts:
            self.assertEqual(set(tap_dict.keys()), set(true_tap_dict.keys()))
            for tap in tap_dict:
                th.assert_arrays_equal(self, tap_dict[tap], true_tap_dict[tap])

            self.assertEqual(
                ww._pump_tank_order(),
                [ww.tanks[k] for k in ['Add_2', 'Add_1', 'Clone_0', 'Add_0']])

        funnel_dicts = ww.multi_pump(tap_dicts, key_type='tuple')
        for funnel_dict in funnel_dicts:
            self.assertEqual(sorted(funnel_dict.keys()),
                             sorted(true_funnel_dict.keys()))
            for funnel in funnel_dict:
                th.assert_arrays_equal(self, funnel_dict[funnel],
                                       true_funnel_dict[funnel])

            # After clearing, no slot or tube may still hold a value.
            ww.clear_vals()
            for d in [ww.slots, ww.tubes]:
                for key in d:
                    self.assertIsNone(d[key].get_val())
示例#2
0
    def test_pour_pump_non_eager(self):
        """Pour and pump a waterwork built from empty inputs, twice.

        The waterwork (three add tanks and one clone tank) is defined without
        any concrete input values. Each of the two iterations checks the pour
        order, pours the funnel dict, compares the taps, checks the pump
        order, pumps the expected taps back and compares the recovered
        funnels. At the end of each iteration the waterwork is saved to a
        file and reloaded, so the second iteration runs against the reloaded
        copy.
        """
        with wa.Waterwork() as ww:
            add0_tubes, add0_slots = empty + empty

            add1_tubes, add1_slots = add0_tubes['target'] + empty

            cl0_tubes, _ = td.clone(a=add0_tubes['smaller_size_array'])

            add2_tubes, _ = td.add(a=cl0_tubes['a'],
                                   b=add1_tubes['smaller_size_array'])

            cl0_tubes['b'].set_name('output_1')

        true_funnel_dict = {
            ('Add_0', 'a'): np.array([1, 2]),
            ('Add_0', 'b'): np.array([3, 4]),
            ('Add_1', 'b'): np.array([1, 2])
        }
        # range (rather than xrange) keeps the test runnable on Python 2 and 3.
        for _ in range(2):
            self.assertEqual([str(t) for t in ww._pour_tank_order()],
                             ['Add_0', 'Add_1', 'Clone_0', 'Add_2'])

            tap_dict = ww.pour(true_funnel_dict, key_type='str')

            true_tap_dict = {
                "output_1": np.array([3, 4]),
                'Add_0/tubes/a_is_smaller': False,
                'Add_1/tubes/a_is_smaller': False,
                'Add_1/tubes/target': np.array([5, 8]),
                'Add_2/tubes/smaller_size_array': np.array([1, 2]),
                'Add_2/tubes/a_is_smaller': False,
                'Add_2/tubes/target': np.array([4, 6]),
            }

            self.assertEqual(set(tap_dict.keys()), set(true_tap_dict.keys()))
            for tap in tap_dict:
                th.assert_arrays_equal(self, tap_dict[tap], true_tap_dict[tap])

            self.assertEqual(
                ww._pump_tank_order(),
                [ww.tanks[k] for k in ['Add_2', 'Add_1', 'Clone_0', 'Add_0']])

            funnel_dict = ww.pump(true_tap_dict, key_type='tuple')

            self.assertEqual(sorted(funnel_dict.keys()),
                             sorted(true_funnel_dict.keys()))
            for funnel in funnel_dict:
                th.assert_arrays_equal(self, funnel_dict[funnel],
                                       true_funnel_dict[funnel])

            # After clearing, no slot or tube may still hold a value.
            ww.clear_vals()
            for d in [ww.slots, ww.tubes]:
                for key in d:
                    self.assertIsNone(d[key].get_val())

            # Save and reload so the next iteration exercises the waterwork
            # that was restored from the file.
            pickle_name = os.path.join(self.temp_dir, 'ww.pickle')
            ww.save_to_file(pickle_name)
            ww = wa.Waterwork(from_file=pickle_name)
示例#3
0
    def test_pour_pump_eager(self):
        """Check tube values of tanks built from concrete arrays, then pump.

        The tanks are defined with actual numpy arrays, and the tube values
        are read back with get_val() without calling pour. The two input
        slots are unplugged before the expected taps are pumped back, and the
        recovered funnel values are compared with the original arrays.
        """
        with wa.Waterwork() as ww:
            cl0_tubes, cl0_slots = td.clone(a=np.array([1, 2]))
            cl0_slots['a'].unplug()

            add0_tubes, add0_slots = cl0_tubes['a'] + np.array([3, 4])
            add0_slots['b'].unplug()

            add1_tubes, _ = add0_tubes['target'] + cl0_tubes['b']
            cl1_tubes, _ = td.clone(a=add0_tubes['smaller_size_array'])
            prod_tubes, _ = cl1_tubes['a'] * add1_tubes['smaller_size_array']

            prod_tubes['target'].set_name('answer')

        expected_funnels = {
            cl0_slots['a']: np.array([1, 2]),
            add0_slots['b']: np.array([3, 4])
        }

        expected_taps = {
            cl1_tubes['b']: np.array([3, 4]),
            add1_tubes['target']: np.array([5, 8]),
            prod_tubes['smaller_size_array']: np.array([1, 2]),
            "answer": np.array([3, 8]),
        }

        # Same expectations, but keyed by the tube object instead of the
        # 'answer' name, so that every key supports get_val().
        expected_by_tube = dict(expected_taps)
        expected_by_tube[prod_tubes['target']] = expected_by_tube.pop('answer')
        for tube, expected_val in expected_by_tube.items():
            th.assert_arrays_equal(self, tube.get_val(), expected_val)

        funnel_dict = ww.pump(expected_taps)

        self.assertEqual(sorted(funnel_dict.keys()),
                         sorted(expected_funnels.keys()))
        for funnel in funnel_dict:
            th.assert_arrays_equal(self, funnel_dict[funnel],
                                   expected_funnels[funnel])
示例#4
0
  def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
    """Define the tanks that turn category values into one-hot vectors.

    The category values are converted to indices, the indices are cloned so
    one copy can be named 'indices', and the other copy is one-hot encoded.
    When norm_mode is 'mean_std' the one-hots are additionally shifted by
    self.mean and divided by self.std.

    Parameters
    ----------
    array : np.ndarray or empty
      The array to be transformed.
    return_tubes : iterable of tube keys or None
      Keys that are looked up with the waterwork's maybe_get_tube and handed
      back to the caller.
    prefix : str
      Forwarded to self._pre when building the key of the 'indices' value
      read by the one_hot 'missing_vals' plug.

    Returns
    -------
    list or None
      The results of maybe_get_tube for each key of return_tubes, in order.
      None when return_tubes is None.

    """
    # Category values -> indices.
    index_tubes, index_slots = td.cat_to_index(
      array, self.cat_val_to_index,
      tube_plugs={'input_dtype': lambda z: self.input_dtype}
    )
    index_slots['cats'].set_name('array')
    index_tubes['missing_vals'].set_name('missing_vals')

    # One copy of the indices is named 'indices'; the other copy feeds the
    # one-hot tank.
    clone_tubes, _ = td.clone(index_tubes['target'])
    clone_tubes['a'].set_name('indices')

    # Indices -> one-hot vectors.
    one_hot_plugs = {
      'missing_vals': lambda z: np.ones(z[self._pre('indices', prefix)].shape)*-2
    }
    result, _ = td.one_hot(
      clone_tubes['b'], len(self.cat_val_to_index),
      tube_plugs=one_hot_plugs
    )

    if self.norm_mode == 'mean_std':
      sub_plugs = {'a_is_smaller': False, 'smaller_size_array': self.mean}
      result, _ = td.sub(result['target'], self.mean, tube_plugs=sub_plugs)

      div_plugs = {
        'a_is_smaller': False,
        'smaller_size_array': self.std,
        'missing_vals': np.array([]),
        'remainder': np.array([]),
      }
      result, _ = td.div(result['target'], self.std, tube_plugs=div_plugs)

    result['target'].set_name('one_hots')

    if return_tubes is None:
      return None

    ww = result['target'].waterwork
    return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]
示例#5
0
    def define_waterwork(self, array=empty, return_tubes=None):
        """Define the tanks that turn category values into one-hot vectors.

        The category values are converted to indices, the indices are cloned
        so one copy can be named 'indices', and the other copy is one-hot
        encoded. When norm_mode is 'mean_std' the one-hots are additionally
        shifted by self.mean and divided by self.std.

        Parameters
        ----------
        array : np.ndarray or empty
          The array to be transformed.
        return_tubes : iterable of tube keys or None
          Keys that are looked up with the waterwork's maybe_get_tube and
          handed back to the caller.

        Returns
        -------
        list or None
          The results of maybe_get_tube for each key of return_tubes, in
          order. None when return_tubes is None.

        """
        # Category values -> indices.
        index_tubes, _ = td.cat_to_index(array, self.cat_val_to_index)
        index_tubes['missing_vals'].set_name('missing_vals')

        # One copy of the indices is named 'indices'; the other copy feeds
        # the one-hot tank.
        clone_tubes, _ = td.clone(index_tubes['target'])
        clone_tubes['a'].set_name('indices')

        # Indices -> one-hot vectors.
        result, _ = td.one_hot(clone_tubes['b'], len(self.cat_val_to_index))

        # Optional normalization of the one-hots.
        if self.norm_mode == 'mean_std':
            centered, _ = result['target'] - self.mean
            result, _ = centered['target'] / self.std

        result['target'].set_name('one_hots')

        if return_tubes is None:
            return None

        ww = result['target'].waterwork
        return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]
示例#6
0
    def define_waterwork(self, array=empty, return_tubes=None):
        """Chain the waterwork definitions of all transforms in order.

        Each transform in self.transform_order defines its tanks inside its
        own name space. For every transform but the last, the tube named
        after the next entry of self.tap_keys is requested, cloned, and the
        clone's 'a' tube becomes the input array of the following transform,
        while the clone's 'b' tube takes over the requested tube's name.

        Parameters
        ----------
        array : np.ndarray or empty
          The array handed to the first transform.
        return_tubes : ignored
          The passed value is overwritten on every iteration before use.

        Returns
        -------
        None
          The method has no return statement; it is used for the tanks it
          defines.

        """
        assert self.input_dtype is not None, (
            "Run calc_global_values before running the transform")

        with ns.NameSpace(self.name):
            for step, trans_key in enumerate(self.transform_order):
                trans = self.transforms[trans_key]

                with ns.NameSpace(trans.name):
                    # Only non-final transforms hand a tube on to a successor.
                    if step >= len(self.transform_order) - 1:
                        return_tubes = None
                    else:
                        next_tap_key = self.tap_keys[step + 1]
                        return_tubes = [self._pre(next_tap_key)]

                    tubes = trans.define_waterwork(array, return_tubes)

                    if tubes is not None:
                        # Rename the requested tube, then let the 'b' clone
                        # carry its original name.
                        tap_name = tubes[0].name
                        tubes[0].set_name("to_be_cloned")

                        clone_tubes, _ = td.clone(tubes[0])
                        array = clone_tubes['a']

                        clone_tubes['b'].set_name(tap_name)
示例#7
0
    def define_waterwork(self, array=empty, return_tubes=None, prefix=''):
        """Define the tanks that tokenize strings and map tokens to indices.

        The input is split in two along axis 1. The first part is tokenized
        (and optionally lower-cased, half-widthed and lemmatized), the second
        part is used as the per-row selector and named 'languages'. Tokens
        that are not found in the index-to-word maps are replaced by '[UNK]'
        and the remaining tokens are converted to indices.

        Parameters
        ----------
        array : np.ndarray or empty
          The array to be transformed. Presumably column 0 holds the strings
          and column 1 the language selector - confirm against callers.
        return_tubes : iterable of tube keys or None
          Keys that are looked up with the waterwork's maybe_get_tube and
          handed back to the caller.
        prefix : str
          Forwarded to self._pre when building the keys read by several of
          the tube plugs.

        Returns
        -------
        list or None
          The results of maybe_get_tube for each key of return_tubes, in
          order. None when return_tubes is None.

        """
        splits, splits_slots = td.split(array, [1], axis=1)
        splits_slots['a'].unplug()
        splits_slots['a'].set_name('array')

        splits, _ = td.iter_list(splits['target'], 2)

        # Tokenize the full strings into words
        tokens, tokens_slots = td.multi_tokenize(
            strings=splits[0],
            selector=splits[1],
            tokenizers=self.word_tokenizers,
            detokenizers=self.word_detokenizers,
            max_len=self.max_sent_len)

        # Set the names of various tubes and slots to make it easier to
        # reference them further downstream.
        tokens['diff'].set_name('tokenize_diff')
        tokens_slots['max_len'].set_name('max_sent_len')
        tokens_slots['tokenizers'].set_name('tokenizers')
        tokens_slots['detokenizers'].set_name('detokenizers')

        # Lower-case the strings, and set the diff strings of the tank to
        # 'lower_case_diff' for easier referencing.
        if self.lower_case:
            tokens, tokens_slots = td.lower_case(tokens['target'])
            tokens['diff'].set_name('lower_case_diff')

        # Half width the strings, and set the diff strings of the tank to
        # 'half_width_diff' for easier referencing.
        if self.half_width:
            tokens, tokens_slots = td.half_width(tokens['target'])
            tokens['diff'].set_name('half_width_diff')

        # Lemmatize the strings, and set the diff strings of the tank to
        # 'lemmatize_diff' for easier referencing.
        if self.lemmatize:
            tokens, tokens_slots = td.lemmatize(tokens['target'])
            tokens['diff'].set_name('lemmatize_diff')
            tokens_slots['lemmatizer'].set_name('lemmatizer')

        # Keep a named copy of the selector and expand the other copy to
        # (rows, 1, max_sent_len) so it can be used alongside the tokens.
        languages, _ = td.clone(splits[1])
        languages['b'].set_name('languages')

        dim_size, _ = td.dim_size(languages['a'], axis=0)
        shape, _ = td.tube_list(dim_size['target'], 1, 1)
        tile, _ = td.reshape(
            languages['a'],
            shape['target'],
            tube_plugs={
                'old_shape':
                lambda z: (z[self._pre('languages', prefix)].shape[0], 1)
            })
        tile, _ = td.tile(
            tile['target'], (1, 1, self.max_sent_len),
            tube_plugs={
                'old_shape':
                lambda z: (z[self._pre('languages', prefix)].shape[0], 1, 1)
            })

        # Find all the strings which are not in the list of known words and
        # replace them with the 'unknown token'. The empty string is added to
        # every map so that it is never treated as unknown.
        # items() (rather than iteritems()) works on both Python 2 and 3.
        maps_with_empty_strings = {
            k: v + ['']
            for k, v in self.index_to_word_maps.items()
        }
        isin, isin_slots = td.multi_isin(tokens['target'],
                                         maps_with_empty_strings,
                                         tile['target'])

        mask, _ = td.logical_not(isin['target'])
        tokens, _ = td.replace(
            isin['a'],
            mask['target'],
            '[UNK]',
            tube_plugs={
                'mask': lambda z: z[self._pre('indices', prefix)] == 0
            })

        # Keep track of the values that were overwritten with the
        # 'unknown token'.
        tokens['replaced_vals'].set_name('missing_vals')
        isin_slots['bs'].set_name('index_to_word_maps')

        # Convert the tokens into indices.
        # NOTE(review): the plugs below call self._pre without `prefix`,
        # unlike the plugs above - confirm whether that is intentional.
        # NOTE(review): np.unicode is not available in recent NumPy
        # releases - confirm the supported NumPy version.
        indices, indices_slots = td.multi_cat_to_index(
            tokens['target'],
            tile['target'],
            self.word_to_index_maps,
            tube_plugs={
                'selector':
                lambda z: np.tile(
                    np.reshape(z[self._pre('languages')],
                               (z[self._pre('languages')].shape[0], 1, 1)),
                    (1, 1, self.max_sent_len)),
                'missing_vals':
                lambda z: np.full(
                    z[self._pre('indices')].shape, '', dtype=np.unicode),
                'input_dtype':
                self.input_dtype
            })

        # Set the names of the slots and tubes of this tank for easier
        # referencing.
        indices['target'].set_name('indices')
        indices_slots['cat_to_index_maps'].set_name('word_to_index_maps')

        if return_tubes is None:
            return None

        ww = indices['target'].waterwork
        return [ww.maybe_get_tube(tube_key) for tube_key in return_tubes]
示例#8
0
    def _join_tubes_to_slots(self, input_dict, waterwork):
        """Connect tubes coming from other tanks to this tank's slots.

        For every entry of input_dict whose value is a Tube, the tube is
        joined to the slot with the same key. A tube that is already attached
        to another slot is routed through a newly created clone tank: the
        clone's 'a' tube feeds this tank's slot and its 'b' tube feeds the
        slot the tube was attached to before. Joined tubes are removed from
        waterwork.taps and joined slots from waterwork.funnels.

        Parameters
        ----------
        input_dict : dict(
            keys - Slot keys. Must be the same as the attribute slot_keys.
            values - Tube, None or some valid input data type.
          )
          The inputs to the tank.
        waterwork : Waterwork
          The waterwork that the part will be added to.

        Raises
        ------
        ValueError
          If the slot being joined, the previously attached slot (when a
          clone is inserted) or the tube has a plug set.

        """
        for slot_key in input_dict:
            slot = self.slots[slot_key]
            tube = input_dict[slot_key]

            # Only values that are exactly of type Tube are joined.
            if type(tube) is not tu.Tube:
                continue

            if type(tube.slot) is Empty:
                # The tube is not attached to anything yet: join it directly.
                tube.slot = slot
                slot.tube = tube
            else:
                # The tube was already used for another tank, so it has to be
                # cloned. Remember the slot it fed so it can be reattached.
                prior_slot = tube.slot
                tube.slot = empty

                import wtrwrks.tanks.tank_defs as td
                clone_tubes, _ = td.clone(a=tube)

                # The previously attached slot now reads from the 'b' tube.
                prior_slot.tube = clone_tubes['b']
                clone_tubes['b'].slot = prior_slot

                # This tank's slot reads from the 'a' tube.
                slot.tube = clone_tubes['a']
                clone_tubes['a'].slot = slot

                # The clone's tubes are connected immediately, so they are
                # not taps.
                del waterwork.taps[clone_tubes['a'].name]
                del waterwork.taps[clone_tubes['b'].name]

                # Neither slot is a funnel any more; the clone's slot is.
                if slot.name in waterwork.funnels:
                    del waterwork.funnels[slot.name]
                    if slot.plug is not None:
                        raise ValueError(
                            str(slot) +
                            ' was plugged but is no longer a funnel. Only funnels can have plugs.'
                        )
                if prior_slot.name in waterwork.funnels:
                    del waterwork.funnels[prior_slot.name]
                    if prior_slot.plug is not None:
                        raise ValueError(
                            str(prior_slot) +
                            ' was plugged but is no longer a funnel. Only funnels can have plugs.'
                        )

            # Bookkeeping common to both cases: the tube is no longer a tap
            # and the slot is no longer a funnel, so neither may be plugged.
            if tube.name in waterwork.taps:
                del waterwork.taps[tube.name]
            if slot.name in waterwork.funnels:
                del waterwork.funnels[slot.name]
            if slot.plug is not None:
                raise ValueError(
                    str(slot) +
                    ' was plugged but is no longer a funnel. Only funnels can have plugs.'
                )
            if tube.plug is not None:
                raise ValueError(
                    str(tube) +
                    ' was plugged but is no longer a tap. Only taps can have plugs.'
                )