# Example #1
# Load per-character representations from an HDF5 file and pair them with a
# NominalDataEncoder built over the characters found in that file.
#
# NOTE(review): this snippet looks truncated. The parameter list after
# `def load_col_chars(` is missing (the body reads `col_chars_path`,
# `char_enc`, `blank_repr_div` and `unknown_char_extra_neg`, which are
# presumably the parameters -- confirm against the full source), and the
# unknown-character branch near the end appears to have lost lines as well.
def load_col_chars(
    # Open the HDF5 file read-only; it is closed when the `with` block exits.
    with h5py.File(col_chars_path, 'r') as hf5:
        # For every key in the file, the text after the key's last '_' is
        # looked up in `char_enc.encoder`; those values become the labels of
        # the new encoder.
        nominal_enc = NominalDataEncoder(
            [char_enc.encoder[key.rpartition('_')[-1]] for key in hf5.keys()])
        # One tensor per dataset, read fully into memory, in `hf5.keys()`
        # order.
        labels_repr = [torch.tensor(hf5[dat][:]) for dat in hf5.keys()]

        # Position of the '~' entry within `nominal_enc` (presumably the
        # blank character -- confirm).
        blank_mevm_idx = nominal_enc.encoder[char_enc.encoder['~']]

        # Optionally keep only the first `len / blank_repr_div` rows of that
        # entry's tensor.
        if blank_repr_div is not None:
            labels_repr[blank_mevm_idx] = labels_repr[blank_mevm_idx][:int(
                len(labels_repr[blank_mevm_idx]) / blank_repr_div)]

        # Handle unknown character as extra_negatives
        if (char_enc.unknown_idx in nominal_enc.encoder
                and unknown_char_extra_neg):
            unknown_mevm_idx = nominal_enc.encoder[char_enc.encoder['#']]

            extra_negatives = labels_repr[unknown_mevm_idx]

            # Given unknown char is treated as rest of unknowns, remove so
            # MEVM do not treat it as a known class.
            # NOTE(review): as written, `extra_negatives` is overwritten with
            # None right after being set, nothing is removed, and the name is
            # never assigned when the condition above is false. A removal
            # step and an `else:` seem to be missing here -- confirm.
            extra_negatives = None

    return nominal_enc, labels_repr, extra_negatives
# Example #2
def organize_data_pts_by_logits(argmax_logits, layers):
    """Group the layer encodings by their argmax-logit label.

    Parameters
    ----------
    argmax_logits : np.ndarray
        One label per data point; compared element-wise against each unique
        label to find that label's data points.
    layers : np.ndarray or torch.Tensor
        Encodings indexed along their first axis by the positions found in
        `argmax_logits`.

    Returns
    -------
    tuple
        `(labels_repr, nominal_encoder)`: a list with one tensor per unique
        label (in `np.unique` order) and the NominalDataEncoder built from
        those unique labels.
    """
    # Organize the layers into lists per character class.
    unique_labels, label_counts = np.unique(argmax_logits, return_counts=True)

    logging.debug('Number of Unique Labels: %d', len(unique_labels))
    logging.debug('The unique labels: %s', unique_labels)
    logging.debug('Label counts: %s', label_counts)

    # Be able to obtain the label from the MEVM's indexing of classes
    nominal_encoder = NominalDataEncoder(unique_labels)

    labels_repr = []

    logging.info('Unique Labels contained within layer encoding:')
    for label, count in zip(unique_labels, label_counts):
        logging.info('%d : %d', label, count)

        label_indices = np.where(argmax_logits == label)[0]

        logging.debug('Label `%s`\'s indices = %s', label, label_indices)

        # NOTE(review): this append was missing from the snippet (the list
        # was returned empty and `layers` was unused); it is reconstructed
        # from the surrounding code and the log message below.
        labels_repr.append(torch.as_tensor(layers[label_indices]))

        logging.debug(
            'Torch tensor shape of label %s = %s',
            label,
            labels_repr[-1].shape,
        )

    return labels_repr, nominal_encoder
    def load(filepath, blank_idx, space_char, unknown_idx):
        """Load a saved label set and build a CharEncoder from it.

        Parameters
        ----------
        filepath
            Passed to `NominalDataEncoder.load` to read the label set.
        blank_idx, space_char, unknown_idx
            Forwarded unchanged, in this order, as the first three arguments
            of the `CharEncoder` constructor.

        Returns
        -------
        CharEncoder
            Encoder built over the keys of the loaded encoder's `encoder`
            mapping.
        """
        nde = NominalDataEncoder.load(filepath)

        # TODO build CharEncoder s.t. it can simply copy the parts of the given
        # NDE
        # Pass the `blank_idx` parameter; the name `blank` is not defined in
        # this function.
        return CharEncoder(
            blank_idx,
            space_char,
            unknown_idx,
            list(nde.encoder),
        )
    def __init__(self, labels=None, max_unknown=None, *args, **kwargs):
        """Initialise the MEVM and its label encoder.

        Parameters
        ----------
        labels : None, list, np.ndarray or NominalDataEncoder
            Class labels. A NominalDataEncoder (or None) is stored as given;
            a list or array is wrapped in a new NominalDataEncoder.
        max_unknown
            Stored unchanged on `self.max_unknown`.
        *args, **kwargs
            Forwarded to the parent class initialiser.

        Raises
        ------
        TypeError
            If `labels` is of any other type.
        """
        super(MEVM, self).__init__(*args, **kwargs)

        # Create a NominalDataEncoder to map class inputs to the MEVM internal
        # class representation.
        if isinstance(labels, NominalDataEncoder) or labels is None:
            self.label_enc = labels
        elif isinstance(labels, (list, np.ndarray)):
            self.label_enc = NominalDataEncoder(labels)
        else:
            # f-string so the offending type actually appears in the message.
            raise TypeError(' '.join([
                'Expected `labels` of types: None, list, np.ndarray, or',
                f'NominalDataEncoder, not of type {type(labels)}',
            ]))

        self.max_unknown = max_unknown
# Example #5
 def __init__(self, *args, **kwargs):
     """Build the label encoder, forwarding every argument to NominalDataEncoder."""
     encoder = NominalDataEncoder(*args, **kwargs)
     self.label_enc = encoder
    # Build an MEVM from a saved HDF5 state: the per-class EVMs, the label
    # encoder and the training hyperparameters.
    #
    # NOTE(review): this snippet looks truncated -- the docstring is never
    # closed, several `' '.join([` / `[` / `{` openers have no closing
    # bracket, the default `train_hyperparams` list has no entries, and the
    # label-handling chain seems to be missing its `else:` lines. The
    # comments below describe the visible lines only; confirm against the
    # full source before relying on them.
    def load(h5, labels=None, labels_dtype=None, train_hyperparams=None):
        """Performs the same lod functionality as in MultipleEVM but loads the
        ordered labels from the h5 file for the label encoder.
        # A string `h5` is treated as a path and opened read-only.
        # NOTE(review): the file opened here is not closed anywhere in the
        # visible code.
        if isinstance(h5, str):
            h5 = h5py.File(h5, 'r')

        # load evms
        # Entries are named EVM-1, EVM-2, ...; reading stops at the first
        # name that is not present in `h5`.
        _evms = []
        i = 1
        while "EVM-%d" % i in h5:
            _evms.append(EVM(h5["EVM-%d" % (i)], log_level='debug'))
            i += 1

        # Load the ordered label into the NominalDataEncoder
        # Visible sources for the labels: the explicit `labels` argument, the
        # `labels` dataset in the file, or each loaded EVM's `label`.
        if 'labels' in h5.keys():
            if labels is not None:
                logging.info(' '.join([
                    '`labels` key exists in the HDF5 MEVM state file, but',
                    'labels was given explicitly to MEVM.load(). Ignoring the',
                    'labels in the HDF5 file.',
                label_enc = NominalDataEncoder(labels)
                # When no dtype is given, it is read from the file attribute
                # `labels_dtype` and used to cast the stored labels.
                if labels_dtype is None:
                    labels_dtype = np.dtype(h5.attrs['labels_dtype'])
                label_enc = NominalDataEncoder(
                    h5['labels'][:].astype(labels_dtype), )
        elif labels is not None:
            label_enc = NominalDataEncoder(labels)
            logging.warning(' '.join([
                'No `labels` dataset available in given hdf5. Relying on the',
                'evm model\'s labels if they exist. Will fail if the MEVM',
                'state does not have any labels in each of its EVM.',

            label_enc = NominalDataEncoder([evm.label for evm in _evms], )

        # Load training vars if not given
        if train_hyperparams is None:
            # NOTE Able to specify which to load from h5 by passing a list.
            train_hyperparams = [

        # A list of attribute names becomes a dict of the values stored in
        # `h5.attrs`; names absent from the file are skipped.
        if isinstance(train_hyperparams, list):
            train_hyperparams = {
                attr: h5.attrs[attr]
                for attr in train_hyperparams if attr in h5.attrs
        elif not isinstance(train_hyperparams, dict):
            raise TypeError(' '.join([
                '`train_hyperparams` expected type: None, list, or dict, but',
                f'recieved {type(train_hyperparams)}',

        # Construct the model from the encoder and hyperparameters, then
        # attach the EVMs loaded above.
        mevm = MEVM(label_enc, **train_hyperparams)
        mevm._evms = _evms

        return mevm
    def fit(self, points, labels=None, extra_negatives=None):
        """Wrap MultipleEVM's train(), normalising inputs and the encoder.

        Parameters
        ----------
        points : np.ndarray or list
            Either an array aligned element-wise with `labels` (an (X, y)
            sequence pair), or a list holding one np.ndarray or torch.Tensor
            per class.
        labels : list, np.ndarray or NominalDataEncoder, optional
            Per-sample labels when `points` is an aligned array, otherwise
            one label per entry of `points`. When given, it replaces
            `self.label_enc`.
        extra_negatives : torch.Tensor, np.ndarray or list, optional
            Unlabelled negative samples. A 2-D array or a list is converted
            to a torch.Tensor; None is passed through unchanged.

        Raises
        ------
        TypeError
            If `points`, `labels` or `extra_negatives` has an unsupported
            type.
        ValueError
            If the number of labels differs from the number of classes in
            `points`.
        """
        # If points and labels are aligned sequence pair (X, y): adjust form
        if (isinstance(points, np.ndarray)
                and isinstance(labels, (list, np.ndarray))
                and len(points) == len(labels)):
            # Adjust sequence pair into list of torch.Tensors and unique labels
            unique = np.unique(labels)
            labels = np.array(labels)
            points = [torch.Tensor(points[labels == u]) for u in unique]
            labels = unique
        elif isinstance(points, list):
            if all(isinstance(pts, np.ndarray) for pts in points):
                # If list of np.ndarrays, turn into torch.Tensors
                points = [torch.Tensor(pts) for pts in points]
            elif not all(isinstance(pts, torch.Tensor) for pts in points):
                raise TypeError(' '.join([
                    'expected points to be of types: list(np.ndarray),',
                    'list(torch.tensor), or np.ndarray with labels as an',
                    'aligned list or np.ndarray',
                ]))
        else:
            raise TypeError(' '.join([
                'expected points to be of types: list(np.ndarray),',
                'list(torch.tensor), or np.ndarray with labels as an',
                'aligned list or np.ndarray',
            ]))

        # Set encoder if labels is not None
        if labels is not None:
            if len(points) != len(labels):
                raise ValueError(' '.join([
                    'The given number of labels does not equal the number of',
                    'classes represented by the list of points.',
                    'If giving an aligned sequence pair of points and labels,',
                    'then ensure `points` is of type `np.ndarray`.',
                ]))

            if self.label_enc is not None:
                # NOTE(review): the opening line of this log call was missing
                # from the snippet; `logging.warning` is assumed.
                logging.warning(
                    '`encoder` is not None and is being overwritten!', )

            if isinstance(labels, NominalDataEncoder):
                self.label_enc = labels
            elif isinstance(labels, (list, np.ndarray)):
                self.label_enc = NominalDataEncoder(labels)
            else:
                # f-string so the offending type actually appears.
                raise TypeError(' '.join([
                    'Expected `labels` of types: None, list, np.ndarray, or',
                    f'NominalDataEncoder, not of type {type(labels)}',
                ]))

        # Ensure extra_negatives is of expected form (no labels for these).
        # None is the default and is documented as valid by the error
        # message, so it must not fall through to the TypeError below.
        if extra_negatives is not None:
            if ((isinstance(extra_negatives, np.ndarray)
                 and len(extra_negatives.shape) == 2)
                    or isinstance(extra_negatives, list)):
                extra_negatives = torch.Tensor(extra_negatives)
            elif not isinstance(extra_negatives, torch.Tensor):
                raise TypeError(' '.join([
                    'The extra_negatives must be either None, torch.Tensor of',
                    'shape 2, or an object broadcastable to such a',
                    'torch.Tensor.',
                ]))

        # Points is now list(torch.Tensors) and encoder handled.

        # TODO handle adjust of extra negatives as a list of labels to be known
        # unknowns. For now, expects extra_negatives always of correct type.
        self.train(points, labels, extra_negatives)