Example #1
from sortedcontainers import SortedDict

def modulo(val):
    return val % 10  # key function assumed by this test: orders keys by their last digit

def test_bisect_key():
    temp = SortedDict(modulo, ((val, val) for val in range(100)))
    temp._reset(7)
    assert all(temp.bisect(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(
        temp.bisect_right(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_left(val) == (val % 10) * 10 for val in range(100))
Example #2
import pandas as pd
from roaringbitmap import RoaringBitmap  # assumed source of RoaringBitmap (provides intersection/intersection_len)
from sortedcontainers import SortedDict


class LCM(object):
    def __init__(self,
                 min_supp=.005,
                 n_jobs=1,
                 verbose=1,
                 return_tids=False):
        self.min_support = min_supp
        self.item_to_tids = SortedDict()
        self.verbose = verbose
        self.format = self._format_with_tids if return_tids else self._format
        self.extra_col = 'tids' if return_tids else 'support'
        self.n_transactions = 0

    def add(self, transaction):
        transaction = frozenset(transaction)
        for item in transaction:
            if item in self.item_to_tids:
                self.item_to_tids[item].add(self.n_transactions)
            else:
                self.item_to_tids[item] = RoaringBitmap([self.n_transactions])
        self.n_transactions += 1

    def discover_yield(self):
        items = [e for e in self.item_to_tids.items() if len(e[1]) >= self.min_support]
        for key, key_idxs in items:
            if len(key_idxs) >= self.min_support:
                yield from self._inner(frozenset(), key_idxs, key, items)

    def discover(self):
        data = self.discover_yield()
        return pd.DataFrame.from_records(data=data, columns=['itemset', self.extra_col])

    def _format_with_tids(self, p_prime, p_idxs):
        return p_prime, p_idxs

    def _format(self, p_prime, p_idxs):
        return p_prime, len(p_idxs)

    def get_new_scope_keys(self, new_items, p_prime, p_idxs, limit):
        for new_limit, limit_idxs in new_items:
            if new_limit not in p_prime:
                inter_len = p_idxs.intersection_len(limit_idxs)
                if inter_len >= self.min_support:
                    new_pidxs = p_idxs.intersection(limit_idxs)
                    yield new_limit, new_pidxs

    def _inner(self, p, p_idxs, limit, scope_items):
        cp = (k for k, idxs in reversed(scope_items) if k not in p and p_idxs.issubset(idxs))

        max_k = next(cp, None)
        if max_k and max_k == limit:
            cp = set(cp).union({max_k})
            p_prime = p.union(cp)
            yield self.format(p_prime, p_idxs)

            new_items = self.item_to_tids.items()[:self.item_to_tids.bisect(limit)]
            for new_limit, new_pidxs in self.get_new_scope_keys(new_items, p_prime, p_idxs, limit):
                yield from self._inner(p_prime, new_pidxs, new_limit, scope_items)

            cp.clear()
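
For context, a minimal way to drive this miner might look like the following sketch (item names are made up; discover_yield compares min_supp against absolute tid-set sizes, so an integer count is passed here):

lcm = LCM(min_supp=2)
for transaction in [("a", "b"), ("a", "b", "c"), ("b", "c")]:
    lcm.add(transaction)
print(lcm.discover())  # pandas DataFrame with 'itemset' and 'support' columns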
Example #3
class BisectNodePolicy(BaseNodePolicy):
    def __init__(self, hash_class=defaultHashClass):
        self.ring = SortedDict()
        super(BisectNodePolicy, self).__init__(hash_class=hash_class)

    def add_node(self, node=None, vnode_count=None):
        for i in range_(int(vnode_count)):
            self.ring[self._gen_key(node, i)] = node

    def remove_node(self, node=None):
        keys = list(self.ring.keys())
        for key in keys:
            if self.ring[key] == node:
                self.ring.pop(key)

    def get_proper_node(self, key):
        key, _ = self.ring.peekitem(self._find_proper_pos(key))
        return self.ring[key]

    def _find_proper_pos(self, key):
        key = self._gen_key(key)
        pos = self.ring.bisect(key)
        # if object_hash == node_hash, return node index
        if key in self.ring:
            return pos - 1
        # wrap around to the first node: hashes past the last key map back to the start of the ring
        if pos == len(self.ring):
            return 0
        return pos
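
The lookup above relies on _gen_key and the hash class from the base policy, which are not shown here; a self-contained sketch of the same ring-lookup pattern (placeholder hash function and node names) could look like this:

from hashlib import md5
from sortedcontainers import SortedDict

ring = SortedDict()

def ring_hash(value):
    return int(md5(value.encode()).hexdigest(), 16)

for node in ("node-a", "node-b", "node-c"):
    for i in range(16):  # 16 virtual nodes per physical node
        ring[ring_hash("%s#%d" % (node, i))] = node

def get_node(key):
    pos = ring.bisect_left(ring_hash(key))
    if pos == len(ring):  # wrap around: hashes past the last vnode map to the first one
        pos = 0
    return ring.peekitem(pos)[1]

print(get_node("some-object-key"))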
class RangeModule:

    def __init__(self):
        self.data = SortedDict()

    def addRange(self, left: int, right: int) -> None:
        l, r = self.data.bisect(left), self.data.bisect(right)
        if l != 0:
            # step l back by one so it points at the interval that might cover `left`
            l -= 1
            # if that interval ends before `left`, it does not overlap, so move l forward again
            if self.data.peekitem(l)[1] < left:
                l += 1
        if l != r:
            # the intervals between the adjusted l and r overlap the new range and must be merged:
            # take the min of the left endpoints and the max of the right endpoints.
            left, right = min(left, self.data.peekitem(l)[0]), max(right, self.data.peekitem(r-1)[1])
            # now that we have the merged interval, pop the redundant intervals
            for _ in range(l, r):
                self.data.popitem(l)
        # insert the new (possibly merged) interval
        self.data[left] = right

    def queryRange(self, left: int, right: int) -> bool:
        l = self.data.bisect_right(left)
        # covered iff some interval starts at or before `left` and extends to at least `right`
        if l == 0 or self.data.peekitem(l - 1)[1] < right:
            return False

        return True

    def removeRange(self, left: int, right: int) -> None:
        l, r = self.data.bisect_right(left), self.data.bisect_right(right)
        if l != 0:
            l -= 1
            if self.data.peekitem(l)[1] < left:
                l += 1
        if l != r:
            ll, rr = min(left, self.data.peekitem(l)[0]), max(right, self.data.peekitem(r-1)[1])
            for _ in range(l, r):
                self.data.popitem(l)
            if ll < left: self.data[ll] = left
            if right < rr: self.data[right] = rr
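
A quick check of the interval semantics above (this mirrors the classic "Range Module" example; the values assume the class exactly as written):

rm = RangeModule()
rm.addRange(10, 20)           # tracked: [10, 20)
rm.removeRange(14, 16)        # tracked: [10, 14) and [16, 20)
print(rm.queryRange(10, 14))  # True  -- fully covered
print(rm.queryRange(13, 15))  # False -- [14, 15) is not tracked
print(rm.queryRange(16, 17))  # True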
Example #5
class RangeModuleDict:
    def __init__(self):
        self.data = SortedDict()

    def addRange(self, left: int, right: int) -> None:
        l = self.data.bisect(left)
        r = self.data.bisect(right)
        if l != 0:
            l -= 1
            if self.data.peekitem(l)[1] < left:
                l += 1
        if l != r:
            left = min(left, self.data.peekitem(l)[0])
            right = max(right, self.data.peekitem(r - 1)[1])
            for _ in range(l, r):
                self.data.popitem(l)
        self.data[left] = right

    def queryRange(self, left: int, right: int) -> bool:
        l = self.data.bisect_right(left)
        r = self.data.bisect_right(right)
        if l == 0 or self.data.peekitem(l - 1)[1] < right:
            return False
        return True

    def removeRange(self, left: int, right: int) -> None:
        l = self.data.bisect_right(left)
        r = self.data.bisect_right(right)
        if l != 0:
            l -= 1
            if self.data.peekitem(l)[1] < left:
                l += 1
        if l != r:
            minLeft = min(left, self.data.peekitem(l)[0])
            maxRight = max(right, self.data.peekitem(r - 1)[1])
            for _ in range(l, r):
                self.data.popitem(l)
            if minLeft < left:
                self.data[minLeft] = left
            if right < maxRight:
                self.data[right] = maxRight
Example #6
def test_bisect():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert temp.bisect_left('a') == 0
    assert temp.bisect_right('f') == 6
    assert temp.bisect('f') == 6  # bisect is an alias for bisect_right
Example #7
class SLIM(BaseMiner, MDLOptimizer):
    """SLIM: Directly Mining Descriptive Patterns

    SLIM looks for a compressed representation of transactional data.
    This compressed representation is a set of descriptive patterns,
    and can be used to:

    - provide a natively interpretable modeling of this data
    - make predictions on new data, using this condensed representation as an encoding scheme

    Idea of early stopping is inspired from
    http://eda.mmci.uni-saarland.de/pres/ida14-slimmer-poster.pdf


    Parameters
    ----------
    n_iter_no_change: int, default=100
        Number of candidate evaluations with no improvement to allow before stopping the optimization.
    tol: float, default=None
        Tolerance for the early stopping, in bits.
        When the compression size does not improve by at least `tol` for `n_iter_no_change`
        iterations, the training stops.
        Defaults to None, in which case it is computed automatically from the size of the input data.
    pruning: bool, default=True
        Whether to activate pruning. Pruned itemsets may be useful at
        prediction time, so it is usually recommended to set this to False
        when building a classifier. The model will be less concise, but will lead
        to more accurate predictions on average.


    Examples
    --------
    >>> from skmine.itemsets import SLIM
    >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
    >>> SLIM().fit(D).codetable  # doctest: +SKIP
    (butter, tea)         [2]
    (milk, bananas)    [0, 1]
    (cookies)          [1, 2]
    dtype: object

    References
    ----------
    .. [1]
        Smets, K & Vreeken, J
        "Slim: Directly Mining Descriptive Patterns", 2012

    .. [2] Gandhi, M & Vreeken, J
        "Slimmer, outsmarting Slim", 2014
    """
    def __init__(self, *, n_iter_no_change=100, tol=None, pruning=True):
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.standard_codetable_ = None
        self.codetable_ = SortedDict()
        self.model_size_ = None  # L(CT|D)
        self.data_size_ = None  # L(D|CT)
        self.pruning = pruning

    def fit(self, D, y=None):  # pylint:disable = too-many-locals
        """fit SLIM on a transactional dataset

        This generates new candidate patterns and adds those which improve compression,
        iteratively refining ``self.codetable_``

        Parameters
        ----------
        D: pd.DataFrame
            Transactional dataset, either as an iterable of iterables or encoded as tabular binary data
        """
        self._prefit(D, y=y)
        n_iter_no_change = 0
        seen_cands = set()

        tol = self.tol or self.standard_codetable_.map(len).median()

        while n_iter_no_change < self.n_iter_no_change:
            candidates = self.generate_candidates(stack=seen_cands)
            for cand, _ in candidates:
                data_size, model_size, update_d, prune_set = self.evaluate(
                    cand)
                diff = (self.model_size_ + self.data_size_) - (data_size +
                                                               model_size)

                if diff > 0.01:  # underflow
                    self.codetable_.update(update_d)
                    if self.pruning:
                        self.codetable_, data_size, model_size = self._prune(
                            self.codetable_, prune_set, model_size, data_size)

                    self.data_size_ = data_size
                    self.model_size_ = model_size

                if diff < tol:
                    n_iter_no_change += 1
                    if n_iter_no_change > self.n_iter_no_change:
                        break  # inner break

            if not candidates:  # if empty candidate generation
                n_iter_no_change += self.n_iter_no_change  # force while loop to break

        return self

    def decision_function(self, D):
        """Compute covers on new data, and return code length

        This function is named ``decision_function`` because code lengths
        represent the distance between a point and the current codetable.

        Setting ``pruning`` to False when creating the model
        is recommended to cover unseen data, and especially when building a classifier.

        Parameters
        ----------
        D: pd.DataFrame or np.ndarray
            new data to make predictions on, in tabular format

        Example
        -------
        >>> from skmine.itemsets import SLIM; import pandas as pd
        >>> def to_tabular(D): return pd.Series(D).str.join('|').str.get_dummies(sep="|")
        >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
        >>> new_D = to_tabular([['cookies', 'butter']])
        >>> slim = SLIM().fit(to_tabular(D))
        >>> slim.decision_function(new_D)
        0   -1.321928
        dtype: float32
        """
        D = _check_D(D)
        codetable = pd.Series(self.codetable_)
        D_sct = {
            k: Bitmap(np.where(D[k])[0])
            for k in D.columns if k in self.standard_codetable_
        }
        covers = cover(D_sct, codetable.index)

        mat = np.zeros(shape=(len(D), len(covers)))
        for idx, tids in enumerate(covers.values()):
            mat[tids, idx] = 1
        mat = pd.DataFrame(mat, columns=covers.keys())

        code_lengths = codetable.map(len)
        ct_codes = code_lengths / code_lengths.sum()
        codes = (mat * ct_codes).sum(axis=1).astype(np.float32)
        # positive sign on log2 to return negative distances (scikit-learn convention)
        r = _log2(codes)
        r[r == 0] = -np.inf  # zeros would fool a `shortest code wins` strategy
        return r

    def generate_candidates(self, stack=None, thresh=1e3):
        """
        Generate candidates from the current codetable (SLIM is any-time)

        Note that `stack` is updated during the execution of this method.

        Parameters
        ----------
        stack: set[frozenset], default=None
            a stack of already-seen candidates to be excluded
        thresh: int, default=1_000
            if the size of the current codetable is higher than `thresh`,
            candidates are generated on the fly and remain unsorted. Otherwise,
            they are returned in a list, sorted by decreasing order of estimated gain

        Returns
        -------
        iterator[tuple(frozenset, Bitmap)]
        """
        ct = SortedDict(self._standard_candidate_order, self.codetable.items())
        # if big number of elements in codetable, just take a generator, do not sort output
        gen = generate_candidates if len(
            ct) < thresh else generate_candidates_big
        return gen(ct, stack=stack)

    def evaluate(self, candidate):
        """
        Evaluate ``candidate``, considering the current codetable and a dataset ``D``

        Parameters
        ----------
        candidate: frozenset
            a new candidate to be evaluated

        Returns
        -------
        (float, float, dict, set)
            updated (data size, model size, codetable)
            and finally the set of itemsets for which usage decreased
        """
        idx = self.codetable_.bisect(candidate)
        ct = list(self.codetable_)
        ct.insert(idx, candidate)
        D = {k: v.copy() for k, v in self.standard_codetable_.items()}
        CTc = cover(D, ct)

        updated, decreased = {candidate: CTc[candidate]}, set()
        for iset, usage in self.codetable_.items():  # TODO useless if size is too big
            if usage != CTc[iset]:
                updated[iset] = CTc[iset]
                if len(CTc[iset]) < len(usage):
                    decreased.add(iset)

        data_size, model_size = self._compute_sizes(
            CTc)  # TODO pruning in evaluate

        return data_size, model_size, updated, decreased

    def reconstruct(self):
        """reconstruct the original data from the current `self.codetable_`"""
        return reconstruct(self.codetable_)

    @lru_cache(maxsize=1024)
    def get_support(self, itemset):
        """Get support from an itemset"""
        U = reduce(Bitmap.intersection, self.standard_codetable_.loc[itemset])
        return len(U)

    def _standard_cover_order(self, itemset):
        """
        Returns a tuple associated with an itemset,
        so that many itemsets can be sorted in Standard Cover Order
        """
        return (-len(itemset), -self.get_support(itemset), tuple(itemset))

    def _standard_candidate_order(self, itemset):
        return (-self.get_support(itemset), -len(itemset), tuple(itemset))

    def _prefit(self, D, y=None):
        if hasattr(D, 'ndim') and D.ndim == 2:
            D = _check_D(D)
            if y is not None:
                D = supervised_to_unsupervised(D, y)  # SKLEARN_COMPAT
            item_to_tids = {k: Bitmap(np.where(D[k])[0]) for k in D.columns}
        else:
            item_to_tids = _to_vertical(D)
        self.standard_codetable_ = pd.Series(item_to_tids)
        usage = self.standard_codetable_.map(len).astype(np.uint32)

        ct_it = ((frozenset([e]), tids) for e, tids in item_to_tids.items())
        self.codetable_ = SortedDict(self._standard_cover_order, ct_it)

        codes = -_log2(usage / usage.sum())

        # L(code_ST(X)) = L(code_CT(X)), because CT=ST
        self.model_size_ = 2 * codes.sum()

        self.data_size_ = (codes * usage).sum()

        return self

    def _get_standard_codes(self, index):
        """compute the size of a codetable index given the standard codetable"""
        flat_items = list(chain(*index))
        items, counts = np.unique(flat_items, return_counts=True)

        usages = self.standard_codetable_.loc[items].map(len).astype(np.uint32)
        usages /= usages.sum()
        codes = -_log2(usages)
        return codes * counts

    def _compute_sizes(self, codetable):
        """
        Compute sizes for both the data and the model

        .. math:: L(D|CT)
        .. math:: L(CT|D)

        Parameters
        ----------
        codetable : Mapping
            A series mapping itemsets to their usage tids

        Returns
        -------
        tuple(float, float)
            (data_size, model_size)
        """
        isets, usages = zip(*((_[0], len(_[1])) for _ in codetable.items()
                              if len(_[1]) > 0))
        usages = np.array(usages, dtype=np.uint32)
        codes = -_log2(usages / usages.sum())

        stand_codes = self._get_standard_codes(isets)

        model_size = stand_codes.sum() + codes.sum(
        )  # L(CTc|D) = L(X|ST) + L(X|CTc)
        data_size = (codes * usages).sum()
        return data_size, model_size

    def _prune(self, codetable, prune_set, model_size, data_size):
        """post prune a codetable considering itemsets for which usage has decreased

        Parameters
        ----------
        codetable: SortedDict
        prune_set: set
            itemsets in ``codetable`` for which usage has decreased
        model_size: float
            current model_size for ``codetable``
        data_size: float
            current data size when encoding ``D`` with ``codetable``

        Returns
        -------
        new_codetable, new_data_size, new_model_size: SortedDict, float, float
            a tuple containing the pruned codetable, and new model size and data size
            w.r.t this new codetable
        """
        prune_set = {k for k in prune_set if len(k) > 1}  # remove singletons
        while prune_set:
            cand = min(prune_set, key=lambda e: len(codetable[e]))
            prune_set.discard(cand)

            ct = list(codetable)
            ct.remove(cand)

            D = {k: v.copy()
                 for k, v in self.standard_codetable_.items()
                 }  # TODO avoid data copies
            CTp = cover(D, ct)
            decreased = {
                k
                for k, v in CTp.items()
                if len(k) > 1 and len(v) < len(codetable[k])
            }

            d_size, m_size = self._compute_sizes(CTp)

            if d_size + m_size < model_size + data_size:
                codetable.update(CTp)
                del codetable[cand]
                prune_set.update(decreased)
                data_size, model_size = d_size, m_size

        return codetable, data_size, model_size
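
The codetable_ above is kept in Standard Cover Order by _standard_cover_order: longer itemsets first, then higher support, with the itemset tuple as a final tie-break. With plain data and made-up supports the ordering looks like this (tuple(sorted(s)) is used here for a deterministic tie-break):

supports = {frozenset("ab"): 2, frozenset("bc"): 2, frozenset("b"): 3, frozenset("a"): 2}
order = sorted(supports, key=lambda s: (-len(s), -supports[s], tuple(sorted(s))))
print(["".join(sorted(s)) for s in order])  # ['ab', 'bc', 'b', 'a']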
Example #8
class StepVector():
    @classmethod
    def sliced(cls, other, start, end):
        newobj = cls(other.datatype, _tree=other._t, _bounds=(start, end))
        return newobj

    def __init__(self, datatype, _tree=None, _bounds=None):
        self.datatype = datatype

        if _tree is not None:
            self._t = _tree
        else:
            self._t = SortedDict()

        if _bounds is not None:
            self._bounds = _bounds
        else:
            self._bounds = (None, None)  # set upon slicing/subsetting

    def __getitem__(self, key):
        if type(key) == slice:
            if (key.step is not None) and (key.step != 1):
                raise ValueError("Invalid step value")

            start = key.start
            end = key.stop

            if self._bounds[0] is not None:
                if start is None:
                    start = self._bounds[0]
                else:
                    if start < self._bounds[0]:
                        raise ValueError("Start out of bounds")
            if self._bounds[1] is not None:
                if end is None:
                    end = self._bounds[1]
                else:
                    if end > self._bounds[1]:
                        raise ValueError("End out of bounds")

            return self.sliced(self, start, end)
        else:
            assert type(key) == int

            if self._bounds[0] is not None:
                if key < self._bounds[0]:
                    raise ValueError("Key out of bounds")
            if self._bounds[1] is not None:
                if key >= self._bounds[1]:
                    raise ValueError("Key out of bounds")

            if self._t:
                try:
                    prevkey = self._floor_key(key)
                    return self._t[prevkey]
                except KeyError:
                    # no item smaller than or equal to key
                    return self.datatype()
            else:
                # empty tree
                return self.datatype()

    def __setitem__(self, key, value):
        if type(key) == slice:
            start = key.start
            end = key.stop
        else:
            assert type(key) == int
            start = key
            end = key + 1

        assert start is not None
        assert end is not None

        assert type(value) == self.datatype
        assert end >= start

        if start == end:
            return

        # check next val
        if self._t:
            try:
                nkey = self._floor_key(end, bisect="right")
                nvalue = self._t[nkey]
            except KeyError:
                nkey = None
                nvalue = None
        else:
            # empty tree
            nkey = None
            nvalue = None

        # check prev val
        if self._t:
            try:
                pkey = self._floor_key(start)
                pvalue = self._t[pkey]
            except KeyError:
                pkey = None
                pvalue = None
        else:
            pkey = None
            pvalue = None

        # remove intermediate steps if any
        if self._t:
            a = self._t.bisect_left(start)
            b = self._t.bisect(end)
            assert a <= b
            del self._t.iloc[a:b]

        # set an end marker if necessary
        if nkey is None:
            self._t[end] = self.datatype()
        elif nvalue != value:
            self._t[end] = nvalue

        # set a start marker if necessary
        if pkey is None or pvalue != value:
            self._t[start] = value

    def __iter__(self):
        start, end = self._bounds

        if not self._t:
            # empty tree
            if start is None or end is None:
                return  # PEP 479: use return, not raise StopIteration; FIXME: can't figure out a better thing to do if only one is None
            else:
                if start < end:
                    yield (start, end, self.datatype())
                return

        if start is None:
            a = 0
        else:
            a = max(0, self._bisect_right(start) - 1)

        if end is None:
            b = len(self._t)
        else:
            b = self._bisect_right(end)

        assert b >= a
        if a == b:
            if a is None:
                start = self._t[a]
            if b is None:
                end = self._t[b]

            if start < end:
                yield (start, end, self.datatype())

            return

        it = self._t.islice(a, b)

        currkey = next(it)
        currvalue = self._t[currkey]
        if start is not None:
            currkey = max(start, currkey)
            if start < currkey:
                yield (start, currkey, self.datatype())

        prevkey, prevvalue = currkey, currvalue
        for currkey in it:
            currvalue = self._t[currkey]
            yield (prevkey, currkey, prevvalue)
            prevkey = currkey
            prevvalue = currvalue

        if end is not None:
            if currkey < end:
                yield (currkey, end, prevvalue)

    def add_value(self, start, end, value):
        assert type(value) == self.datatype

        # can't modify self while iterating over values; will change the tree, and thus f**k up iteration
        items = list(self[start:end])

        for a, b, x in items:
            if self.datatype == set:
                y = x.copy()
                y.update(value)
            else:
                y = x + value

            self[a:b] = y

    def _bisect_left(self, key):
        return self._t.bisect_left(key)

    def _bisect_right(self, key):
        return self._t.bisect_right(key)

    def _floor_key(self, key, bisect="left"):
        """
        Returns the greatest key less than or equal to key
        """

        if bisect == "right":
            p = self._bisect_right(key)
        else:
            p = self._bisect_left(key)

        if p == 0:
            raise KeyError
        else:
            return self._t.iloc[p - 1]
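
A small smoke test of the step semantics (this assumes the return-instead-of-StopIteration fixes above, and a sortedcontainers version where the iloc indexing used in __setitem__ still works):

sv = StepVector(int)
sv[0:10] = 5
sv[5:15] = 7
print(list(sv))       # [(0, 5, 5), (5, 15, 7)] -- (start, end, value) steps
print(sv[3], sv[12])  # 5 7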
Example #9
class SymReader:
    """
    Helper class that looks for symbols in multiple binaries given
    a memory map where they are loaded and an address.
    """
    def __init__(self, vmmap, path):

        self.vmmap = vmmap
        """Memory map used to locate the start address of sections."""

        self.paths = path
        """Search paths for binaries that contain symbols."""

        self._symbol_map = SortedDict()
        """Internal mapping of address to symbol name"""

        self.caprelocs = {}
        """Capability relocations, populated by fetch caprelocs"""

        self.captable_mappings = SortedDict()
        """Hold capability table mappings as start => (end, file)"""

        self._capreloc_fmt = struct.Struct(">5Q")
        """
        5 uint64 in order: reloc_target, object, offset, size, perms
        """

        self.loaded = []
        """List of loaded executables"""

        with ProgressTimer("Load symbols", logger):
            self._load_mapped()
            logger.debug("Symbol map %s", self)

    def _find_elf(self, vme_path):
        fname = os.path.basename(vme_path)
        if not fname:
            return None
        logger.debug("looking for %s", fname)
        for path in self.paths:
            bin_file = os.path.join(path, fname)
            if os.path.exists(bin_file):
                logger.info("Found symbols for %s (%s)", fname, bin_file)
                return bin_file
        logger.debug("No ELF found for %s", bin_file)
        return None

    def map_base(self, vme_path):
        """
        Find the base address where this file has been mapped
        in the vmmap
        """
        lower_addr = np.inf
        for vme in self.vmmap.get_model():
            if (os.path.basename(vme.path) == os.path.basename(vme_path)
                    and vme.start < lower_addr):
                lower_addr = vme.start
        if lower_addr == np.inf:
            return None
        return lower_addr

    def _load_mapped(self):
        for vme in self.vmmap.get_model():
            bin_file = self._find_elf(vme.path)
            if bin_file is None or bin_file in self.loaded:
                # is the file already been loaded?
                continue

            self.loaded.append(bin_file)
            elf_file = ELFFile(open(bin_file, "rb"))
            symtab = elf_file.get_section_by_name(".symtab")

            # do we need to relocate the addresses?
            if elf_file.header["e_type"] == "ET_DYN":
                map_base = self.map_base(vme.path)
            else:
                map_base = 0

            for sym in symtab.iter_symbols():
                if sym["st_shndx"] != ENUM_ST_SHNDX["SHN_UNDEF"]:
                    self._symbol_map[map_base + sym["st_value"]] = (sym.name,
                                                                    bin_file)
        kern_image = self._find_elf("kernel")
        kern_full = self._find_elf("kernel.full")
        if kern_full is not None:
            self.loaded.append(kern_full)
            self._load_kernel(kern_full)
        elif kern_image is not None:
            self.loaded.append(kern_image)
            self._load_kernel(kern_image)

    def _load_kernel(self, path):
        elf_file = ELFFile(open(path, "rb"))
        symtab = elf_file.get_section_by_name(".symtab")

        # the kernel should not be ET_DYN
        assert elf_file.header["e_type"] != "ET_DYN"
        for sym in symtab.iter_symbols():
            self._symbol_map[sym["st_value"]] = (sym.name, path)

    def __str__(self):
        data = io.StringIO()
        data.write("SymReader loaded symbols:\n")
        for addr, (sym, fname) in self._symbol_map.items():
            data.write("0x{:x} {} {}\n".format(addr, fname, sym))
        return data.getvalue()

    def fetch_caprelocs(self):
        """
        Populate the caprelocs map
        """
        for bin_file in self.loaded:
            elf_file = ELFFile(open(bin_file, "rb"))
            # grab __cap_relocs section
            relocs = elf_file.get_section_by_name("__cap_relocs")
            if relocs is None:
                logger.info("No capability relocations for %s", bin_file)
                continue

            # do we need to relocate the addresses?
            if elf_file.header["e_type"] == "ET_DYN":
                map_base = self.map_base(bin_file)
            else:
                map_base = 0

            unpacked_relocs = self._capreloc_fmt.iter_unpack(relocs.data())
            nrelocs = 0
            for reloc in unpacked_relocs:
                # caprelocs[target] = [base, offset, length, perms]
                nrelocs += 1
                self.caprelocs[map_base + reloc[0]] = reloc[1:]
            logger.info("Found caprelocs for %s, %d entries", bin_file,
                        nrelocs)

    def fetch_cap_tables(self):
        for path in self.loaded:
            elf = ELFFile(open(path, "rb"))
            if elf.header["e_type"] == "ET_DYN":
                map_base = self.map_base(path)
            else:
                map_base = 0
            assert map_base is not None
            # grab section with given name
            captable = elf.get_section_by_name(".cap_table")
            if captable is None:
                logger.info("No capability table for %s", path)
                continue
            sec_start = captable["sh_addr"] + map_base
            sec_end = sec_start + captable["sh_size"]
            logger.info("Found capability table %s @ [0x%x, 0x%x]", path,
                        sec_start, sec_end)
            self.captable_mappings[sec_start] = {"end": sec_end, "path": path}

    def get_captable(self, addr):
        index = self.captable_mappings.bisect(addr) - 1
        if index < 0:
            return None
        key = self.captable_mappings.iloc[index]
        if addr > self.captable_mappings[key]["end"]:
            return None
        return self.captable_mappings[key]["path"]

    def find_file(self, addr):
        """
        Find the file where the symbol at the given address is defined.
        """
        try:
            sym, fname = self._symbol_map[addr]
            return fname
        except KeyError:
            return None

    def find_symbol(self, addr):
        """
        Return the symbol where the address is found,
        if possible.
        """
        entry = self.find_address(addr)
        if entry:
            return entry[0]
        return None

    def find_address(self, addr):
        """
        Return the symbol and file where the address is found.
        """
        try:
            sym, fname = self._symbol_map[addr]
            return (sym, os.path.basename(fname))
        except KeyError:
            return None

    def find_function(self, addr):
        """
        Return the symbol and file of the function containing the
        given address, if possible.
        """
        index = self._symbol_map.bisect(addr) - 1
        if index < 0:
            return None
        key = self._symbol_map.iloc[index]
        sym, fname = self._symbol_map[key]
        return (sym, os.path.basename(fname))
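
The address lookups in find_function and get_captable boil down to "take the last key at or before addr"; here is a self-contained sketch of that pattern with placeholder addresses and symbol names:

from sortedcontainers import SortedDict

symbol_map = SortedDict({0x1000: "main", 0x1400: "helper", 0x2000: "exit_stub"})

def find_function(addr):
    index = symbol_map.bisect(addr) - 1  # index of the last symbol starting at or before addr
    if index < 0:
        return None
    return symbol_map.peekitem(index)[1]

print(find_function(0x13ff))  # main
print(find_function(0x1400))  # helper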
Example #10
class PiecewiseConstantFunction(Generic[T]):
    def __init__(self, initial_value: float = 0) -> None:
        """ Initialize the constant function to a particular value

        :param initial_value: the starting value for the function
        """
        self.breakpoints = SortedDict()
        self._initial_value: float = initial_value

    def add_breakpoint(self,
                       xval: XValue[T],
                       yval: float,
                       squash: bool = True) -> None:
        """ Add a breakpoint to the function and update the value

        Let f(x) be the original function, and next_bp be the first breakpoint > xval; after calling
        this method, the function will be modified to f'(x) = yval for x in [xval, next_bp)

        :param xval: the x-position of the breakpoint to add/modify
        :param yval: the value to set the function to at xval
        :param squash: if True and f(xval) = yval before calling this method, the function will remain unchanged
        """
        if squash and self.call(xval) == yval:
            return
        self.breakpoints[xval] = yval

    def add_delta(self, xval: XValue[T], delta: float) -> None:
        """ Modify the function value for x >= xval

        Let f(x) be the original function; After calling this method,
        the function will be modified to f'(x) = f(x) + delta for all x >= xval

        :param xval: the x-position of the breakpoint to add/modify
        :param delta: the amount to shift the function value by at xval
        """
        if delta == 0:
            return

        if xval not in self.breakpoints:
            self.breakpoints[xval] = self.call(xval)

        for x in self.breakpoints.irange(xval):
            self.breakpoints[x] += delta

        self.values.cache_clear()
        self.integrals.cache_clear()

    def call(self, xval: XValue[T]) -> float:
        """ Compute the output of the function at a point

        :param xval: the x-position to compute
        :returns: f(xval)
        """
        if len(self.breakpoints) == 0 or xval < self.breakpoints.keys()[0]:
            return self._initial_value
        else:
            lower_index = self.breakpoints.bisect(xval) - 1
            return self.breakpoints.values()[lower_index]

    def _breakpoint_info(
        self, index: Optional[int]
    ) -> Tuple[Optional[int], Optional[XValue[T]], float]:
        """ Helper function for computing breakpoint information

        :param index: index of the breakpoint to compute
        :returns: (index, breakpoint, value)
          * index is the breakpoint index (if it exists), or None if we're off the end
          * breakpoint is the x-value of the breakpoint, or None if we're off the end
          * value is f(breakpoint), or f(last_breakpoint) if we're off the end
        """
        try:
            breakpoint, value = self.breakpoints.peekitem(index)
        except IndexError:
            index = None
            breakpoint, value = None, self.breakpoints.values()[-1]
        return (index, breakpoint, value)

    @lru_cache(maxsize=_LRU_CACHE_SIZE
               )  # cache results of calls to this function
    def values(self, start: XValue[T], stop: XValue[T],
               step: XValueDiff[T]) -> 'SortedDict[XValue[T], float]':
        """ Compute a sequence of values of the function

        This is more efficient than [self.call(xval) for xval in range(start, stop, step)] because each self.call(..)
        takes O(log n) time due to the binary tree structure of self._breakpoints.  This method can compute the range
        of values in linear time in the range, which is significantly faster for large value ranges.

        :param start: lower bound of value sequence
        :param stop: upper bound of value sequence
        :param step: width between points in the sequence
        :returns: a SortedDict of the values of the function between start and stop, with the x-distance between
            each data-point equal to `step`; like normal "range" functions the right endpoint is not included
        """

        step = step or (stop - start)
        if len(self.breakpoints) == 0:
            num_values = int(math.ceil((stop - start) / step))
            return SortedDict([(start + step * i, self._initial_value)
                               for i in range(num_values)])

        curr_xval = start
        curr_value = self.call(start)
        next_index, next_breakpoint, next_value = self._breakpoint_info(
            self.breakpoints.bisect(start))

        sequence = SortedDict()
        while curr_xval < stop:
            sequence[curr_xval] = curr_value

            next_xval = min(stop, curr_xval + step)
            while next_breakpoint and next_xval >= next_breakpoint:
                assert next_index is not None  # if next_breakpoint is set, next_index should also be set
                curr_value = next_value
                next_index, next_breakpoint, next_value = self._breakpoint_info(
                    next_index + 1)
            curr_xval = next_xval

        return sequence

    @lru_cache(maxsize=_LRU_CACHE_SIZE
               )  # cache results of calls to this function
    def integrals(
        self,
        start: XValue[T],
        stop: XValue[T],
        step: XValueDiff[T],
        transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x),
    ) -> 'SortedDict[XValue[T], float]':
        """ Compute a sequence of integrals of the function

        :param start: lower bound of integral sequence
        :param stop: upper bound of integral sequence
        :param step: width of each "chunk" of the integral sequence
        :param transform: function to apply to x-widths before computing the integral
        :returns: a SortedDict of the numeric integral values of the function between start and stop;
            each integral has a range of size `step`, and the key-value is the left endpoint of the chunk
        """
        step = step or (stop - start)
        if len(self.breakpoints) == 0:
            # If there are no breakpoints, just split up the range into even widths and compute
            # (width * self._initial_value) for each chunk.
            step_width = transform(step)
            range_width = transform(stop - start)
            num_full_chunks = int(range_width // step_width)
            sequence = SortedDict([(start + step * i,
                                    step_width * self._initial_value)
                                   for i in range(num_full_chunks)])

            # If the width does not evenly divide the range, compute the last chunk separately
            if range_width % step_width != 0:
                sequence[
                    start + step *
                    num_full_chunks] = range_width % step_width * self._initial_value
            return sequence

        # Set up starting loop parameters
        curr_xval = start
        curr_value = self.call(start)
        next_index, next_breakpoint, next_value = self._breakpoint_info(
            self.breakpoints.bisect(start))

        # Loop through the entire range and compute the integral of each chunk
        sequence = SortedDict()
        while curr_xval < stop:
            orig_xval = curr_xval
            next_xval = min(stop, curr_xval + step)

            # For each breakpoint in [curr_xval, next_xval), compute the area of that sub-chunk
            next_integral: float = 0
            while next_breakpoint and next_xval >= next_breakpoint:
                assert next_index is not None  # if next_breakpoint is set, next_index should also be set
                next_integral += transform(next_breakpoint -
                                           curr_xval) * curr_value
                curr_xval = next_breakpoint
                curr_value = next_value
                next_index, next_breakpoint, next_value = self._breakpoint_info(
                    next_index + 1)

            # Handle any remaining width between the last breakpoint and the end of the chunk
            next_integral += transform(next_xval - curr_xval) * curr_value
            sequence[orig_xval] = next_integral

            curr_xval = next_xval

        return sequence

    def integral(
        self,
        start: XValue[T],
        stop: XValue[T],
        transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x),
    ) -> float:
        """ Helper function to compute the integral of the whole specified range

        :param start: lower bound of the integral
        :param stop: upper bound of the integral
        :returns: the integral of the function between start and stop
        """
        return self.integrals(start, stop, (stop - start),
                              transform).values()[0]

    def __str__(self) -> str:
        ret = f'{self._initial_value}, x < {self.breakpoints.keys()[0]}\n'
        for xval, yval in self.breakpoints.items():
            ret += f'{yval}, x >= {xval}\n'
        return ret

    def __add__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value + other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 + y1)
        return new_func

    def __sub__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value - other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 - y1)
        return new_func

    def __mul__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value * other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 * y1)
        return new_func

    def __truediv__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        try:
            new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
                self._initial_value / other._initial_value)
        except ZeroDivisionError:
            new_func = PiecewiseConstantFunction()

        for xval, y0, y1 in _merged_breakpoints(self, other):
            try:
                new_func.add_breakpoint(xval, y0 / y1)
            except ZeroDivisionError:
                new_func.add_breakpoint(xval, 0)
        return new_func
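
The core of call() above is a single bisect: the value at x is the value stored at the last breakpoint at or before x, or the initial value if x lies before the first breakpoint. A stripped-down sketch of just that lookup:

from sortedcontainers import SortedDict

breakpoints = SortedDict({0: 1.0, 10: 3.0, 20: 0.5})
initial_value = 0.0

def call(x):
    if not breakpoints or x < breakpoints.keys()[0]:
        return initial_value
    return breakpoints.values()[breakpoints.bisect(x) - 1]

print(call(-5), call(0), call(15), call(25))  # 0.0 1.0 3.0 0.5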
Example #11
class ModelDelayWrapper:
    """A wrapper around QuadrotorModel that delays observations, actions and introduces jitter to the controller
    execution.
    """
    __slots__ = "delay_scale", "model", "controller_jitter", "observation_delay", "observation_jitter", \
                "action_delay", "action_jitter", "controller_period", "past_states", "future_actions", "time",

    def __init__(self,
                 model: QuadrotorModel,
                 delay_scale: float = 1,
                 controller_jitter: Union[float, str] = 0,
                 controller_period: float = 0.01,
                 observation_delay: float = 0,
                 observation_jitter: Union[float, str] = 0,
                 action_delay: float = 0,
                 action_jitter: Union[float, str] = 0):
        """
        Creates a new ModelDelayWrapper wrapping the specified model.

        All times specified in this class are in seconds.

        Note:
            All jitters can either be specified as a float, in which case they are interpreted as a standard
            deviation of a mean centered gauss distribution from which to draw them, or as a path to a text file that
            can be loaded using np.loadtxt. The jitters are then generated by drawing random elements from the resulting
            numpy array. This feature is useful if you measured the jitter in your real hardware system and want to
            reproduce the jitter distribution in simulation as closely as possible.

        :param model: The model to wrap.
        :param delay_scale: Global scaling factor by which all other delays are multiplied. Default is 1.
        :param controller_jitter: The amount of jitter to apply to the control period.
        :param controller_period: The duration between calls to the controller. The simulation will advance this much on
        each call to the step method.
        :param observation_delay: The age of the state that is to be observed.
        :param observation_jitter: The amount of jitter in the age of the state that is observed.
        :param action_delay: The amount of time that has to pass before an action is applied to the system.
        :param action_jitter: The jitter to be added to the action delay.
        """

        if isinstance(observation_jitter, str):
            observation_jitter = np.loadtxt(observation_jitter)
        if isinstance(action_jitter, str):
            action_jitter = np.loadtxt(action_jitter)
        if isinstance(controller_jitter, str):
            controller_jitter = np.loadtxt(controller_jitter)

        observation_jitter = np.asarray(observation_jitter)
        action_jitter = np.asarray(action_jitter)
        controller_jitter = np.asarray(controller_jitter)

        assert observation_delay >= 0
        assert action_delay >= 0
        assert np.all(observation_jitter >= 0)
        assert np.all(action_jitter >= 0)
        assert delay_scale >= 0

        self.delay_scale = delay_scale
        self.model = model
        self.controller_jitter = controller_jitter
        self.observation_delay = observation_delay
        self.observation_jitter = observation_jitter
        self.action_delay = action_delay
        self.action_jitter = action_jitter
        self.controller_period = controller_period

        self.past_states = SortedDict()
        self.future_actions = SortedDict()
        self.time = None

    def reset(self, initial_action: np.ndarray,
              initial_state: SysState) -> (SysState, float, float):
        """
        Resets the state of the model delay wrapper by clearing all past states, future actions and initializing with
        the specified initial action and state.
        
        :param initial_action: The initial action to assume.
        :param initial_state: The initial state to assume
        :return: A triple consisting of
            1. the initial observed state (which is just the initial state that was passed),
            2. the initial controller period and
            3. the initial observation age.
        """
        self.past_states = SortedDict()
        self.future_actions = SortedDict()

        observation_delay = self._sample_observation_delay()
        self.time = observation_delay
        self.past_states[0] = (initial_action, initial_state)
        return initial_state, self._sample_controller_period(
        ), observation_delay

    def compute_past_state(self, time: float) -> SysState:
        """
        Computes the state at some arbitrary point in the past.

        :param time: The time at which to compute the state.
        :return: The state at the time.
        """
        # Find last computed state just before the observed state in the state history
        # (sbt = state before t = most recent state before t)
        sbt_index = self.past_states.bisect(time) - 1
        assert sbt_index >= 0
        t_sbt, (sbt_action, sbt) = self.past_states.peekitem(sbt_index)
        assert time >= t_sbt

        return self.model.next_state(sbt, sbt_action, time - t_sbt)

    def compute_current_state(self) -> SysState:
        """
        Computes the current state of the system.

        :return: The current state of the system.
        """
        return self.compute_past_state(self.time)

    def step(self, action: np.ndarray) -> (SysState, float, float):
        """
        Applies an action to the delayed system and advances the time by the controller period.

        :param action: The action to execute (probably in the future because it is delayed).
        :return: A triple consisting of
            1. the observed state (probably from the past),
            2. the actual controller period (with jitter)
            3. The age of the observed state.
        """
        # Insert future action to the action schedule
        self.future_actions[self.time + self._sample_action_delay()] = action

        current_controller_period = self._sample_controller_period()
        self.time += current_controller_period
        self._materialize_past_states()

        # Sample time of observation
        t_obs = np.clip(self.time - self._sample_observation_delay(), 0,
                        self.time)

        # Compute observed state
        observed_state = self.compute_past_state(t_obs)

        return observed_state, current_controller_period, self.time - t_obs

    def _materialize_past_states(self):
        # Compute states that now lie in the past
        while len(self.future_actions) > 0 and self.future_actions.peekitem(
                0)[0] <= self.time:
            t_a, new_action = self.future_actions.popitem(0)
            t_sba, (old_action, sba) = self.past_states.peekitem()
            assert t_a - t_sba >= 0
            new_state = self.model.next_state(sba, old_action, t_a - t_sba)
            self.past_states[t_a] = (new_action, new_state)

    def _sample_action_delay(self):
        if self.action_jitter.ndim == 0:
            return np.clip(
                self.action_delay + np.random.randn() * self.action_jitter, 0,
                np.inf) * self.delay_scale
        else:
            return np.clip(
                self.action_delay + np.random.choice(self.action_jitter), 0,
                np.inf) * self.delay_scale

    def _sample_observation_delay(self):
        if self.observation_jitter.ndim == 0:
            return np.clip(self.observation_delay + np.random.randn() * self.observation_jitter, 0, np.inf) \
                   * self.delay_scale
        else:
            return np.clip(self.observation_delay + np.random.choice(self.observation_jitter), 0, np.inf) \
                   * self.delay_scale

    def _sample_controller_period(self):
        if self.controller_jitter.ndim == 0:
            return np.clip(
                self.controller_period +
                np.random.randn() * self.controller_jitter, 0, np.inf)
        else:
            return np.clip(
                self.controller_period +
                np.random.choice(self.controller_jitter), 0, np.inf)
def test_bisect_key():
    # older sortedcontainers API: the load factor (7) is passed directly to the constructor;
    # newer releases set it separately via temp._reset(7), as in Example #1
    temp = SortedDict(modulo, 7, ((val, val) for val in range(100)))
    assert all(temp.bisect(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_right(val) == ((val % 10) + 1) * 10 for val in range(100))
    assert all(temp.bisect_left(val) == (val % 10) * 10 for val in range(100))
Example #13
# iterate through whole directory
for subdir, dirs, files in os.walk(photosDirectory) :
  for file in files :

    if isPhoto(file) :
      try :
        exif = getExif(os.path.join(subdir, file))
        if not cameraIsValid(exif) :
          continue
        # get focal length and convert from rational data type to float
        focalLength = exif[FOCALLENGTH_TAG][0] / exif[FOCALLENGTH_TAG][1]
        # count every focal length occurrence in the dictionary
        if (focalLength in occurences) :
          occurences[focalLength] = occurences[focalLength] + 1
        else:   # find nearest
          index = occurences.bisect(focalLength)
          greater = occurences.iloc[index]
          smaller = occurences.iloc[index - 1]
          nearestFL = greater if (greater - focalLength < focalLength - smaller) else smaller
          occurences[nearestFL] = occurences[nearestFL] + 1
      except (KeyError, TypeError, IndexError) :
        # there is no focal length info in image exif data (Key/Type/IndexError)
        pass

# plot the graph
position = arange(len(focalLengths)) + .5
barh(position, occurences.values(), align='center', color='#FF0000')
yticks(position, occurences.keys())
xlabel('Occurrences')
ylabel('Focal length')
title('Focal length usage analysis')
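
The "find nearest" branch above assumes the measured focal length always falls strictly between two existing keys; at the top end occurences.iloc[index] raises IndexError, and at the bottom end iloc[index - 1] silently wraps to the largest key. A variant that guards both ends (names as in the script, occurences assumed non-empty) might be:

def nearest_focal_length(occurences, focalLength):
    index = occurences.bisect(focalLength)
    candidates = []
    if index < len(occurences):
        candidates.append(occurences.keys()[index])      # first key above focalLength
    if index > 0:
        candidates.append(occurences.keys()[index - 1])  # last key at or below focalLength
    return min(candidates, key=lambda k: abs(k - focalLength))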
Example #14
class GrowSpaceSortedEnv(gym.Env):
    def __init__(self,
                 width=DEFAULT_RES,
                 height=DEFAULT_RES,
                 light_dif=LIGHT_DIFFUSION):
        self.width = width
        self.height = height
        self.seed()
        self.light_dif = light_dif
        self.action_space = gym.spaces.Discrete(
            3)  # L, R, keep of light paddle
        self.observation_space = gym.spaces.Box(0,
                                                255,
                                                shape=(height, width, 3),
                                                dtype=np.uint8)
        self.steps = 0

        # data format for branches: they are indexed/sorted by x_end position and each
        # key has a list of values that are [y_end, x_start, y_start, children]

        self.branches = SortedDict()
        self.points = SortedDict()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def light_move_R(self):
        if np.around(
                self.light_left,
                1) >= 1 - LIGHT_WIDTH - LIGHT_STEP:  # limit of coordinates
            self.light_left = 1 - LIGHT_WIDTH  # stay put
        else:
            self.light_left += LIGHT_STEP  # move by .1 right

    def light_move_L(self):
        if np.around(self.light_left, 1) <= LIGHT_STEP:  # limit of coordinates
            self.light_left = 0
        else:
            self.light_left -= LIGHT_STEP  # move by .1 left

    def find_closest_branch(self, point_x, branches):
        branch_names = []
        branch_distances = []
        # prefilter by x
        if len(branches) > MAX_BRANCHES:
            branches_trimmed = sample(branches, MAX_BRANCHES)
        else:
            branches_trimmed = branches
        for branch in branches_trimmed:
            dist_x = branch - point_x
            if np.abs(dist_x) <= MAX_GROW_DIST:
                # we got a potential candidate - now let's check Y
                dist_y = self.branches[branch][0] - self.points[point_x]
                if np.abs(dist_y) <= MAX_GROW_DIST:
                    dist = norm((dist_x, dist_y))
                    if dist <= MAX_GROW_DIST:
                        branch_names.append(branch)
                        branch_distances.append(dist)
        if len(branch_distances) == 0:
            return None, None
        argmin = np.argmin(branch_distances)
        return branch_names[argmin], branch_distances[argmin]

    def grow_plant(self):
        points_filtered = list(
            self.get_points_in_range(self.light_left - MAX_GROW_DIST,
                                     self.light_right + MAX_GROW_DIST))
        branches_filtered = list(
            self.get_branches_in_range(self.light_left, self.light_right))

        growths = {}  # will have the format: [(branch, target_x)]

        for point in points_filtered:
            closest_branch, dist = self.find_closest_branch(
                point, branches_filtered)
            if closest_branch is None:
                continue
            if dist < MIN_GROW_DIST:
                self.points.pop(point)
            elif dist < MAX_GROW_DIST:
                if closest_branch not in growths:
                    growths[closest_branch] = [point]
                else:
                    growths[closest_branch].append(point)

        for branch, points in growths.items():
            end_x = (branch +
                     (sum(points) / len(points) - branch) * BRANCH_LENGTH
                     )  # grow towards the mean x of the attracting points
            branch_y = self.branches[branch][0]
            point_ys = [self.points[p] for p in points]
            end_y = branch_y + (sum(point_ys) / len(point_ys) -
                                branch_y) * BRANCH_LENGTH
            while end_x in self.branches:
                end_x += EPSILON  # keys need to be unique in branches dict
            self.branches[end_x] = [end_y, branch, self.branches[branch][0], 0]

        # update_all_branch_widths(branches)

    def get_points_in_range(self, start, end):
        return self.points.irange(start, end)  # this is dark SortedDict magic

    def get_branches_in_range(self, start, end):
        return self.branches.irange(start,
                                    end)  # this is dark SortedDict magic

    def branch_bisect_range(self, lower, upper):
        start = self.branches.bisect(lower)
        end = self.branches.bisect_right(upper)
        # slice the sorted items view by position; a SortedDict itself cannot be sliced
        return self.branches.items()[start:end]

    def get_branch_start_end_thiccness(self, end_x):
        end_y, start_x, start_y, children = self.branches[end_x]
        thicc = ir((children + 1) * BRANCH_THICCNESS * self.width)
        return (
            (ir(start_x * self.width), ir(start_y * self.height)),
            (ir(end_x * self.width), ir(end_y * self.height)),
            thicc,
        )

    def get_observation(self, debug_show_scatter=False):
        # new empty image
        img = np.zeros((self.height, self.width, 3), dtype=np.uint8)

        # place light as rectangle
        x1 = ir(self.light_left * self.width)
        x2 = ir(self.light_right * self.width)
        cv2.rectangle(img,
                      pt1=(x1, 0),
                      pt2=(x2, self.height),
                      color=LIGHT_COLOR,
                      thickness=-1)

        if debug_show_scatter:
            points_filtered = self.get_points_in_range(self.light_left,
                                                       self.light_right)
            for k in list(points_filtered):
                x = ir(k * self.width)
                y = ir(self.points[k] * self.height)
                cv2.circle(img,
                           center=(x, y),
                           radius=POINT_RADIUS,
                           color=POINT_COLOR,
                           thickness=-1)

        # Draw plant as series of lines (1 branch = 1 line)
        for branch_x_end in self.branches.keys():
            start, end, thiccness = self.get_branch_start_end_thiccness(
                branch_x_end)
            cv2.line(img,
                     pt1=start,
                     pt2=end,
                     color=PLANT_COLOR,
                     thickness=thiccness)

        # place goal as filled circle with center and radius
        # also important - place goal last because must be always visible
        x = ir(self.target[0] * self.width)
        y = ir(self.target[1] * self.height)
        cv2.circle(img,
                   center=(x, y),
                   radius=ir(0.03 * self.width),
                   color=(0, 0, 255),
                   thickness=-1)

        # flip image, because plant grows from the bottom, not the top
        img = cv2.flip(img, 0)

        return img

    def reset(self):
        random_start = np.random.rand()  # is in range [0, 1)
        self.branches.clear()
        self.points.clear()

        self.branches[random_start] = [FIRST_BRANCH_HEIGHT, random_start, 0, 0]

        self.target = [np.random.uniform(0, 1), np.random.uniform(0.8, 1)]
        if random_start >= (1 - LIGHT_WIDTH / 2):
            self.light_left = 1 - LIGHT_WIDTH
        elif random_start <= LIGHT_WIDTH / 2:
            self.light_left = 0
        else:
            self.light_left = random_start - (LIGHT_WIDTH / 2)

        self.light_right = self.light_left + LIGHT_WIDTH

        points_x = np.random.uniform(0, 1, self.light_dif)
        points_y = np.random.uniform(FIRST_BRANCH_HEIGHT + 0.1, 1,
                                     self.light_dif)

        for i in range(self.light_dif):
            while points_x[i] in self.points:
                points_x[i] += EPSILON
            self.points[points_x[i]] = points_y[i]

        self.steps = 0

        return self.get_observation()

    def step(self, action):
        # Two possible actions, move light left or right

        if action == 0:
            self.light_move_L()

        if action == 1:
            self.light_move_R()

        self.light_right = self.light_left + LIGHT_WIDTH

        if action == 2:
            # then we keep the light in place
            pass

        self.grow_plant()

        # # Calculate distance to target
        # reward = 1 / self.distance_target(tips)

        ####### TODO

        reward = 0  # TODO

        ####### TODO

        # Render image of environment at current state
        observation = self.get_observation()  # image

        done = False  # because we don't have a terminal condition
        misc = {}  # (optional) additional information about plant/episode/other stuff, leave empty for now
        # print("steps:", self.steps)    # sanity check
        self.steps += 1
        return observation, reward, done, misc

    def render(self,
               mode="human",
               debug_show_scatter=False):  # or mode="rgb_array"
        img = self.get_observation(debug_show_scatter)

        if mode == "human":
            cv2.imshow("plant", img)  # create opencv window to show plant
            cv2.waitKey(
                1)  # this is necessary or the window closes immediately
        else:
            return img
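A minimal usage sketch for this environment (assuming the module-level constants referenced above, such as DEFAULT_RES, LIGHT_WIDTH, LIGHT_STEP and MAX_GROW_DIST, and helpers like `ir`, are defined in the original module):

env = GrowSpaceSortedEnv()
obs = env.reset()                       # (height, width, 3) uint8 image
for _ in range(10):
    action = env.action_space.sample()  # 0 = move light left, 1 = move right, 2 = keep
    obs, reward, done, misc = env.step(action)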
Пример #15
0
class SLIM(BaseMiner, MDLOptimizer, InteractiveMiner):
    """SLIM: Directly Mining Descriptive Patterns

    SLIM looks for a compressed representation of transactional data.
    This compressed representation is a set of descriptive patterns,
    and can be used to:

    - provide a natively interpretable modeling of this data
    - make predictions on new data, using this condensed representation as an encoding scheme


    Parameters
    ----------
    k: int, default=50
        Number of non-singleton itemsets to mine.
        A singleton is an itemset containing a single item.
    pruning: bool, default=True
        Whether to activate pruning or not. Pruned itemsets may be useful at
        prediction time, so it is usually recommended to set this to `False`
        when building a classifier. The model will be less concise, but will
        lead to more accurate predictions on average.
    n_items: int, default=200
        Number of most frequent items to consider for mining.
        As SLIM is highly dependent on the set of symbols from which
        it refines its codetable,
        lowering this argument will significantly improve runtime.

        Note: The reconstruction is lossless from this set of items. If the input data
        has more than `n_items` items, then the reconstruction will be lossy w.r.t this
        input data.
    tol: float, default=0.5
        Minimum compression gain (in bits) for a candidate to be accepted


    Examples
    --------
    >>> from skmine.itemsets import SLIM
    >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
    >>> SLIM().fit(D).discover(singletons=True, usage_tids=True)
    (bananas, milk)    [0, 1]
    (butter, tea)         [2]
    (cookies,)         [1, 2]
    dtype: object

    References
    ----------
    .. [1]
        Smets, K & Vreeken, J
        "Slim: Directly Mining Descriptive Patterns", 2012

    .. [2] Gandhi, M & Vreeken, J
        "Slimmer, outsmarting Slim", 2014
    """
    def __init__(
        self,
        *,
        k=50,
        pruning=True,
        n_items=200,
        tol=0.5,
    ):
        self.n_items = n_items
        self.tol = tol
        self.standard_codetable_ = None
        self.codetable_ = SortedDict()
        self.model_size_ = None  # L(CT|D)
        self.data_size_ = None  # L(D|CT)
        self.pruning = pruning
        self.k = k

    def fit(self, D, y=None):  # pylint:disable = too-many-locals
        """fit SLIM on a transactional dataset

        This generates new candidate patterns and adds those which improve compression,
        iteratively refining ``self.codetable_``

        Parameters
        ----------
        D: iterable of iterables or array-like
            Transactional dataset, either as an iterable of iterables
            or encoded as tabular binary data
        """
        self.prefit(D, y=y)
        seen_cands = set()
        k = 0

        while k < self.k:
            candidates = self.generate_candidates(stack=seen_cands)
            for cand, _ in candidates:
                data_size, model_size, usages = self.evaluate(cand)
                diff = (self.model_size_ + self.data_size_) - (data_size +
                                                               model_size)

                if diff >= self.tol:
                    self.update(usages=usages,
                                data_size=data_size,
                                model_size=model_size)

                    k = sum(map(lambda iset: len(iset) > 1, self.codetable_))
                if k >= self.k:
                    break

            if not candidates:  # if empty candidate generation
                warnings.warn(  # assumes `import warnings` at module level
                    f"could not find `{self.k}` itemsets, try with a lower `tol`"
                )
                break

        return self

    def decision_function(self, D):
        """Compute covers on new data, and return code length

        This function is named ``decision_function`` because code lengths
        represent the distance between a point and the current codetable.

        Setting ``pruning`` to False when creating the model
        is recommended to cover unseen data, and especially when building a classifier.

        Parameters
        ----------
        D: pd.DataFrame or np.ndarray
            new data to make predictions on, in tabular format

        Example
        -------
        >>> from skmine.itemsets import SLIM; import pandas as pd
        >>> def to_tabular(D): return pd.Series(D).str.join('|').str.get_dummies(sep="|")
        >>> D = [['bananas', 'milk'], ['milk', 'bananas', 'cookies'], ['cookies', 'butter', 'tea']]
        >>> new_D = to_tabular([['cookies', 'butter']])
        >>> slim = SLIM().fit(to_tabular(D))
        >>> slim.decision_function(new_D)
        0   -1.321928
        dtype: float32

        See Also
        --------
        cover
        discover
        """
        mat = self.cover(D)
        code_lengths = self.discover(singletons=True, usage_tids=False)
        ct_codes = code_lengths / code_lengths.sum()
        codes = (mat * ct_codes).sum(axis=1).astype(np.float32)
        # positive sign on log2 to return negative distances (sklearn convention)
        r = _log2(codes)
        r[r == 0] = -np.inf  # zeros would fool a `shortest code wins` strategy
        return r

    def generate_candidates(self, stack=set()):
        """
        Generate candidates from the current codetable (SLIM is any-time)

        Note that `stack` is updated during the execution of this method.

        Parameters
        ----------
        stack: set[frozenset], default=set()
            a stack of already-seen candidates to be excluded

        Returns
        -------
        iterator[tuple(frozenset, Bitmap)]
        """
        ct = SortedDict(self._standard_candidate_order,
                        self.codetable_.items())
        return generate_candidates(ct, stack=stack)

    def evaluate(self, candidate):
        """
        Evaluate ``candidate``, considering the current codetable and a dataset ``D``

        Parameters
        ----------
        candidate: frozenset
            a new candidate to be evaluated

        Returns
        -------
        (float, float, dict)
            updated (data size, model size, codetable)
        """
        idx = self.codetable_.bisect(candidate)
        ct = list(self.codetable_)
        ct.insert(idx, candidate)
        D = {k: v.copy() for k, v in self.standard_codetable_.items()}
        CTc = cover(D, ct)

        decreased = set()
        for iset, usage in self.codetable_.items(
        ):  # TODO: useless if size is too big
            if len(CTc[iset]) < len(usage):
                decreased.add(iset)

        data_size, model_size = self._compute_sizes(CTc)

        if self.pruning:
            CTc, data_size, model_size = self._prune(CTc, decreased,
                                                     model_size, data_size)

        return data_size, model_size, CTc

    def update(self,
               candidate=None,
               model_size=None,
               data_size=None,
               usages=None):
        """
        Update the current codetable.

        If `usages` is provided, `model_size`, `data_size` and `usages` are applied directly.
        If `usages` is None, they are computed by calling `.evaluate` on `candidate`.

        Parameters
        ----------
        candidate: frozenset, default=None
            candidate to be inserted

        model_size: float, default=None
            new model size (in bits) to be set

        data_size: float
            new data size (in bits) to be set

        usages: dict, default=None
            optional for usage outside of this class
            eg. if one simply needs to include an itemset in the current codetable
            as in interactive data mining

        Raises
        ------
        AssertionError
        """
        assert not (candidate is None and usages is None)
        if usages is None:
            data_size, model_size, usages = self.evaluate(candidate)
        to_drop = {
            c
            for c in self.codetable_.keys() - usages.keys() if len(c) > 1
        }
        self.codetable_.update(usages)
        for iset in to_drop:
            del self.codetable_[iset]

        self.data_size_ = data_size
        self.model_size_ = model_size

    def cover(self, D):
        """
        cover unseen data

        items never seen are dropped out


        Examples
        --------
        >>> from skmine.itemsets import SLIM
        >>> D = ["ABC", "AB", "BCD"]
        >>> s = SLIM().fit(D)
        >>> s.cover(["BC", "AB"])
           (A, B)   (B,)   (C,)
        0   False   True   True
        1    True  False  False

        Returns
        -------
        pd.DataFrame
        """
        if hasattr(D, "shape") and len(D.shape) == 2:  # tabular
            D = _check_D(D)
            D_sct = {
                k: Bitmap(np.where(D[k])[0])
                for k in D.columns if k in self.standard_codetable_
            }
        else:  # transactional
            D_sct = _to_vertical(D)

        isets = self.discover(singletons=True, usage_tids=False)
        isets = isets[isets.index.map(set(D_sct).issuperset)]
        covers = cover(D_sct, isets.index)

        mat = np.zeros(shape=(len(D), len(covers)), dtype=bool)
        for idx, tids in enumerate(covers.values()):
            mat[tids, idx] = True
        return pd.DataFrame(mat, columns=list(covers.keys()))

    def discover(self,
                 singletons=False,
                 usage_tids=False,
                 drop_null_usage=True):
        """Get a user-friendly copy of the codetable

        Parameters
        ----------
        singletons: bool, default=False
            Whether to include itemsets of length 1 in the result
        usage_tids: bool, default=False
            Whether to return the transaction ids in which an itemset is used,
            rather than its usage count
        drop_null_usage: bool, default=True
            Whether to drop itemsets with no usage in the training data
            (i.e. itemsets entirely under cover of other itemsets)

        Example
        -------
        >>> from skmine.itemsets import SLIM
        >>> D = ["ABC", "AB", "BCD"]
        >>> SLIM().fit(D).discover(singletons=True, usage_tids=True, drop_null_usage=False)
        (A, B)    [0, 1]
        (B,)         [2]
        (A,)          []
        (C,)      [0, 2]
        (D,)         [2]
        dtype: object

        Returns
        -------
        pd.Series
            codetable containing patterns and ids of transactions in which they are used
        """
        s = {
            tuple(sorted(iset)): tids.copy()
            for iset, tids in self.codetable_.items()
            if len(tids) >= drop_null_usage and len(iset) > (not singletons)
        }
        s = pd.Series(list(s.values()), index=list(s.keys()))
        if not usage_tids:
            s = s.map(len).astype(np.uint32)
        return s

    def reconstruct(self):
        """reconstruct the original data from the current `self.codetable_`"""
        n_transactions = (max(
            map(Bitmap.max, filter(lambda e: e, self.codetable_.values()))) +
                          1)

        D = pd.Series([set()] * n_transactions)
        for itemset, tids in self.codetable_.items():
            D.iloc[list(tids)] = D.iloc[list(tids)].map(itemset.union)
        return D.map(sorted)

    @lru_cache(maxsize=1024)
    def get_support(self, *items):
        """
        Get support from an itemset

        Note
        ----
        Items in an itemset must be passed as positional arguments

        Unseen items will throw errors
        """
        a = items[-1]
        tids = self.standard_codetable_[a]
        if len(items) > 1:
            return tids & self.get_support(*items[:-1])
        return tids

    def _standard_cover_order(self, itemset):
        """
        Returns a tuple associated with an itemset,
        so that many itemsets can be sorted in Standard Cover Order
        """
        return (-len(itemset), -len(self.get_support(*itemset)),
                tuple(itemset))

    def _standard_candidate_order(self, itemset):
        return (-len(self.get_support(*itemset)), -len(itemset),
                tuple(itemset))

    def prefit(self, D, y=None):
        """
        Parameters
        ----------
        D: iterable of iterables or array-like
            Transactional dataset, either as an iterable of iterables
            or encoded as tabular binary data

        Note
        ----
        works in 3 steps

        1. ingest data `D`
        2. track bitmaps for the top `self.n_items` frequent items from `D`
        3. set `self.data_size_` and `self.model_size_` given the standard codetable
        """
        if hasattr(D, "ndim") and D.ndim == 2:
            D = _check_D(D)
            if y is not None:
                D = supervised_to_unsupervised(D, y)  # SKLEARN_COMPAT
            item_to_tids = {k: Bitmap(np.where(D[k])[0]) for k in D.columns}
        else:
            item_to_tids = _to_vertical(D)
        sct = pd.Series(item_to_tids)
        usage = sct.map(len).astype(np.uint32)
        usage = usage.nlargest(self.n_items)
        sct = sct[usage.index]
        self.standard_codetable_ = sct

        ct_it = ((frozenset([e]), tids) for e, tids in sct.items())
        self.codetable_ = SortedDict(self._standard_cover_order, ct_it)

        codes = -_log2(usage / usage.sum())
        self._starting_codes = codes

        # L(code_ST(X)) = L(code_CT(X)), because CT=ST
        self.model_size_ = 2 * codes.sum()

        self.data_size_ = (codes * usage).sum()

        return self

    def _compute_sizes(self, codetable):
        """
        Compute sizes for both the data and the model

        .. math:: L(D|CT)
        .. math:: L(CT|D)

        Parameters
        ----------
        codetable : Mapping
            A series mapping itemsets to their usage tids

        Returns
        -------
        tuple(float, float)
            (data_size, model_size)
        """
        isets, usages = zip(*((_[0], len(_[1])) for _ in codetable.items()
                              if len(_[1]) > 0))
        usages = np.array(usages, dtype=np.uint32)
        codes = -_log2(usages / usages.sum())

        counts = Counter(chain(*isets))
        stand_codes_sum = sum(self._starting_codes[item] * ctr
                              for item, ctr in counts.items())

        model_size = stand_codes_sum + codes.sum(
        )  # L(CTc|D) = L(X|ST) + L(X|CTc)
        data_size = (codes * usages).sum()
        return data_size, model_size

    def _prune(self, codetable, prune_set, model_size, data_size):
        """post prune a codetable considering itemsets for which usage has decreased

        Parameters
        ----------
        codetable: SortedDict
        prune_set: set
            itemsets in ``codetable`` for which usage has decreased
        model_size: float
            current model_size for ``codetable``
        data_size: float
            current data size when encoding ``D`` with ``codetable``

        Returns
        -------
        new_codetable, new_data_size, new_model_size: SortedDict, float, float
            a tuple containing the pruned codetable, and new model size and data size
            w.r.t this new codetable
        """
        prune_set = {k for k in prune_set if len(k) > 1}  # remove singletons
        while prune_set:
            cand = min(prune_set, key=lambda e: len(codetable[e]))
            prune_set.discard(cand)

            ct = list(codetable)
            ct.remove(cand)

            D = {k: v.copy()
                 for k, v in self.standard_codetable_.items()
                 }  # TODO avoid data copies
            CTp = cover(D, ct)
            decreased = {
                k
                for k, v in CTp.items()
                if len(k) > 1 and len(v) < len(codetable[k])
            }

            d_size, m_size = self._compute_sizes(CTp)

            if d_size + m_size < model_size + data_size:
                codetable.update(CTp)
                del codetable[cand]
                prune_set.update(decreased)
                data_size, model_size = d_size, m_size

        return codetable, data_size, model_size
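As a side note, the `codetable_.bisect(candidate)` call in `evaluate` relies on a SortedDict built with a key function; a minimal sketch of that pattern (illustrative ordering and itemsets, not the exact standard cover order):

from sortedcontainers import SortedDict

# order itemsets by (-length, lexicographic) as a stand-in for the standard cover order
ct = SortedDict(lambda iset: (-len(iset), tuple(sorted(iset))))
ct[frozenset("AB")] = [0, 1]
ct[frozenset("C")] = [0, 2]

candidate = frozenset("BC")
idx = ct.bisect(candidate)    # position the candidate would take in the ordering
order = list(ct)
order.insert(idx, candidate)  # tentative codetable order, as in SLIM.evaluate
print(order)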
Пример #16
0
def test_bisect():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert temp.bisect_left('a') == 0
    assert temp.bisect_right('f') == 6
    assert temp.bisect('f') == 6
Пример #17
0
class TimeSeries(TictsMagicMixin, TictsOperationMixin, PandasMixin,
                 TictsIOMixin, TictsPlot):
    """ TimeSeries object.

    Args:
        default: The default value of the timeseries.
        permissive (bool): Whether to allow accessing non-existing values or not.
            If True, getting a non-existing item returns None.
            If False, getting a non-existing item raises a KeyError.
    """
    _default_interpolate = "previous"

    _meta_keys = ('default', 'name', 'permissive')

    @property
    def index(self):
        return self.data.keys()

    @property
    def lower_bound(self):
        """Return the lower bound time index."""
        if self.empty:
            return MINTS
        return self.index[0]

    @property
    def upper_bound(self):
        """Return the upper bound time index."""
        if self.empty:
            return MAXTS
        return self.index[-1]

    @property
    def _has_default(self):
        return self.default != NO_DEFAULT

    @property
    def _kwargs_special_keys(self):
        kwargs = {}
        for attr_name in self._meta_keys:
            kwargs[attr_name] = getattr(self, attr_name)
        return kwargs

    @property
    def empty(self):
        """Return whether the TimeSeries is empty or not."""
        return len(self) == 0

    def __init__(self,
                 data=None,
                 default=NO_DEFAULT,
                 name=DEFAULT_NAME,
                 permissive=True,
                 tz='UTC'):
        """"""
        if isinstance(data, self.__class__):
            for attr in ('data', *self._meta_keys):
                setattr(self, attr, getattr(data, attr))

            # Only set 'default' and 'name' if is different from default
            if default != NO_DEFAULT:
                setattr(self, 'default', default)
            if name != DEFAULT_NAME:
                setattr(self, 'name', name)
            return

        if hasattr(default, 'lower') and default.lower() == 'no_default':
            # 'no_default' as a string is used at JSON serialization time
            self.default = NO_DEFAULT
        else:
            self.default = default

        self.name = name
        self.permissive = permissive

        # Overwrite the name if data is an instance of pd.DataFrame or pd.Series
        if isinstance(data, pd.DataFrame):
            if len(data.columns) != 1:
                msg = ("Can't convert a DataFrame with several columns into "
                       "one timeseries: {}.")
                raise ValueError(msg.format(data.columns))
            self.name = data.columns[0]

        elif isinstance(data, pd.Series):
            self.name = data.name

        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        # SortedDict.__init__ does not go through __setitem__,
        # so we have to parse datetime keys ourselves.
        # SortedDict checks whether its first argument is a callable,
        # in case you want to pass a custom sorting function.
        self.data = SortedDict(None, _process_args(data, tz))

    def __setitem__(self, key, value):
        if isinstance(key, slice):
            return self.set_interval(key.start, key.stop, value)
        if key in self._meta_keys:
            super().__setitem__(key, value)
        else:
            key = timestamp_converter(key, self.tz)
            self.data[key] = value

    def __getitem__(self, key):
        """Get the value of the time series, even in-between measured values by interpolation.
        Args:
            key (datetime): datetime index
            interpolate (str): interpolate operator among ["previous", "linear"]
        """

        interpolate = self._default_interpolate

        if isinstance(key, tuple):
            if len(key) == 2:
                key, interpolate = key
            elif len(key) > 2:
                raise KeyError

        if isinstance(key, slice):
            return self.slice(key.start, key.stop)

        key = timestamp_converter(key, self.tz)

        basemsg = "Getting {} but default attribute is not set".format(key)
        if self.empty:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    raise KeyError(
                        "{} and timeseries is empty".format(basemsg))

        if key < self.lower_bound:
            if self._has_default:
                return self.default
            else:
                if self.permissive:
                    return
                else:
                    msg = "{}, can't deduce value before the oldest measurement"
                    raise KeyError(msg.format(basemsg))

        # If the key is already defined:
        if key in self.index:
            return self.data[key]

        if interpolate.lower() == "previous":
            fn = self._get_previous
        elif interpolate.lower() == "linear":
            fn = self._get_linear_interpolate
        else:
            raise ValueError("'{}' interpolation unknown.".format(interpolate))

        return fn(key)

    def _get_previous(self, time):
        # In this case, bisect_left == bisect_right == bisect
        # And idx > 0 as we already handled other cases
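        # e.g. with hypothetical index [t0, t1, t2] and t0 < time < t1:
        # bisect(time) == 1, so previous_idx == 0 and the value stored at t0 is returned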
        previous_idx = self.data.bisect(time) - 1
        time_idx = self.index[previous_idx]
        return self.data[time_idx]

    def _get_linear_interpolate(self, time):
        # TODO: put it into a 'get_previous_index' method
        idx = self.data.bisect_left(time)
        previous_time_idx = self.index[idx - 1]

        # TODO: check on left bound case

        # out of right bound case:
        if idx == len(self):
            return self.data[previous_time_idx]

        next_time_idx = self.index[idx]

        previous_value = self.data[previous_time_idx]
        next_value = self.data[next_time_idx]

        coeff = (time - previous_time_idx) / (
            next_time_idx - previous_time_idx)

        value = previous_value + coeff * (next_value - previous_value)
        return value

    def slice(self, start, end):  # noqa A003
        """Slice your timeseries for give interval.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound

        Returns:
            TimeSeries sliced
        """
        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        newts = TimeSeries(**self._kwargs_special_keys)

        for key in self.data.irange(start, end, inclusive=(True, False)):
            newts[key] = self[key]

        should_add_left_closure = (start not in newts.index
                                   and start >= self.lower_bound)
        if should_add_left_closure:
            newts[start] = self[start]  # is applying get_previous on self

        return newts

    def set_interval(self, start, end, value):
        """Set a value for an interval of time.

        Args:
            start (datetime or str): lower bound
            end (datetime or str): upper bound
            value: the value to be set

        Returns:
            self

        Raises:
            NotImplementedError: when no default is set.
        """
        if not self._has_default:
            msg = "At the moment, you have to set a default for set_interval"
            raise NotImplementedError(msg)

        start = timestamp_converter(start, self.tz)
        end = timestamp_converter(end, self.tz)

        keys = self.data.irange(start, end, inclusive=(True, False))

        last_value = self[end]

        for key in list(keys):
            del self.data[key]

        self[start] = value
        self[end] = last_value

    def compact(self):
        """Convert this instance to a compact version: consecutive measurement of the
        same value are discarded.

        Returns:
            TimeSeries
        """
        ts = TimeSeries(**self._kwargs_special_keys)
        for time, value in self.items():
            should_set_it = ts.empty or (ts[time] != value)
            if should_set_it:
                ts[time] = value
        return ts

    def iterintervals(self, end=None):
        """Iterator that contain start, end of intervals.

        Args:
            end (datetime): right bound of last interval.
        """
        lst_keys = SortedList(self.index)
        if not end:
            end = self.upper_bound
        else:
            end = timestamp_converter(end, self.tz)
            if end not in lst_keys:
                lst_keys.add(end)

        for i, key in enumerate(lst_keys[:-1]):
            next_key = lst_keys[i + 1]
            if next_key > end:  # stop there
                return
            yield key, next_key

    def equals(self, other, check_default=True, check_name=True):
        if not isinstance(other, self.__class__):
            raise TypeError("Can't compare {} with {}".format(
                self.__class__.__name__, other.__class__.__name__))

        is_equal = self.data == other.data

        if check_default:
            is_equal = is_equal and self.default == other.default

        if check_name:
            is_equal = is_equal and self.name == other.name

        return is_equal

    @property
    def tz(self):
        if self.empty:
            return pytz.UTC
        return str(self.index[0].tz)

    def tz_convert(self, tz):
        try:
            tz = pytz.timezone(tz)
        except pytz.UnknownTimeZoneError:
            raise ValueError('{} is not a valid timezone'.format(tz))

        ts = deepcopy(self)

        for key in ts.index:
            ts[key.tz_convert(tz)] = ts.data.pop(key)

        return ts
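For reference, the previous-value lookup performed by `_get_previous` above can be sketched with a bare SortedDict keyed by timestamps (illustrative data only):

from datetime import datetime
from sortedcontainers import SortedDict

series = SortedDict({
    datetime(2020, 1, 1): 10.0,
    datetime(2020, 1, 3): 30.0,
})

query = datetime(2020, 1, 2)
previous_idx = series.bisect(query) - 1      # index of the last key <= query
previous_key = series.keys()[previous_idx]
print(series[previous_key])                  # 10.0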
Пример #18
0
def search(request):
    global new_dict
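    # NOTE: besides `new_dict`, this view relies on module-level globals such as `sd`
    # (a SortedDict index over the extracted dump folders) and `doc` (the line marker
    # searched for further below); both are assumed to be defined elsewhere in the module.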
    if 'selection' in request.POST:
        spid = request.POST.get('selection')
        request.session['spid'] = spid
        col = {}
        for c in new_dict[spid]:
            f = 0
            e = 0
            for a in new_dict[spid][c]:
                f = f + 1
                obj = SomeObject()
                obj.args = {spid: {c: {a}}}
                ob = SomeObject.objects.filter(args=obj.args).count()
                e = e + ob
            col[c] = [f, e]
        print(col)
        context = {'col': col}
        return render(request, "search.html", context)
    if 'error' in request.POST:
        links = {}
        txt = []
        px = []
        spid = request.session['spid']
        c = request.POST.get('error')
        request.session['c'] = c
        cx = re.sub(r"\s([?.!',](?:\s|$))", r"\1", c)
        cx = cx.replace(" '", "'")
        for a in new_dict[spid][c]:
            obj = SomeObject()
            obj.args = {spid: {c: {a}}}
            ob = SomeObject.objects.filter(args=obj.args).count()
            fla = 0
            links[a] = ob
            a = a.split("_")
            URL = "https://en.wikipedia.org/?curid=" + a[0]
            ind = sd.bisect(int(a[0]))
            key = sd.iloc[ind]
            value = str(sd[key], 'utf-8')
            all_files = os.listdir("extracted/" + value + '/')
            temp = open("extracted/" + value + "/index.txt", "rb")
            dic = {}
            for all in all_files:
                for line in temp:
                    (key, val1) = line.split()
                    dic[int(val1)] = key
            sdi = SortedDict((key, value) for key, value in dic.items())
            ind = sdi.bisect(int(a[0]))
            key = sdi.iloc[ind - 1]
            val = str(sdi[key], 'utf-8')
            with bz2.open("extracted/" + value + "/" + val, "rt") as bz_file:
                p = []
                for line in bz_file:
                    if fla == 1:
                        if cx in line:
                            p.append(line.replace(cx, "<b>" + cx + "</b>"))
                    if doc in line:
                        n = re.search(" +id=\"(.*?)\"", line)
                        if n.group(1) == a[0]:
                            fla = 1
                            pp = re.search(" +title=\"(.*?)\"", line)
                            px.append(pp.group(1))
                        else:
                            fla = 0
            #ALTERNATE WAY IF YOU WANT TO GET TEXTS FROM ONLINE LINKS
            #r = requests.get(URL)
            #soup = BeautifulSoup(r.content, 'html5lib')
            #abc = soup.get_text().splitlines()
            #cx = re.sub(r"\s([?.!',](?:\s|$))", r"\1", c)
            #cx = cx.replace(" '","'")
            #px.append(soup.title.string)
            #p = [line for line in soup.get_text().splitlines() if cx in line]
            txt.append(p)
        link = OrderedDict(
            sorted(links.items(), key=lambda t: t[1], reverse=True))
        context = {'links': link, 'cols': cx, 'txt': txt, 'px': px}
        return render(request, "search.html", context)
    if 'store' in request.POST:
        st = request.POST.getlist('recommendations')
        spid = request.session['spid']
        c = request.session['c']
        for s in st:
            for a in new_dict[spid][c]:
                if a == s:
                    obj = SomeObject()
                    obj.args = {spid: {c: {a}}}
                    obj.user = request.user
                    if SomeObject.objects.filter(
                            args=obj.args, user=obj.user).exists() == False:
                        mission = "Thanks for your feedback"
                        obj.save()
                    else:
                        mission = "Already Exists"
        context = {'mission': mission}
        return render(request, "search.html", context)
    spid = request.session['spid']
    co = {}
    for c in new_dict[spid]:
        f = 0
        e = 0
        for a in new_dict[spid][c]:
            f = f + 1
            obj = SomeObject()
            obj.args = {spid: {c: {a}}}
            ob = SomeObject.objects.filter(args=obj.args).count()
            e = e + ob
        co[c] = [f, e]
    if 'alphabet' in request.POST:
        col = OrderedDict(sorted(co.items(), key=lambda t: t[0]))
    if 'frequency' in request.POST:
        col = OrderedDict(
            sorted(co.items(), key=lambda t: t[1][0], reverse=True))
    if 'marked' in request.POST:
        col = OrderedDict(
            sorted(co.items(), key=lambda t: t[1][1], reverse=True))
    context = {'col': col}
    return render(request, "search.html", context)
Пример #19
0
        # print(date)
        gpsinfo = exif['GPSInfo']
        print(gpsinfo)
        (lat, lng) = gpsTuplesToFloat(gpsinfo)
        # print(lng)
        datetimeLatLng[date] = (lat,lng)

# Get the list of THETA photo files
theta_filenames = glob('R*.jpg')

# For each THETA photo, store a latitude/longitude interpolated over time from the nearest iPhone photos' coordinates
for filename in theta_filenames:
    exif2 = get_exif_of_image(filename)

    dto = dt.strptime(exif2['DateTimeOriginal'], '%Y:%m:%d %H:%M:%S')
    bisectDto = datetimeLatLng.bisect(dto)
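    # bisectDto is the number of iPhone timestamps <= dto (bisect is bisect_right here),
    # so item1 and item2 below bracket dto, clamped to the ends of the index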
    #print(bisectDto)
    # print(type(dto))
    # print(type(keys[0]))
    item1 = datetimeLatLng.items()[max(bisectDto - 1, 0)]
    item2 = datetimeLatLng.items()[min(bisectDto, len(datetimeLatLng) - 1)]
    print(item1)
    print(item2)
    # print(dto)
    k = (dto - item1[0])/(item2[0] - item1[0])
    lat = item1[1][0] + k * (item2[1][0] - item1[1][0])
    lng = item1[1][1] + k * (item2[1][1] - item1[1][1])
    print((dto, (lat,lng)))

    exif_dict2 = piexif.load(filename)
    exif_dict2['GPS'] = floatLatLngToGpsTuple((lat, lng))
Пример #20
0
for subdir, dirs, files in os.walk(photosDirectory):
    for file in files:

        if isPhoto(file):
            try:
                exif = getExif(os.path.join(subdir, file))
                if not cameraIsValid(exif):
                    continue
                # get focal length and convert from rational data type to float
                focalLength = exif[FOCALLENGTH_TAG][0] / exif[FOCALLENGTH_TAG][1]
                # count every focal length occurrence in the dictionary
                if (focalLength in occurences):
                    occurences[focalLength] = occurences[focalLength] + 1
                else:  # find nearest
                    index = occurences.bisect(focalLength)
                    greater = occurences.iloc[index]
                    smaller = occurences.iloc[index - 1]
                    nearestFL = greater if (greater - focalLength <
                                            focalLength - smaller) else smaller
                    occurences[nearestFL] = occurences[nearestFL] + 1
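                    # e.g. with hypothetical keys {18.0, 24.0, 35.0, 50.0} and focalLength == 28.0,
                    # bisect gives index 2, greater == 35.0, smaller == 24.0, so the photo is counted under 24.0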
            except (KeyError, TypeError, IndexError):
                # there is no focal length info in image exif data (Key/Type/IndexError)
                pass

# plot the graph
position = arange(len(focalLengths)) + .5
barh(position, occurences.values(), align='center', color='#FF0000')
yticks(position, occurences.keys())
xlabel('Occurrences')
ylabel('Focal length')