Пример #1
0
def sortinplace(items):
    """
    Sort items per type while every type keeps the positions it occupied.

    Strings that represent an integer (decimal digits with at most one
    leading ``-``) are sorted numerically among the positions that held
    integers; all remaining strings are sorted lexicographically among the
    remaining positions. Both sorts are stable. The input list is not
    modified; a new list is returned.

    :param items: A list of string objects possibly containing digit chars
    :type items: list
    :return: A list of 'inplace per type' sorted items
    :rtype: list

    :Example:
    >>> sortinplace(['car', 'truck', '8', '4', 'bus', '6', '-1'])
    ['bus', 'car', '-1', '4', 'truck', '6', '8']

    """
    def as_int(item):
        # Only a single leading minus sign is accepted, so inputs such as
        # '--1' or '1-' are treated as plain strings instead of crashing
        # later in int().
        digits = item[1:] if item.startswith('-') else item
        if not digits.isdigit():
            return None
        try:
            return int(item)
        except ValueError:
            # isdigit() accepts some characters int() cannot parse.
            return None

    parsed = [as_int(item) for item in items]

    # Sort each type on its own; sort integers by value only so that equal
    # values (e.g. '4' and '04') keep their relative order.
    int_pairs = [(num, item) for num, item in zip(parsed, items)
                 if num is not None]
    int_pairs.sort(key=lambda pair: pair[0])
    sorted_ints = iter([item for _, item in int_pairs])
    sorted_strs = iter(sorted(item for num, item in zip(parsed, items)
                              if num is None))

    # Refill every slot from the sorted stream of its own type.
    return [next(sorted_strs) if num is None else next(sorted_ints)
            for num in parsed]
Пример #2
0
def sortinplace(items):
    """
    Sort items per type, with each type staying in its original positions.

    An item counts as an integer when it consists of decimal digits with an
    optional single leading ``-``; everything else is treated as a plain
    string. Integers are ordered by numeric value, strings lexicographically,
    and each sorted group is written back into the slots that group occupied
    in the input. A new list is returned; ``items`` is left untouched.

    :param items: A list of string objects possibly containing digit chars
    :type items: list
    :return: A list of 'inplace per type' sorted items
    :rtype: list

    :Example:
    >>> sortinplace(['car', 'truck', '8', '4', 'bus', '6', '-1'])
    ['bus', 'car', '-1', '4', 'truck', '6', '8']

    """
    def is_int(item):
        # Strip at most one leading '-' so that '--1' / '1-' are not
        # mistaken for numbers (int() would raise on them).
        body = item[1:] if item[:1] == '-' else item
        if not body.isdigit():
            return False
        try:
            int(item)
        except ValueError:
            # Some characters pass isdigit() but are rejected by int().
            return False
        return True

    # Positions held by each type in the input.
    int_slots = [i for i, item in enumerate(items) if is_int(item)]
    int_slot_set = set(int_slots)
    str_slots = [i for i in range(len(items)) if i not in int_slot_set]

    # Source positions ordered by the value they point to (stable sorts).
    int_sources = sorted(int_slots, key=lambda i: int(items[i]))
    str_sources = sorted(str_slots, key=lambda i: items[i])

    # Write the k-th smallest value of a type into the k-th slot of that type.
    result = list(items)
    for slot, source in zip(int_slots, int_sources):
        result[slot] = items[source]
    for slot, source in zip(str_slots, str_sources):
        result[slot] = items[source]
    return result
Пример #3
0
def majorityCnt(classList):
    """Return the label that occurs most often in ``classList``.

    :param classList: iterable of hashable class labels (votes)
    :return: the label with the highest number of occurrences; on a tie the
        label that the counting dict iterates first wins (the first one seen
        on Python 3.7+, where dicts preserve insertion order)
    :raises ValueError: if ``classList`` contains no labels
    """
    classCount = {}
    for vote in classList:
        # Increment the tally of this label (the counter dict itself must
        # not be the target of the addition).
        classCount[vote] = classCount.get(vote, 0) + 1

    if not classCount:
        raise ValueError("classList must not be empty")

    # max() returns the first maximal key, so no full sort is needed.
    return max(classCount, key=classCount.get)
Пример #4
0
    def __init__(self, imagefile, labelfile,
                 *, training=60, validation=15, test=25, seed=None):
        """Use the data in the given files to create a data set.

        The training, validation, and test sets' sizes are given as integer
        percentages. If they do not evenly divide the data, remaining data
        will be added to the test set. The split is applied separately to
        each group of images sharing the same ``'of'`` key, and the
        per-group sizes are recorded in ``self.stats``.

        :param imagefile: passed to ``load_files`` as its first argument
        :param labelfile: passed to ``load_files`` as its second argument
        :param training: percentage of each group put in the training set
        :param validation: percentage of each group put in the validation set
        :param test: percentage for the test set; only used to validate that
            the three percentages sum to 100 (the test set receives whatever
            remains of each group)
        :param seed: seed for the class-level randomizer; when ``None`` the
            class default seed is used
        :raises ValueError: if the three percentages do not sum to 100

        Sets ``self._training``, ``self._validation``, ``self._test`` and
        ``self._alldata`` (shuffled tuples) and ``self.stats`` (a dict
        mapping each group key to a ``MiniData`` of the three sizes).
        """
        if training + validation + test != 100:
            raise ValueError("Sum of data sets is not 100")
        images = load_files(imagefile, labelfile)

        # Bucket the images by key. itertools.groupby only merges *adjacent*
        # equal keys, so feeding it unsorted data into a dict would silently
        # discard all but the last run of every key; accumulating per key
        # keeps every image and preserves first-appearance order.
        key_of = getter('of')
        bynum = {}
        for image in images:
            bynum.setdefault(key_of(image), []).append(image)

        train_set = []
        valid_set = []
        test_set = []
        stats = {}
        for num, nimages in bynum.items():
            total = len(nimages)
            # Integer floor division; the remainder ends up in the test set.
            n_train = total * training // 100
            n_valid = total * validation // 100
            train_set += nimages[:n_train]
            valid_set += nimages[n_train:n_train + n_valid]
            test_set += nimages[n_train + n_valid:]
            stats[num] = MiniData(n_train, n_valid, total - n_train - n_valid)
        # Now the three sets hold the data, grouped by number.

        # This is the best time to convert the lists into dicts, if you
        # want to.
        #
        # Planned access methods (from the original author's notes):
        #   Get by number   : int -> ImmutableCollection[Image]
        #   -- Get by set   : (set ->) ImmutableCollection[Image]
        #   Get set as dict : set -> { int: Image }
        #   Get by both     : set -> int -> ImmutableCollection[Image]
        #   Get as dict     : -> { set: { int: Image } }
        #   -- Get as set

        # Build the combined list before shuffling, then shuffle in a fixed
        # order so that a given seed always produces the same result.
        data = train_set + valid_set + test_set
        Data._randomizer.seed(Data._default_seed if seed is None else seed)
        Data._randomizer.shuffle(train_set)
        Data._randomizer.shuffle(valid_set)
        Data._randomizer.shuffle(test_set)
        Data._randomizer.shuffle(data)
        self._training = tuple(train_set)
        self._validation = tuple(valid_set)
        self._test = tuple(test_set)
        self._alldata = tuple(data)

        self.stats = stats
Пример #5
0
def get_relations(tag):
    """Look up everything related to ``tag`` and return it with the tag.

    A single ``.`` is written to stdout (and flushed) as a progress marker
    before the lookups start.

    :param tag: mapping providing at least the ``"id"`` and ``"name"`` keys
    :return: a ``(tag, relations)`` tuple where ``relations`` is a dict with
        the keys ``parents``, ``group_list`` (sorted group names),
        ``dest_targets``, ``build_targets`` and ``external_repos``
    """
    sys.stdout.write(".")
    sys.stdout.flush()

    tag_id = tag["id"]

    # The lookups are issued in the same order as the keys are listed.
    relations = {}
    relations["parents"] = client.getInheritanceData(tag["name"])
    group_names = map(getter("name"), client.getTagGroups(tag_id))
    relations["group_list"] = sorted(group_names)
    relations["dest_targets"] = client.getBuildTargets(destTagID=tag_id)
    relations["build_targets"] = client.getBuildTargets(buildTagID=tag_id)
    relations["external_repos"] = client.getTagExternalRepos(tag_info=tag_id)

    return tag, relations
Пример #6
0
    def execute(self, using, optimize=False): #TODO do something with optimize
        """Lazily yield the nodes that satisfy this query's conditions.

        This is a generator. It works in two phases:

        1. Conditions whose first element reports a truthy ``indexed``
           attribute are grouped by the index returned from
           ``c.field.index(using)``; each group is combined into a single
           query that is run against that index. Hits are additionally
           checked against every non-indexed condition before being yielded.
        2. If there are non-indexed conditions, or no indexed conditions at
           all, the type node is traversed with a return filter built from
           all conditions and any node not yielded so far is yielded.

        No node is yielded twice: everything yielded is remembered in a set.

        :param using: passed to ``field.index()`` and ``_type_node()``
            (presumably a database/connection alias -- confirm)
        :param optimize: currently unused (see TODO on the signature)
        """
        first = getter(0)
        # uniqify is defined elsewhere; presumably it removes duplicate
        # conditions -- confirm.
        conditions = uniqify(self.conditions)
        #gather all indexed fields
        #TODO exclude those that can't be evaluated against (eg exact=None, etc)
        indexed = [c for c in conditions if first(c).indexed]
        unindexed = [c for c in conditions if c not in indexed]

        results = {} #TODO this could perhaps be cached - think about it
        # Every node yielded so far; used to suppress duplicates.
        filtered_results = set()

        #TODO order by type - check against the global type first, so that if
        #we get an empty result set, we can return none

        # NOTE(review): groupby only merges adjacent items and `indexed` is
        # not sorted by index here, so the same index may be visited in more
        # than one group -- confirm this is acceptable.
        for index, group in groupby(indexed, lambda c:c.field.index(using)):
            # AND together the query objects of all conditions in the group.
            q = None
            for condition in group:
                new_q = q_from_condition(condition)
                if not new_q:
                    # Stops combining; whatever was accumulated in `q` so far
                    # (possibly None) is what gets queried below.
                    break
                if q:
                    q &= new_q
                else:
                    q = new_q
            result_set = set(index.query(q))
            #TODO results is currently worthless
            results[q] = result_set
            #TODO also needs to match at least one type, if any have been provided
            #filter for unindexed conditions, as well
            filtered_result = set(n for n in result_set \
                               if all(matches_condition(n, c) for c in unindexed)\
                                  and n not in filtered_results)
            filtered_results |= filtered_result
            for r in filtered_result:
                yield r

        # Traversal phase: needed when some conditions could not be answered
        # from an index, or when there was nothing to query an index with.
        if unindexed or not indexed:
            return_filter = return_filter_from_conditions(unindexed + indexed)
            rel_types = [neo4j.Outgoing.get('<<TYPE>>'),
                         neo4j.Outgoing.get('<<INSTANCE>>')]
            type_node = self.nodetype._type_node(using)
            # The traversal is consumed page by page (QUERY_CHUNK_SIZE is
            # passed as the page size).
            pages = type_node.traverse(types=rel_types,
                                            returnable=return_filter,
                                            page_size=QUERY_CHUNK_SIZE)
            for result_set in pages:
                # Skip anything the index phase (or an earlier page) yielded.
                filtered_result = set(n for n in result_set \
                                     if n not in filtered_results)
                filtered_results |= filtered_result
                for r in filtered_result:
                    yield r
Пример #7
0
def budget(y, **kwargs):
    r"""Return a sequence of `Influence` objects

    :arg y: :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`:  an uncertain number

    :arg \**kwargs: Keyword arguments:

            * influences: a sequence of uncertain numbers (default ``None``)
            * key (:class:`str`): a sorting key (``'u'`` or ``'label'``, default ``'u'``);
              ``None`` disables sorting
            * reverse (:class:`bool`): the sorting order (forward or reverse, default ``True``)
            * trim (:class:`float`): to control the smallest reported magnitudes (default ``0.01``)
            * max_number (:class:`int`): to return no more than `max_number` components
              (default ``None``, no limit)
            * intermediate (:class:`bool`): to report all intermediate components
              (default ``False``)

    :returns: A sequence of :obj:`~named_tuples.Influence` namedtuples.
              An empty list is returned when ``y`` is neither an
              ``UncertainReal`` nor an ``UncertainComplex``.

    :raises RuntimeError: if ``influences`` is given together with
              ``intermediate=True``, or if (for a real ``y``) an element of
              ``influences`` is neither an ``UncertainReal`` nor an
              ``UncertainComplex``.

    Each :obj:`~named_tuples.Influence` has three attributes: ``label``, ``u``, ``uid``.

        * ``label`` is the label assigned to the uncertain number.
        * ``u`` is the value of the component of uncertainty (see :func:`~core.component`);
        * ``uid`` is the unique identifier for the uncertain number.

    The keyword argument ``influences`` can be used to report
    specific influences.

    The keyword argument ``key`` sets the sequence ordering to use
    the component of uncertainty or the label, respectively, ``u`` or ``label``.

    The keyword argument ``reverse`` controls the sense of ordering.

    The keyword argument ``trim`` can be used to set the minimum relative
    magnitude of components returned. Components of uncertainty greater
    than or equal to ``trim`` times the largest component will be reported.
    Set ``trim=0`` for a complete list.

    The keyword argument ``max_number`` can be used to restrict the
    number of components returned.

    The keyword argument ``intermediate`` will cause all components
    of uncertainty with respect to all intermediate results to be reported.
    When ``intermediate`` is ``True``, ``influences`` cannot be specified.

    **Examples**::

        >>> x1 = ureal(1,1,label='x1')
        >>> x2 = ureal(2,0.5,label='x2')
        >>> x3 = ureal(3,0.1,label='x3')
        >>> y = (x1 - x2) / x3
        >>> for i in reporting.budget(y):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        x1: 0.333333
        x2: 0.166667
        x3: 0.0111111

        >>> for i in reporting.budget(y,reverse=False):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        x3: 0.0111111
        x2: 0.166667
        x1: 0.333333

        >>> y1 = result(x1 + x2,label='y1')
        >>> y2 = result(x2 + x3,label='y2')
        >>> for i in reporting.budget(y1 + y2,intermediate=True):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        y1: 1.11803
        y2: 0.509902

    ..  versionchanged:: 1.3.7
        The `Influence` namedtuple has a third attribute `uid`

    ..  versionchanged:: 1.3.4
        Added the `intermediate` keyword argument.

    """
    # Keyword options
    # NOTE(review): unknown keyword names are silently ignored because only
    # the names below are read from kwargs.
    influences = kwargs.get('influences')
    key = kwargs.get('key', 'u')
    reverse = kwargs.get('reverse', True)
    trim = kwargs.get('trim', 0.01)
    max_number = kwargs.get('max_number')
    intermediate = kwargs.get('intermediate', False)

    # Some combinations are incompatible
    if intermediate and influences is not None:
        raise RuntimeError(
            "'influences' cannot be specified when 'intermediate' is True")

    # Each branch below fills three parallel lists (uids, labels, values)
    # that are zipped into Influence tuples once the branch is done.
    if isinstance(y, UncertainReal):
        if influences is None and not intermediate:
            # Default report: every entry of y._u_components followed by
            # every entry of y._d_components.
            # NOTE(review): uids/labels are read from .keys() and values from
            # .itervalues(); this relies on both walking the mapping in the
            # same order. itervalues()/iteritems() are the Python 2 style
            # iterator methods.
            nodes = y._u_components.keys()
            uids = [n_i.uid for n_i in nodes]
            # Unlabelled nodes are reported under their uid.
            labels = [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values = [math.fabs(u) for u in y._u_components.itervalues()]

            nodes = y._d_components.keys()
            uids += [n_i.uid for n_i in nodes]
            labels += [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values += [math.fabs(u) for u in y._d_components.itervalues()]

        elif intermediate:
            # The argument 'y' could be in the list
            # 0 acts as the "no such node" placeholder -- presumably never a
            # real uid; confirm.
            n_y_uid = y._node.uid if y.is_intermediate else 0

            uids = []
            labels = []
            values = []
            for n_i, u_i in y._i_components.iteritems():
                if n_i.uid == n_y_uid: continue  # Do not include 'y' itself

                uids.append(n_i.uid)
                labels.append(n_i.label if n_i.label is not None else "{}".
                              format(n_i.uid))
                values.append(math.fabs(u_i))

        elif influences is not None:
            # Report only the influences the caller asked for.
            uids = []
            labels = []
            values = []
            for i in influences:
                if isinstance(i, UncertainReal):
                    uids.append(i.uid)
                    labels.append(i.label)
                    values.append(math.fabs(u_component(y, i)))

                elif isinstance(i, UncertainComplex):
                    # A complex influence contributes two entries: one for
                    # its real part and one for its imaginary part.
                    uids.append(i.real.uid)
                    labels.append(i.real.label)
                    values.append(math.fabs(u_component(y, i.real)))
                    uids.append(i.imag.uid)
                    labels.append(i.imag.label)
                    values.append(math.fabs(u_component(y, i.imag)))
                else:
                    raise RuntimeError("unexpected type: '{!r}'".format(i))
        else:
            # Unreachable: the three branches above cover every combination
            # left after the compatibility check at the top.
            assert False, "should never occur"

        if len(values):
            # Keep only components that are at least `trim` times the
            # largest one.
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u, uid=uid)
                for (u, n, uid) in izip(values, labels, uids) if u >= cut_off
            ]
        else:
            this_budget = []

    elif isinstance(y, UncertainComplex):
        if influences is None and not intermediate:

            # Ensure that the influence vectors have the same keys
            # (extend_vector is defined elsewhere; the lock-step iteration
            # below depends on `re` and `im` yielding keys in the same
            # order -- confirm against extend_vector.)
            re = extend_vector(y.real._u_components, y.real._d_components)
            re = extend_vector(re, y.imag._u_components)
            re = extend_vector(re, y.imag._d_components)

            im = extend_vector(y.imag._u_components, y.imag._d_components)
            im = extend_vector(im, y.real._u_components)
            im = extend_vector(im, y.real._d_components)

            try:
                uids = []
                labels = []
                values = []
                it_re = re.iteritems()
                it_im = im.iteritems()

                # Walk both vectors in lock-step until one is exhausted;
                # StopIteration from next() is the normal loop exit.
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    # A node carrying a `complex` attribute is treated as the
                    # real half of a complex influence.
                    if hasattr(ir_0, 'complex'):

                        # The next item is always the imaginary component
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        # Reduce the 4 components of uncertainty
                        # to a summary value
                        u = u_bar([ur_0, ur_1, ui_0, ui_1])

                        if ir_0.label is None:
                            # No label assigned, report uids
                            # NOTE(review): the second uid comes from ii_0
                            # (first key taken from `im`), not from
                            # ir_1/ii_1 -- verify this is the intended
                            # imaginary uid.
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        uids.append(ir_0.complex)
                        labels.append(label)
                        values.append(u)

                    else:
                        # Report the component wrt a real influence
                        # this is still a matrix, which is then reduced
                        # to a summary value
                        u = u_bar([ur_0, 0, ui_0, 0])

                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        uids.append(ir_0.uid)
                        labels.append(label)
                        values.append(u)

            except StopIteration:
                pass

        elif intermediate:
            # The argument 'y' could be in the list
            if y.is_intermediate:
                n_yr_uid = y.real._node.uid
                n_yi_uid = y.imag._node.uid
            else:
                # 0 is the "no such node" placeholder (see the real case).
                n_yr_uid = 0
                n_yi_uid = 0

            # Ensure that the influence vectors have the same keys
            re = extend_vector(y.real._i_components, y.imag._i_components)
            im = extend_vector(y.imag._i_components, y.real._i_components)

            try:
                uids = []
                labels = []
                values = []
                it_re = re.iteritems()
                it_im = im.iteritems()

                # Same lock-step walk as in the default complex report.
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    if hasattr(ir_0, 'complex'):

                        # The next item is always the imaginary component
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        # Skip these real and imaginary components of 'y'
                        # (both halves have already been consumed above, so
                        # the two iterators stay aligned after `continue`).
                        if ir_0.uid == n_yr_uid or ii_0.uid == n_yi_uid:
                            continue

                        # Reduce the 4 components of uncertainty
                        # to a summary value
                        u = u_bar([ur_0, ur_1, ui_0, ui_1])

                        if ir_0.label is None:
                            # No label assigned, report uids
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        uids.append(ir_0.complex)
                        labels.append(label)
                        values.append(u)

                    else:
                        # Report the component wrt a real influence
                        # this is still a matrix, which is then reduced
                        # to a summary value
                        u = u_bar([ur_0, 0, ui_0, 0])

                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        uids.append(ir_0.uid)
                        labels.append(label)
                        values.append(u)

            except StopIteration:
                pass

        elif influences is not None:
            # Each requested influence yields one entry; u_component(y, i)
            # is reduced to a single value by u_bar.
            uids = [i.uid for i in influences]
            labels = [i.label for i in influences]
            values = [u_bar(u_component(y, i)) for i in influences]

        else:
            # Unreachable, see the matching branch in the real case.
            assert False, "should never occur"

        if len(values):
            # Same relative cut-off as in the real case.
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u, uid=uid)
                for (u, n, uid) in izip(values, labels, uids) if u >= cut_off
            ]

        else:
            this_budget = []
    else:
        # Anything that is not an uncertain number has an empty budget.
        this_budget = []

    if key is not None:
        # `getter` is defined outside this block; presumably it looks up the
        # Influence field named by `key` -- confirm.
        this_budget.sort(key=getter(key), reverse=reverse)

    # Truncation happens after sorting, so the first `max_number` entries in
    # the requested order are kept.
    if max_number is not None and len(this_budget) > max_number:
        this_budget = this_budget[:max_number]

    return this_budget
Пример #8
0
def budget(y,
           influences=None,
           key='u',
           reverse=True,
           trim=0.01,
           max_number=None):
    """Return a sequence of label-component of uncertainty pairs

    :arg y:  an uncertain number
    :type y: :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`

    :arg influences:  a sequence of uncertain numbers

    :arg key: the list sorting key (``None`` disables sorting)

    :arg reverse:  determines sorting order (forward or reverse)
    :type reverse: bool

    :arg trim:  remove components of uncertainty that are
                less than ``trim`` times the largest component

    :arg max_number: return no more than ``max_number`` components

    A sequence of :obj:`~named_tuples.Influence` namedtuples is
    returned, each with the attributes ``label`` and ``u`` for a
    component of uncertainty (see :func:`~core.component`).
    An empty list is returned when ``y`` is neither an
    ``UncertainReal`` nor an ``UncertainComplex``.

    The argument ``influences`` can be used to select the influences
    that are reported.

    The argument ``key`` can be used to order the sequence
    by the component of uncertainty or the label (``u`` or ``label``).

    The argument ``reverse`` controls the sense of ordering.

    The argument ``trim`` can be used to set a minimum relative
    magnitude of components returned. Set ``trim=0`` for a
    complete list.

    The argument ``max_number`` can be used to restrict the
    number of components returned.

    **Example**::

        >>> x1 = ureal(1,1,label='x1')
        >>> x2 = ureal(2,0.5,label='x2')
        >>> x3 = ureal(3,0.1,label='x3')
        >>> y = (x1 - x2) / x3
        >>> for l,u in reporting.budget(y):
        ...     print("{0}: {1:G}".format(l,u))
        ...
        x1: 0.333333
        x2: 0.166667
        x3: 0.0111111

        >>> for l,u in reporting.budget(y,reverse=False):
        ...     print("{0}: {1:G}".format(l,u))
        ...
        x3: 0.0111111
        x2: 0.166667
        x1: 0.333333

    """
    # Each branch below fills two parallel lists (labels, values) that are
    # zipped into Influence tuples once the branch is done.
    if isinstance(y, UncertainReal):
        if influences is None:
            # Default report: every entry of y._u_components followed by
            # every entry of y._d_components.
            # NOTE(review): labels are read from .keys() and values from
            # .itervalues(); this relies on both walking the mapping in the
            # same order. itervalues()/iteritems() are the Python 2 style
            # iterator methods.
            nodes = y._u_components.keys()
            # Unlabelled nodes are reported under their uid.
            labels = [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values = [math.fabs(u) for u in y._u_components.itervalues()]

            nodes = y._d_components.keys()
            labels += [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values += [math.fabs(u) for u in y._d_components.itervalues()]
        else:
            # Report only the influences the caller asked for.
            labels = []
            values = []
            for i in influences:
                if isinstance(i, UncertainReal):
                    labels.append(i.label)
                    values.append(math.fabs(u_component(y, i)))
                elif isinstance(i, UncertainComplex):
                    # A complex influence contributes two entries: one for
                    # its real part and one for its imaginary part.
                    labels.append(i.real.label)
                    values.append(math.fabs(u_component(y, i.real)))
                    labels.append(i.imag.label)
                    values.append(math.fabs(u_component(y, i.imag)))
                else:
                    # NOTE(review): assert statements are removed when Python
                    # runs with -O, in which case an influence of an
                    # unexpected type would be skipped silently.
                    assert False,\
                           "unexpected type: '{}'".format( type(i) )

        if len(values):
            # Keep only components that are at least `trim` times the
            # largest one.
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u) for (u, n) in izip(values, labels)
                if u >= cut_off
            ]
        else:
            this_budget = []

    elif isinstance(y, UncertainComplex):
        if influences is None:

            # Ensure that the influence vectors have the same keys
            # (extend_vector is defined elsewhere; the lock-step iteration
            # below depends on `re` and `im` yielding keys in the same
            # order -- confirm against extend_vector.)
            re = extend_vector(y.real._u_components, y.real._d_components)
            re = extend_vector(re, y.imag._u_components)
            # NOTE(review): the doubled "re = re =" is a redundant chained
            # assignment; it has no effect beyond a plain "re =".
            re = re = extend_vector(re, y.imag._d_components)

            im = extend_vector(y.imag._u_components, y.imag._d_components)
            im = extend_vector(im, y.real._u_components)
            im = extend_vector(im, y.real._d_components)

            try:
                labels = []
                values = []
                it_re = re.iteritems()
                it_im = im.iteritems()

                # Walk both vectors in lock-step until one is exhausted;
                # StopIteration from next() is the normal loop exit.
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    # A node carrying a `complex` attribute is treated as the
                    # real half of a complex influence; the following item of
                    # each iterator is consumed as its other half.
                    if hasattr(ir_0, 'complex'):
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        if ir_0.label is None:
                            # No label assigned, report uids
                            # NOTE(review): the second uid comes from ii_0
                            # (first key taken from `im`), not from
                            # ir_1/ii_1 -- verify this is the intended
                            # imaginary uid.
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        # Reduce the four components to one summary value.
                        u = u_bar([ur_0, ur_1, ui_0, ui_1])
                        labels.append(label)
                        values.append(u)

                    else:
                        # Not wrt a complex influence
                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        # Zeros stand in for the missing second column.
                        u = u_bar([ur_0, 0, ui_0, 0])
                        labels.append(label)
                        values.append(u)

            except StopIteration:
                pass
        else:
            # Each requested influence yields one entry; u_component(y, i)
            # is reduced to a single value by u_bar.
            labels = [i.label for i in influences]
            values = [u_bar(u_component(y, i)) for i in influences]

        if len(values):
            # Same relative cut-off as in the real case.
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u) for (u, n) in izip(values, labels)
                if u >= cut_off
            ]

        else:
            this_budget = []
    else:
        # Anything that is not an uncertain number has an empty budget.
        this_budget = []

    if key is not None:
        # `getter` is defined outside this block; presumably it looks up the
        # Influence field named by `key` -- confirm.
        this_budget.sort(key=getter(key), reverse=reverse)

    # Truncation happens after sorting, so the first `max_number` entries in
    # the requested order are kept.
    if max_number is not None and len(this_budget) > max_number:
        this_budget = this_budget[:max_number]

    return this_budget