def sortinplace(items):
    """Sort integer-like and other strings separately, keeping type slots.

    Every position that held an integer-like string in ``items`` holds an
    integer-like string in the result, and likewise for all other strings.
    Integer-like strings are ordered by numeric value, the rest are ordered
    lexicographically. Despite the name, ``items`` itself is not modified.

    A string counts as integer-like when it consists of digits with at most
    one leading ``-``. Anything else (``'--3'``, ``'5-'``, ``'-'``) is
    treated as an ordinary string rather than crashing in ``int()``.

    :param items: A list of string objects possibly containing digit chars
    :type items: list
    :return: A new list of 'inplace per type' sorted items
    :rtype: list

    :Example:

    >>> sortinplace(['car', 'truck', '8', '4', 'bus', '6', '-1'])
    ['bus', 'car', '-1', '4', 'truck', '6', '8']
    """
    def is_int(text):
        unsigned = text[1:] if text.startswith('-') else text
        return unsigned.isdigit()

    # Classify once so both passes agree on which slot belongs to which type.
    int_flags = [is_int(item) for item in items]

    numbers = iter(sorted(
        (item for item, flag in zip(items, int_flags) if flag), key=int))
    others = iter(sorted(
        item for item, flag in zip(items, int_flags) if not flag))

    # Refill every slot from the sorted stream of its own type.
    return [next(numbers) if flag else next(others) for flag in int_flags]
def majorityCnt(classList):
    """Return the label that occurs most often in ``classList``.

    :param classList: iterable of hashable class labels (votes)
    :return: the label with the highest count; when several labels tie,
        the one counted first wins on Python 3.7+, where dicts iterate in
        insertion order
    :raises IndexError: if ``classList`` is empty
    """
    classCount = {}
    for vote in classList:
        # Increment the tally for this label (not the dict itself).
        classCount[vote] = classCount.get(vote, 0) + 1

    if not classCount:
        raise IndexError("majorityCnt() requires at least one label")

    # max() returns the first maximal key, matching a stable descending sort.
    return max(classCount, key=classCount.get)
def __init__(self, imagefile, labelfile, *, training=60, validation=15,
             test=25, seed=None):
    """Use the data in the given files to create a data set.

    The training, validation, and test sets' sizes are given as integer
    percentages. If they do not evenly divide the data, remaining data
    will be added to the test set.

    The images returned by ``load_files`` are grouped by their ``'of'``
    key and each group is split separately, so every group contributes
    to each set in the requested proportions. Grouping does not require
    the images to arrive sorted by that key.

    :param imagefile: passed through to ``load_files``
    :param labelfile: passed through to ``load_files``
    :param training: percentage of each group placed in the training set
    :param validation: percentage of each group placed in the validation set
    :param test: percentage for the test set; only used for the sum check,
        the test set receives whatever remains of each group
    :param seed: seed for ``Data._randomizer``; ``Data._default_seed`` is
        used when ``None``
    :raises ValueError: if the three percentages do not add up to 100
    """
    if training + validation + test != 100:
        raise ValueError("Sum of data sets is not 100")

    images = load_files(imagefile, labelfile)

    # Group explicitly instead of itertools.groupby: groupby only merges
    # *consecutive* equal keys, so a key that reappears later would
    # overwrite its earlier group and silently drop images.
    key_of = getter('of')
    bynum = {}
    for image in images:
        bynum.setdefault(key_of(image), []).append(image)

    train = []
    valid = []
    test_set = []  # distinct name: do not clobber the `test` percentage
    stats = {}
    for num, nimages in bynum.items():
        r = round(len(nimages) * training // 100)
        v = round(len(nimages) * validation // 100)
        train += nimages[:r]
        valid += nimages[r:r+v]
        test_set += nimages[r+v:]  # remainder of the group
        stats[num] = MiniData(r, v, len(nimages)-r-v)

    # Now train, valid, and test_set are the data, ordered by number
    # This is the best time to convert the lists into dicts, if you want to.
    #
    # I want to support these access methods:
    #   Get by number  : int -> ImmutableCollection[Image]
    #   -- Get by set  : (set ->) ImmutableCollection[Image]
    #   Get set as dict: set -> { int: Image }
    #   Get by both    : set -> int -> ImmutableCollection[Image]
    #   Get as dict    : -> { set: { int: Image } }
    #   -- Get as set

    # Built before shuffling; `data` then gets its own independent shuffle.
    data = train + valid + test_set

    Data._randomizer.seed(Data._default_seed if seed is None else seed)
    # The shuffle order is kept fixed so a given seed reproduces the same sets.
    Data._randomizer.shuffle(train)
    Data._randomizer.shuffle(valid)
    Data._randomizer.shuffle(test_set)
    Data._randomizer.shuffle(data)

    self._training = tuple(train)
    self._validation = tuple(valid)
    self._test = tuple(test_set)
    self._alldata = tuple(data)
    self.stats = stats
def get_relations(tag):
    """Collect the relation data of ``tag`` from ``client``.

    Writes a single ``.`` to stdout (flushed immediately) as a progress
    marker, then queries ``client`` for the tag's inheritance data, tag
    groups, build targets and external repos.

    :param tag: mapping with at least the keys ``"id"`` and ``"name"``
    :return: a ``(tag, relations)`` tuple where ``relations`` is a dict
        with the keys ``parents``, ``group_list`` (sorted group names),
        ``dest_targets``, ``build_targets`` and ``external_repos``
    """
    sys.stdout.write(".")
    sys.stdout.flush()

    tag_id = tag["id"]

    # The queries are issued in the same order as the keys of the result.
    parents = client.getInheritanceData(tag["name"])
    name_of = getter("name")
    group_names = sorted(name_of(group) for group in client.getTagGroups(tag_id))
    dest_targets = client.getBuildTargets(destTagID=tag_id)
    build_targets = client.getBuildTargets(buildTagID=tag_id)
    external_repos = client.getTagExternalRepos(tag_info=tag_id)

    relations = {
        "parents": parents,
        "group_list": group_names,
        "dest_targets": dest_targets,
        "build_targets": build_targets,
        "external_repos": external_repos,
    }
    return (tag, relations)
def execute(self, using, optimize=False):
    """Lazily yield every node matching ``self.conditions``, without repeats.

    Conditions whose field is flagged ``indexed`` are grouped by the index
    their field resolves to for ``using``; each group is combined into one
    query, run against that index, and the hits are additionally checked
    against all unindexed conditions. If there are unindexed conditions,
    or no indexed ones at all, the type node is traversed afterwards with
    a return filter built from all conditions. A node already yielded is
    never yielded again.

    :param using: passed to ``field.index`` and ``nodetype._type_node``
    :param optimize: currently unused
    """
    # TODO do something with optimize
    field_of = getter(0)
    all_conditions = uniqify(self.conditions)

    # gather all indexed fields
    # TODO exclude those that can't be evaluated against (eg exact=None, etc)
    indexed = [cond for cond in all_conditions if field_of(cond).indexed]
    unindexed = [cond for cond in all_conditions if cond not in indexed]

    # TODO this could perhaps be cached - think about it
    results = {}

    already_yielded = set()

    # TODO order by type - check against the global type first, so that if
    # we get an empty result set, we can return none

    def index_for(cond):
        return cond.field.index(using)

    for index, group in groupby(indexed, index_for):
        # AND together the queries of every condition in this index group;
        # stop combining at the first condition that yields no query.
        query = None
        for condition in group:
            partial = q_from_condition(condition)
            if not partial:
                break
            if not query:
                query = partial
            else:
                query &= partial

        matches = set(index.query(query))
        # TODO results is currently worthless
        results[query] = matches

        # TODO also needs to match at least one type, if any have been provided
        # filter for unindexed conditions, as well
        fresh = {
            node for node in matches
            if all(matches_condition(node, cond) for cond in unindexed)
            and node not in already_yielded
        }
        already_yielded.update(fresh)
        for node in fresh:
            yield node

    # Index lookups are sufficient only when every condition was indexed.
    if indexed and not unindexed:
        return

    return_filter = return_filter_from_conditions(unindexed + indexed)
    rel_types = [neo4j.Outgoing.get('<<TYPE>>'),
                 neo4j.Outgoing.get('<<INSTANCE>>')]
    type_node = self.nodetype._type_node(using)
    pages = type_node.traverse(types=rel_types,
                               returnable=return_filter,
                               page_size=QUERY_CHUNK_SIZE)
    for page in pages:
        fresh = {node for node in page if node not in already_yielded}
        already_yielded.update(fresh)
        for node in fresh:
            yield node
def budget(y, **kwargs):
    r"""Return a sequence of `Influence` objects

    :arg y: :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`:
            an uncertain number

    :arg \**kwargs: Keyword arguments (unrecognised names are ignored):

        * influences: a sequence of uncertain numbers (default ``None``)
        * key (:class:`str`): a sorting key (``'u'`` or ``'label'``,
          default ``'u'``); ``None`` leaves the result unsorted
        * reverse (:class:`bool`): the sorting order (forward or reverse,
          default ``True``)
        * trim (:class:`float`): to control the smallest reported
          magnitudes (default ``0.01``)
        * max_number (:class:`int`): to return no more than `max_number`
          components (default ``None``, no limit)
        * intermediate (:class:`bool`): to report all intermediate
          components (default ``False``)

    :returns: A sequence of :obj:`~named_tuples.Influence` namedtuples.
              An empty list is returned when ``y`` is neither an
              ``UncertainReal`` nor an ``UncertainComplex``.

    :raises RuntimeError: if ``influences`` is given together with
        ``intermediate=True``, or if an element of ``influences`` is
        neither an ``UncertainReal`` nor an ``UncertainComplex`` (checked
        when ``y`` is an ``UncertainReal``)

    Each :obj:`~named_tuples.Influence` has three attributes:
    ``label``, ``u``, ``uid``.

    * ``label`` is the label assigned to the uncertain number.
    * ``u`` is the value of the component of uncertainty
      (see :func:`~core.component`);
    * ``uid`` is the unique identifier for the uncertain number.

    The keyword argument ``influences`` can be used to report
    specific influences.

    The keyword argument ``key`` sets the sequence ordering to use
    the component of uncertainty or the label, respectively,
    ``u`` or ``label``.

    The keyword argument ``reverse`` controls the sense of ordering.

    The keyword argument ``trim`` can be used to set the minimum
    relative magnitude of components returned. Components of
    uncertainty that are at least ``trim`` times the largest
    component will be reported. Set ``trim=0`` for a complete list.

    The keyword argument ``max_number`` can be used to restrict the
    number of components returned.

    The keyword argument ``intermediate`` will cause all components
    of uncertainty with respect to all intermediate results to be
    reported. When ``intermediate`` is ``True``, ``influences``
    cannot be specified.

    **Examples**::

        >>> x1 = ureal(1,1,label='x1')
        >>> x2 = ureal(2,0.5,label='x2')
        >>> x3 = ureal(3,0.1,label='x3')
        >>> y = (x1 - x2) / x3
        >>> for i in reporting.budget(y):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        x1: 0.333333
        x2: 0.166667
        x3: 0.0111111

        >>> for i in reporting.budget(y,reverse=False):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        x3: 0.0111111
        x2: 0.166667
        x1: 0.333333

        >>> y1 = result(x1 + x2,label='y1')
        >>> y2 = result(x2 + x3,label='y2')
        >>> for i in reporting.budget(y1 + y2,intermediate=True):
        ...     print("{0}: {1:G}".format(i.label,i.u))
        ...
        y1: 1.11803
        y2: 0.509902

    .. versionchanged:: 1.3.7
       The `Influence` namedtuple has a third attribute `uid`

    .. versionchanged:: 1.3.4
       Added the `intermediate` keyword argument.

    """
    # Keyword options
    influences = kwargs.get('influences')
    key = kwargs.get('key', 'u')
    reverse = kwargs.get('reverse', True)
    trim = kwargs.get('trim', 0.01)
    max_number = kwargs.get('max_number')
    intermediate = kwargs.get('intermediate', False)

    # Some combinations are incompatible
    if intermediate and influences is not None:
        raise RuntimeError(
            "'influences' cannot be specified when 'intermediate' is True")

    if isinstance(y, UncertainReal):
        if influences is None and not intermediate:
            # Default report: every entry of the `_u_components` vector
            # followed by every entry of the `_d_components` vector.
            # Unlabelled nodes are reported under their uid.
            nodes = y._u_components.keys()
            uids = [n_i.uid for n_i in nodes]
            labels = [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values = [math.fabs(u) for u in y._u_components.itervalues()]

            nodes = y._d_components.keys()
            uids += [n_i.uid for n_i in nodes]
            labels += [
                n_i.label if n_i.label is not None else "{}".format(n_i.uid)
                for n_i in nodes
            ]
            values += [math.fabs(u) for u in y._d_components.itervalues()]

        elif intermediate:
            # The argument 'y' could be in the list
            # (0 is used as a uid that is presumably never assigned to a
            # node -- TODO confirm)
            n_y_uid = y._node.uid if y.is_intermediate else 0
            uids = []
            labels = []
            values = []
            for n_i, u_i in y._i_components.iteritems():
                if n_i.uid == n_y_uid:
                    continue  # Do not include 'y' itself
                uids.append(n_i.uid)
                labels.append(n_i.label if n_i.label is not None else "{}".
                              format(n_i.uid))
                values.append(math.fabs(u_i))

        elif influences is not None:
            # Explicit influences: a complex influence contributes two
            # entries, one for its real and one for its imaginary part.
            uids = []
            labels = []
            values = []
            for i in influences:
                if isinstance(i, UncertainReal):
                    uids.append(i.uid)
                    labels.append(i.label)
                    values.append(math.fabs(u_component(y, i)))
                elif isinstance(i, UncertainComplex):
                    uids.append(i.real.uid)
                    labels.append(i.real.label)
                    values.append(math.fabs(u_component(y, i.real)))
                    uids.append(i.imag.uid)
                    labels.append(i.imag.label)
                    values.append(math.fabs(u_component(y, i.imag)))
                else:
                    raise RuntimeError("unexpected type: '{!r}'".format(i))
        else:
            assert False, "should never occur"

        # Keep only components that are at least `trim` times the largest.
        if len(values):
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u, uid=uid)
                for (u, n, uid) in izip(values, labels, uids)
                if u >= cut_off
            ]
        else:
            this_budget = []

    elif isinstance(y, UncertainComplex):
        if influences is None and not intermediate:
            # Ensure that the influence vectors have the same keys
            re = extend_vector(y.real._u_components, y.real._d_components)
            re = extend_vector(re, y.imag._u_components)
            re = extend_vector(re, y.imag._d_components)

            im = extend_vector(y.imag._u_components, y.imag._d_components)
            im = extend_vector(im, y.real._u_components)
            im = extend_vector(im, y.real._d_components)

            # `re` and `im` are walked in lockstep; the loop ends when
            # either iterator is exhausted (StopIteration from `next`).
            try:
                uids = []
                labels = []
                values = []
                it_re = re.iteritems()
                it_im = im.iteritems()
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    if hasattr(ir_0, 'complex'):
                        # The next item is always the imaginary component
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        # Reduce the 4 components of uncertainty
                        # to a summary value
                        u = u_bar([ur_0, ur_1, ui_0, ui_1])

                        if ir_0.label is None:
                            # No label assigned, report uids
                            # NOTE(review): both uids come from the first
                            # item of each iterator; if `re` and `im`
                            # share key order they are the same node --
                            # confirm whether `ir_1.uid` was intended.
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        uids.append(ir_0.complex)
                        labels.append(label)
                        values.append(u)
                    else:
                        # Report the component wrt a real influence
                        # this is still a matrix, which is then reduced
                        # to a summary value
                        u = u_bar([ur_0, 0, ui_0, 0])

                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        uids.append(ir_0.uid)
                        labels.append(label)
                        values.append(u)

            except StopIteration:
                pass

        elif intermediate:
            # The argument 'y' could be in the list
            if y.is_intermediate:
                n_yr_uid = y.real._node.uid
                n_yi_uid = y.imag._node.uid
            else:
                n_yr_uid = 0
                n_yi_uid = 0

            # Ensure that the influence vectors have the same keys
            re = extend_vector(y.real._i_components, y.imag._i_components)
            im = extend_vector(y.imag._i_components, y.real._i_components)

            # Same lockstep walk as above, over the intermediate
            # components, skipping the entry that is `y` itself.
            try:
                uids = []
                labels = []
                values = []
                it_re = re.iteritems()
                it_im = im.iteritems()
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    if hasattr(ir_0, 'complex'):
                        # The next item is always the imaginary component
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        # Skip these real and imaginary components of 'y'
                        # (both items of the pair were consumed above, so
                        # the iterators stay aligned)
                        if ir_0.uid == n_yr_uid or ii_0.uid == n_yi_uid:
                            continue

                        # Reduce the 4 components of uncertainty
                        # to a summary value
                        u = u_bar([ur_0, ur_1, ui_0, ui_1])

                        if ir_0.label is None:
                            # No label assigned, report uids
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        uids.append(ir_0.complex)
                        labels.append(label)
                        values.append(u)
                    else:
                        # Report the component wrt a real influence
                        # this is still a matrix, which is then reduced
                        # to a summary value
                        u = u_bar([ur_0, 0, ui_0, 0])

                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        uids.append(ir_0.uid)
                        labels.append(label)
                        values.append(u)

            except StopIteration:
                pass

        elif influences is not None:
            # Explicit influences: one summary value per influence.
            uids = [i.uid for i in influences]
            labels = [i.label for i in influences]
            values = [u_bar(u_component(y, i)) for i in influences]
        else:
            assert False, "should never occur"

        # Keep only components that are at least `trim` times the largest.
        if len(values):
            cut_off = max(values) * float(trim)
            this_budget = [
                Influence(label=n, u=u, uid=uid)
                for (u, n, uid) in izip(values, labels, uids)
                if u >= cut_off
            ]
        else:
            this_budget = []
    else:
        # Not an uncertain number: nothing to report.
        this_budget = []

    # Sort by the requested Influence attribute; key=None keeps the order.
    if key is not None:
        this_budget.sort(key=getter(key), reverse=reverse)

    # Truncation happens after sorting, so the leading entries are kept.
    if max_number is not None and len(this_budget) > max_number:
        this_budget = this_budget[:max_number]

    return this_budget
def budget(y, influences=None, key='u', reverse=True, trim=0.01, max_number=None):
    """Return a sequence of label-component of uncertainty pairs

    :arg y: an uncertain number
    :type y: :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`
    :arg influences: a sequence of uncertain numbers
    :arg key: the list sorting key (``'u'`` or ``'label'``); ``None``
        leaves the result unsorted
    :arg reverse: determines sorting order (forward or reverse)
    :type reverse: bool
    :arg trim: remove components of uncertainty that are less than
        ``trim`` times the largest component
    :arg max_number: return no more than ``max_number`` components

    :raises RuntimeError: if ``y`` is an ``UncertainReal`` and an element
        of ``influences`` is neither an ``UncertainReal`` nor an
        ``UncertainComplex``

    A sequence of :obj:`~named_tuples.Influence` namedtuples is
    returned, each with the attributes ``label`` and ``u`` for a
    component of uncertainty (see :func:`~core.component`). An empty
    list is returned when ``y`` is not an uncertain number.

    The argument ``influences`` can be used to select the influences
    that are reported.

    The argument ``key`` can be used to order the sequence by the
    component of uncertainty or the label (``u`` or ``label``).

    The argument ``reverse`` controls the sense of ordering.

    The argument ``trim`` can be used to set a minimum relative
    magnitude of components returned. Set ``trim=0`` for a complete
    list.

    The argument ``max_number`` can be used to restrict the number of
    components returned.

    **Example**::

        >>> x1 = ureal(1,1,label='x1')
        >>> x2 = ureal(2,0.5,label='x2')
        >>> x3 = ureal(3,0.1,label='x3')
        >>> y = (x1 - x2) / x3
        >>> for l,u in reporting.budget(y):
        ...     print("{0}: {1:G}".format(l,u))
        ...
        x1: 0.333333
        x2: 0.166667
        x3: 0.0111111

        >>> for l,u in reporting.budget(y,reverse=False):
        ...     print("{0}: {1:G}".format(l,u))
        ...
        x3: 0.0111111
        x2: 0.166667
        x1: 0.333333

    """
    # Filled in by whichever branch applies; both stay empty when `y` is
    # not an uncertain number, which yields an empty budget.
    labels = []
    values = []

    if isinstance(y, UncertainReal):
        if influences is None:
            # Default report: `_u_components` entries followed by
            # `_d_components` entries; unlabelled nodes show their uid.
            for components in (y._u_components, y._d_components):
                nodes = components.keys()
                labels += [
                    n_i.label if n_i.label is not None
                    else "{}".format(n_i.uid)
                    for n_i in nodes
                ]
                values += [math.fabs(u) for u in components.itervalues()]
        else:
            for i in influences:
                if isinstance(i, UncertainReal):
                    labels.append(i.label)
                    values.append(math.fabs(u_component(y, i)))
                elif isinstance(i, UncertainComplex):
                    # A complex influence contributes its real and
                    # imaginary parts as two separate entries.
                    labels.append(i.real.label)
                    values.append(math.fabs(u_component(y, i.real)))
                    labels.append(i.imag.label)
                    values.append(math.fabs(u_component(y, i.imag)))
                else:
                    # Raise explicitly: an `assert` would be stripped
                    # under -O and the bad influence silently skipped.
                    raise RuntimeError(
                        "unexpected type: '{}'".format(type(i)))

    elif isinstance(y, UncertainComplex):
        if influences is None:
            # Ensure that the influence vectors have the same keys
            re_vec = extend_vector(y.real._u_components, y.real._d_components)
            re_vec = extend_vector(re_vec, y.imag._u_components)
            re_vec = extend_vector(re_vec, y.imag._d_components)

            im_vec = extend_vector(y.imag._u_components, y.imag._d_components)
            im_vec = extend_vector(im_vec, y.real._u_components)
            im_vec = extend_vector(im_vec, y.real._d_components)

            # Walk both vectors in lockstep; the loop ends when either
            # iterator is exhausted (StopIteration from `next`).
            try:
                it_re = re_vec.iteritems()
                it_im = im_vec.iteritems()
                while True:
                    ir_0, ur_0 = next(it_re)
                    ii_0, ui_0 = next(it_im)

                    if hasattr(ir_0, 'complex'):
                        # Complex influence: the following item is its
                        # imaginary component, consume it as well.
                        ir_1, ur_1 = next(it_re)
                        ii_1, ui_1 = next(it_im)

                        if ir_0.label is None:
                            # No label assigned, report uids
                            label = "uid({},{})".format(
                                uid_str(ir_0.uid), uid_str(ii_0.uid))
                        else:
                            # take the trailing _re off the real label
                            # to label the complex influence
                            label = ir_0.label[:-3]

                        u = u_bar([ur_0, ur_1, ui_0, ui_1])
                    else:
                        # Not wrt a complex influence
                        if ir_0.label is None:
                            label = "uid({})".format(uid_str(ir_0.uid))
                        else:
                            label = ir_0.label

                        u = u_bar([ur_0, 0, ui_0, 0])

                    labels.append(label)
                    values.append(u)

            except StopIteration:
                pass
        else:
            labels = [i.label for i in influences]
            values = [u_bar(u_component(y, i)) for i in influences]

    # Keep only components that are at least `trim` times the largest.
    if values:
        cut_off = max(values) * float(trim)
        this_budget = [
            Influence(label=n, u=u)
            for (u, n) in izip(values, labels)
            if u >= cut_off
        ]
    else:
        this_budget = []

    # Sort by the requested Influence attribute; key=None keeps the order.
    if key is not None:
        this_budget.sort(key=getter(key), reverse=reverse)

    # Truncation happens after sorting, so the leading entries are kept.
    if max_number is not None and len(this_budget) > max_number:
        this_budget = this_budget[:max_number]

    return this_budget