Exemplos de groupby em Python, exemplos de utils.groupby em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: punktyadresowe_import.py Projeto: slachiewicz/osm-addr-tools

    def _checkDuplicatesInImport(self, data):
        """Remove near-coincident duplicate addresses from *data* and flag the rest.

        Addresses are grouped by (city, simc, housenumber with spaces removed
        and upper-cased, street).  Within a group, the first address of any
        pair whose ``distance`` is below 2 is removed from *data* in place.
        Every group that still holds more than one address afterwards gets a
        fixme (sharing one generated uuid) on each member, plus a second fixme
        when any two members are more than 100 apart.

        :param data: mutable list of address objects; modified in place.
        """
        def address_key(x):
            return (x.city, x.simc, x.housenumber.replace(' ', '').upper(), x.street)

        # remove duplicates closer than 2m
        removed = set()
        for (addr, occurances) in groupby(data, address_key).items():
            if len(occurances) < 2:
                continue
            for (a, b) in itertools.combinations(occurances, 2):
                # `a` can be the first element of several close pairs; calling
                # data.remove(a) again would raise ValueError (or drop an
                # unrelated entry that merely compares equal), so do it once.
                if id(a) in removed:
                    continue
                if distance(a.center, b.center) < 2:
                    # if any two duplicates are closer than 2m, remove from data
                    self.__log.info("Removing duplicate address: %s", a)
                    data.remove(a)
                    removed.add(id(a))

        # mark duplicates (regroup, because data has changed above)
        for (addr, occurances) in groupby(data, address_key).items():
            if len(occurances) < 2:
                continue
            self.__log.warning("Duplicate addresses in import: %s", occurances[0])
            uid = uuid.uuid4()
            for i in occurances:
                i.addFixme('Duplicate address in import (id: %s)' % (uid,))
            too_far_apart = any(
                distance(first.center, second.center) > 100
                for (first, second) in itertools.combinations(occurances, 2)
            )
            if too_far_apart:
                self.__log.warning("Address points doesn't fit into 100m circle. Points count: %d", len(occurances))
                for i in occurances:
                    i.addFixme('(distance over 100m, points: %d)' % (len(occurances),))

Exemplo n.º 2

0

Exibir arquivo

Arquivo: trace.py Projeto: chirag2796/dyna

 def __init__(self, interp):
     """Store *interp* and index its inferred edges in two levels.

     ``self.items`` maps each edge's element 0 to a further grouping of
     those edges by element 1 (head, then rule index, per the original
     comment).
     """
     self.interp = interp
     self.edges = infer_edges(self.interp)
     # group edges by head then ruleindex
     by_head = groupby(lambda edge: edge[0], self.edges)
     for head in by_head:
         head_edges = by_head[head]
         by_head[head] = groupby(lambda edge: edge[1], head_edges)
     self.items = by_head

Exemplo n.º 3

0

Exibir arquivo

Arquivo: trace.py Projeto: benizi/dyna

 def __init__(self, interp):
     """Keep the interpreter and build a nested index of its inferred edges.

     The outer level of ``self.items`` is keyed by ``edge[0]`` and the inner
     level by ``edge[1]`` (head, then rule index, per the original comment).
     """
     self.interp = interp
     self.edges = infer_edges(self.interp)

     def first(edge):
         return edge[0]

     def second(edge):
         return edge[1]

     # group edges by head then ruleindex
     nested = groupby(first, self.edges)
     for key in nested:
         nested[key] = groupby(second, nested[key])
     self.items = nested

Exemplo n.º 4

0

Exibir arquivo

Arquivo: ad_hoc.py Projeto: gmarceau/pynab

def liquid_position(budget, account_filter_keywords=None):
    """Tabulate the running balance of every relevant account for each budget month.

    An account is relevant when it is not hidden and neither its name nor
    its type matches any of the filter keywords (treated as regex
    alternatives).  When no keywords are given, the module-level
    ``non_liquid_account_keywords`` are used.

    Returns an ``OrderedDict`` keyed by month (the first seven characters of
    each monthly budget's ``month``); each value maps account names to the
    last known balance plus a ``'total'`` entry.  Leading months whose total
    is 0 are dropped.  Debug output is printed for every account/month pair.
    """
    import re
    from collections import OrderedDict
    from transactions import transactions_by_month
    from itertools import dropwhile

    keywords = account_filter_keywords or non_liquid_account_keywords
    account_filter_re = '|'.join(map("({})".format, keywords))

    def is_relevant_account(account):
        return not (account.hidden
                    or re.search(account_filter_re, account.accountName)
                    or re.search(account_filter_re, account.accountType))

    relevant_accounts = set(
        acc.entityId for acc in budget.accounts if is_relevant_account(acc))

    # Replay the transactions of each relevant account so that every
    # transaction carries a running balance.
    grouped = groupby(budget.transactions, key=lambda t: t.accountId)
    by_account = {}
    for account_id, account_txs in grouped.items():
        if account_id in relevant_accounts:
            by_account[account_id] = account_txs
    for account_id in by_account:
        by_account[account_id] = play_transactions(
            by_account[account_id], accountId=account_id, decorate=True)

    months = sorted(mb.month[0:7] for mb in budget.monthlyBudgets)
    result = OrderedDict({m: Object() for m in months})

    for acc, txs in by_account.items():
        by_month = groupby(txs, key=transaction_month)
        balance = 0
        for month in months:
            month_txs = by_month.get(month, [])
            if month_txs:
                # carry the balance forward from the month's last transaction
                balance = month_txs[-1].balance
            print('--46', month, balance, len(month_txs))

            pprint(month_txs)
            result[month][acc.lookup().accountName] = balance

    for month_result in result.values():
        month_result['total'] = sum(month_result.values())

    def has_zero_total(entry):
        return entry[1].total == 0

    return OrderedDict(dropwhile(has_zero_total, result.items()))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: views.py Projeto: lebenf/couchtv

def view_title(request,title_id):
    """Collect the template data for a single title.

    Looks the title up by primary key (404 when missing) and returns a dict
    with the title, its relations as child and as parent, the parent
    relations grouped by ``tvseason`` (only when the first parent relation
    has ``relation == 'T'``), directors, actors, akas, and the per-user data
    (``None`` for anonymous users).
    """
    title = get_object_or_404(models.Title, pk=title_id)

    # relations in which this title is the child
    relations_member = models.Relation.objects.filter(title=title.id)
    # relations in which this title is the parent
    relations_parent = models.Relation.objects.filter(parent=title.id)

    relations_season = {}
    if relations_parent and relations_parent[0].relation == 'T':
        relations_season = utils.groupby(relations_parent,
                                         key=lambda x: x.tvseason)

    cast = title.cast_set
    directors = cast.filter(role='D')
    actors = cast.filter(role='A').order_by('id')
    akas = models.Aka.objects.filter(title=title.id)

    user = request.user
    userdata = None if user.is_anonymous() else title.get_userdata(user)

    return dict(
        title=title,
        relations_member=relations_member,
        relations_parent=relations_parent,
        relations_season=relations_season,
        directors=directors,
        userdata=userdata,
        actors=actors,
        akas=akas,
    )

Exemplo n.º 6

0

Exibir arquivo

    def group_with_topk_nodes():
        """Return the groups produced by ``group_around_topk_costs`` as lists.

        Base groups come from ``TGE(...).get_groups()``; the resulting id
        list is enumerated and regrouped with ``key=cadr, value=car``
        (presumably: group the indices by their assigned id -- confirm
        against the definitions of ``car``/``cadr``).
        """
        from utils import group_around_topk_costs
        from tge import TGE

        device_names = [name for name, _, _ in devices]
        base_groups = TGE(gdef, device_names).get_groups()
        id_list = group_around_topk_costs(gdef, base_groups, prof_data, 19)
        grouped = groupby(enumerate(id_list), key=cadr, value=car)
        return list(grouped.values())

Exemplo n.º 7

0

Exibir arquivo

Arquivo: step1.py Projeto: aviranzerioniac/beautiful-tucan

def inner_join(courses: Iterable[Course], modules: Iterable[Module]
) -> Iterable[Module]:
    """Join courses onto the modules they reference and post-process the result.

    Every course is paired with each of its ``'modules'`` ids that exists in
    *modules*; the pairs are grouped per module id and merged with
    ``merge_course``.  Afterwards:

    * modules whose title (``details[1]["details"]``) contains
      " nur Teilnahme" are dropped;
    * modules with more than one content entry, all of whose ids end in
      "-ps", "-se" or "-ku", are replaced by one synthetic module per
      content entry (id ``<module id>-NN``).

    Returns the resulting dict keyed by module id.
    """
    modules = {item['module_id']:item for item in modules}
    courses = ((module_id, item) for item in courses
                                 for module_id in item['modules']
                                 if module_id in modules)
    result = {k:merge_course(g, modules[k])
            for k,g in utils.groupby(courses, key=lambda x:x[0])}
    for k,v in list(result.items()):
        # The module title is read from details[1] below, so two entries are
        # required; the previous `< 1` guard let a single-entry list through
        # and crashed with IndexError.
        if len(v["details"]) < 2:
            continue

        modtitle = v["details"][1]["details"]
        if " nur Teilnahme" in modtitle:
            del result[k]
            continue

        if not (len(v["content"]) > 1
        and all(c.split(" ", 1)[0][-3:] in ["-ps","-se","-ku"]
                for c in v["content"])):
            continue

        # Split the module: one new entry per contained course.
        del result[k]
        for i,c in enumerate(v["content"]):
            course_id,name = c.split(" ",1)
            newtitle = course_id + " " + modtitle + ". " + name
            newmodid = k+"-"+str(i).zfill(2)
            #print(newmodid, newtitle)
            result[newmodid] = {**v, "module_id": newmodid, "content":
              {newtitle:{**v["content"][c], "title":newtitle}} }
    return result

Exemplo n.º 8

0

Exibir arquivo

Arquivo: classify.py Projeto: jpassaro/seq-align-prep

def full_transposon_treatment(seq,overlap,gap,minlength,fastaout,evalue=None,
                              fname=None):
    '''Turn a sequence of hits into multiple-alignment input written to *fastaout*.

    Exactly one of *seq* and *fname* is expected: when *seq* is None the
    hits are loaded with ``hitsfromcsv(fname)``; passing both raises
    ``Error``.

    The hits are grouped by their ``SSEQID`` attribute.  Each group is cut
    into islands (``makeislands`` with *gap*), every island is classified
    into singles and nests (``classifyrecords`` with *overlap*), the nests
    are stratified with *minlength*, and the resolved records are written
    through ``fastaout.writeentries``.  An island yielding neither singles
    nor any non-empty nest raises ``Error``.

    *fastaout* is not closed here; *evalue* is accepted but not used by
    this function.
    '''
    if None not in (seq,fname):
        raise Error("Cannot give both seq and fname arguments")
    if seq is None:
        seq = hitsfromcsv(fname)

    hits_by_subject = utils.groupby(seq,key=_attrget('SSEQID'))
    for s,hits in hits_by_subject.iteritems():
        for island in makeislands(hits,gap):
            singles,nests = classifyrecords(island,overlap)
            nests = [stratify(N,minlength) for N in nests]
            if not (singles or any(nests)):
                raise Error('No records result from file {!r}'.format(fname))
            entries = resolve_query_overlap(singles,nests,overlap)
            fastaout.writeentries(entries)

Exemplo n.º 9

0

Exibir arquivo

 def fit(self, X, Y):
     """Compute the mean of the Y values for every distinct X value.

     The means are stored in ``self.category_means``; entries from
     ``self.overrides_dict`` are applied afterwards and take precedence.
     """
     self.category_means = {}

     def category(pair):
         return pair[0]

     def target(pair):
         return pair[1]

     targets_by_category = groupby(zip(X,Y), keyfunc=category, mapfunc=target)
     for (x, ys) in targets_by_category.items():
         self.category_means[x] = mean(ys)
     # explicit overrides win over the computed means
     for (k,v) in self.overrides_dict.items():
         self.category_means[k] = v

Exemplo n.º 10

0

Exibir arquivo

 def fetchTiles(self):
     """Download every tile of the bounding box and return the newest addresses.

     Each tile URL is fetched and parsed as KML; its Placemark children are
     converted with ``_convertToAddress`` and kept when ``_isEligible``
     accepts them.  A response without a KML Document raises ``ValueError``.
     For each point (``id_`` without its last dot-separated part) only the
     entry with the greatest ``id_`` is returned.
     """
     addresses = []
     for tile in self.divideBbox(*self.getBbox2180()):
         url = GUGiK.__base_url + ",".join(str(part) for part in tile)
         self.__log.info("Fetching from EMUIA: %s", url)
         soup = lxml.etree.fromstring(urlopen(url).read())
         # be namespace aware
         doc = soup.find('{http://www.opengis.net/kml/2.2}Document')
         if doc is None:
             raise ValueError(
                 'No data returned from GUGiK possibly to wrong scale. Check __MAX_BBOX_X, __MAX_BBOX_Y, HEIGHT and WIDTH'
             )
         for placemark in doc.iterchildren(
                 '{http://www.opengis.net/kml/2.2}Placemark'):
             address = self._convertToAddress(placemark)
             if self._isEligible(address):
                 addresses.append(address)

     # take latest version for each point (version is last element after dot in id_)
     # NOTE(review): max() compares the whole id_ value, not a parsed
     # version number -- confirm that ordering is what is intended.
     versions_by_point = groupby(addresses, lambda z: z.id_.rsplit('.', 1)[0])
     latest = []
     for versions in versions_by_point.values():
         latest.append(max(versions, key=lambda z: z.id_))
     return latest

Exemplo n.º 11

0

Exibir arquivo

    def _checkDuplicatesInImport(self, data):
        """Run the base duplicate check, then merge flagged neighbours.

        Addresses for which ``'Duplicate address in import' in x.fixmes``
        holds are grouped by (city, housenumber with spaces removed and
        upper-cased, street).  For each consecutive pair in a group whose
        ``distance`` is below 10, the first address is removed from *data*
        and the second is moved to the midpoint of the two positions.
        """
        super(GUGiK, self)._checkDuplicatesInImport(data)

        def address_key(x):
            return (x.city, x.housenumber.replace(' ', '').upper(), x.street)

        flagged = (x for x in data
                   if 'Duplicate address in import' in x.fixmes)
        addr_index = groupby(flagged, address_key)

        for (addr, occurances) in addr_index.items():
            if len(occurances) <= 1:
                continue
            for addr1, addr2 in zip(occurances, occurances[1:]):
                if not distance(addr1.center, addr2.center) < 10:
                    continue
                # addresses are closer than 10m
                # remove first, move second to average of these two
                self.__log.info(
                    "Merging duplicate addresses: %s, position: %s and %s"
                    % (addr1, addr1.getLatLon(), addr2.getLatLon()))
                data.remove(addr1)
                l1 = addr1.getLatLon()
                l2 = addr2.getLatLon()
                addr2.location = dict(
                    lat=(l1[0] + l2[0]) / 2,
                    lon=(l1[1] + l2[1]) / 2,
                )

Exemplo n.º 12

0

Exibir arquivo

Arquivo: io.py Projeto: Grimbly/brickrake

def load_bsx(f):
  """Read the items of a Brickstore Parts List XML file (*.bsx).

  Every ``Item`` element becomes a dict mapping child tag to child text,
  converted through ``CONVERT`` when the tag has an entry there.  Items
  sharing the same (ItemID, ColorID) are consolidated into the first such
  item, whose ``Qty`` becomes the sum over the group.

  Parameters
  ----------
  f : file-like object
      file containing XML contents

  Returns
  -------
  list of dict, one per distinct (ItemID, ColorID)."""
  def unchanged(x):
    return x

  root = etree.parse(f)
  items = []
  for element in root.findall('.//Item'):
    parsed = {}
    for child in element.getchildren():
      convert = CONVERT.get(child.tag, unchanged)
      parsed[child.tag] = convert(child.text)
    items.append(parsed)

  # sometimes there are multiple wanted lots with the same ItemID and ColorID.
  # Consolidate them together now.
  by_item = utils.groupby(items, lambda x: (x['ItemID'], x['ColorID']))
  result = []
  for ((item_id, color_id), lots) in by_item.iteritems():
    total = sum(lot['Qty'] for lot in lots)
    merged = lots[0]
    merged['Qty'] = total
    result.append(merged)
  return result

Exemplo n.º 13

0

Exibir arquivo

Arquivo: profiler.py Projeto: Bevinsky/HU_g9

	def process_data(self, data):
		"""Select this profile's weekday from *data* and store its totals.

		*data* is grouped by ``x.time.weekday()``; the group for
		``self.weekday`` is passed to ``utils.collect_total`` and the result
		is kept in ``self.data``.
		"""
		# all data is not grouped?
		by_weekday = utils.groupby(data, xkey=lambda x: x.time.weekday())
		totals = utils.collect_total(by_weekday[self.weekday], True)
		for key in totals:
			totals[key] = totals[key] # average watts

		self.data = totals

Exemplo n.º 14

0

Exibir arquivo

    def _checkDuplicatesInImport(self, data):
        """Flag every address that shares its key with another address.

        The key is (city, housenumber with spaces removed and upper-cased,
        street).  Each group with more than one member is logged once and
        every member receives a fixme.
        """
        def address_key(x):
            return (x.city, x.housenumber.replace(' ', '').upper(), x.street)

        for (addr, occurances) in groupby(data, address_key).items():
            if len(occurances) <= 1:
                continue
            self.__log.warning("Duplicate addresses in import: %s", addr)
            for duplicate in occurances:
                duplicate.addFixme('Duplicate address in import')

Exemplo n.º 15

0

Exibir arquivo

Arquivo: report.py Projeto: stjordanis/parsing-int-series

    def split_by_distribution(self, collection):
        """Group *collection* by ``distribution_name`` and split each group.

        Returns a list of ``(distribution title, split_by_parameters result)``
        pairs, one per distribution name.
        """
        def by_name(item):
            return item.distribution_name

        sections = []
        for name, members in groupby(collection, by_name).iteritems():
            tables = self.split_by_parameters(name, members)
            title = get_distribution_title(name)
            sections.append((title, tables))

        return sections

Exemplo n.º 16

0

Exibir arquivo

Arquivo: report.py Projeto: stjordanis/parsing-int-series

    def prepare_table(self, procedures):
        """Build a comparison table of scalar, SSE and SSE-block timings.

        *procedures* are grouped by ``(size, loops)``; every group must
        contain an item for each of the procedures "scalar", "sse" and
        "sse-block", otherwise ``KeyError`` is raised.  Rows are sorted by
        size.  Speed-ups are relative to the scalar time and shown as '---'
        when all three times are below 10.
        """
        def time_of(items, procedure):
            # time of the first item measured for the given procedure
            for item in items:
                if item.procedure == procedure:
                    return item.time

            raise KeyError("Procedure '%s' not found" % procedure)

        grouped = groupby(procedures, lambda item: (item.size, item.loops))

        data = []
        for (size, loops), items in grouped.iteritems():
            scalar = time_of(items, "scalar")
            sse = time_of(items, "sse")
            sse_block = time_of(items, "sse-block")
            data.append((size, loops, scalar, sse, sse_block))

        data.sort(key=lambda row: row[0])  # sort by size

        t = Table()
        t.add_header([("input", 2), "scalar", ("SSE", 2), ("SSE block", 2)])
        t.add_header([
            "size [B]", "loops", "time [us]", "time [us]", "speed-up",
            "time [us]", "speed-up"
        ])

        for size, loops, t0, t1, t2 in data:
            if all(measured < 10 for measured in (t0, t1, t2)):
                # don't fool people when all measurements are single-digit numbers
                speedup_sse = speedup_sse_block = '---'
            else:
                speedup_sse = '%0.2f' % (float(t0) / t1)
                speedup_sse_block = '%0.2f' % (float(t0) / t2)

            t.add_row([
                '{:,}'.format(size),
                '%d' % loops,
                '%d' % t0,
                '%d' % t1,
                speedup_sse,
                '%d' % t2,
                speedup_sse_block,
            ])

        return t

Exemplo n.º 17

0

Exibir arquivo

def unsatisified(wanted_list, allocation):
    """What do we still need to buy?

    Sums the wanted ``Qty`` per (ItemID, ColorID), subtracts the allocated
    ``quantity`` per (item_id, wanted_color_id) and returns a dict holding
    only the keys whose remaining quantity is positive.
    """
    lots_by_item = utils.groupby(
        copy.deepcopy(wanted_list), lambda x: (x['ItemID'], x['ColorID']))

    remaining = {}
    for key, lots in lots_by_item.iteritems():
        remaining[key] = sum(lot['Qty'] for lot in lots)

    for bought in allocation:
        key = (bought['item_id'], bought['wanted_color_id'])
        remaining[key] -= bought['quantity']

    return {key: qty for key, qty in remaining.items() if qty > 0}

Exemplo n.º 18

0

Exibir arquivo

Arquivo: report.py Projeto: stjordanis/parsing-int-series

    def __init__(self, path):
        """Load the measurements at *path* and build ``self.report``.

        The data is grouped by ``sep_distribution``; each group is split with
        ``split_by_distribution`` and stored together with its separator
        title.
        """
        with open(path, 'rt') as f:
            self.raw_data = load(f)

        # group by separators distribution
        by_separator = groupby(self.raw_data,
                               lambda item: item.sep_distribution)

        self.report = []
        for sep, members in by_separator.iteritems():
            sections = self.split_by_distribution(members)
            self.report.append((get_separator_title(sep), sections))

Exemplo n.º 19

0

Exibir arquivo

Arquivo: punktyadresowe_import.py Projeto: slachiewicz/osm-addr-tools

    def _checkMixedScheme(self, data):
        """Flag street-less addresses in cities that also have street addresses.

        For every ``simc`` the share of addresses with a street is computed
        (using ``_filterOnes`` on the per-address booleans).  When that share
        lies strictly between 0 and 1, each address of the city without a
        street receives a fixme stating the share and the count.
        """
        street_flags = groupby(data, lambda x: x.simc, lambda x: bool(x.street))

        dups_count = {k: len(_filterOnes(v)) for k, v in street_flags.items()}
        share = {k: len(_filterOnes(v))/len(v) for k, v in street_flags.items()}
        mixed = {k: ratio for k, ratio in share.items() if 0 < ratio < 1}

        for i in data:
            if i.street or i.simc not in mixed:
                continue
            i.addFixme('Mixed addressing scheme in city - with streets and without. %.1f%% (%d) with streets.' % (mixed[i.simc]*100, dups_count[i.simc]))

Exemplo n.º 20

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def unsatisified(wanted_list, allocation):
  """What do we still need to buy?

  Returns a dict from (ItemID, ColorID) to the wanted quantity that the
  allocation does not cover yet; fully covered items are left out.
  """
  def wanted_key(lot):
    return (lot['ItemID'], lot['ColorID'])

  def allocated_key(lot):
    return (lot['item_id'], lot['wanted_color_id'])

  grouped = utils.groupby(copy.deepcopy(wanted_list), wanted_key)
  outstanding = {}
  for key, lots in grouped.iteritems():
    outstanding[key] = sum(lot['Qty'] for lot in lots)

  for lot in allocation:
    outstanding[allocated_key(lot)] -= lot['quantity']

  return {key: qty for key, qty in outstanding.items() if qty > 0}

Exemplo n.º 21

0

Exibir arquivo

Arquivo: report.py Projeto: stjordanis/parsing-int-series

    def split_by_parameters(self, distribution_name, collection):
        """Build one table per ``num_distribution`` value of *collection*.

        Returns a list of ``(title, table)`` pairs ordered by the weight that
        ``get_num_distribution_parameters`` reports for each value.
        """
        grouped = groupby(collection, lambda item: item.num_distribution)

        weighted = []
        for key, members in grouped.iteritems():
            table = self.prepare_table(members)
            params = get_num_distribution_parameters(distribution_name, key)
            weighted.append((params.title, table, params.weight))

        # order by weight (last element); the weight itself is not returned
        ordered = sorted(weighted, key=lambda row: row[2])
        return [(title, table) for title, table, _ in ordered]

Exemplo n.º 22

0

Exibir arquivo

def is_valid_solution(wanted_parts, allocation, stores=None):
    """Check if the allocation is a valid solution.

    The allocation is valid when

    1) all wanted parts are bought,
    2) no lot buys more than is available, and
    3) (only when *stores* is given) every used store is known and its
       minimum purchase is reached.
    """
    allocation_by_id = utils.groupby(
        allocation, lambda x: (x['item_id'], x['wanted_color_id']))

    def bought_quantity(lot):
        bought = allocation_by_id.get((lot['ItemID'], lot['ColorID']), [])
        return sum(e['quantity'] for e in bought)

    # for each wanted part: did we buy enough?
    if any(bought_quantity(lot) < lot['Qty'] for lot in wanted_parts):
        return False

    # for each bought lot: did we buy <= the amount available?
    if any(lot['quantity'] > lot['quantity_available'] for lot in allocation):
        return False

    if stores is None:
        return True

    # for each store: did we buy at least the minimum purchase?
    allocation_by_store = utils.groupby(allocation, lambda x: x['store_id'])
    store_by_id = dict((e['store_id'], e) for e in stores)
    for (store_id, lots) in allocation_by_store.iteritems():
        if store_id not in store_by_id:
            return False
        spent = sum(e['cost_per_unit'] * e['quantity'] for e in lots)
        if spent < store_by_id[store_id]['minimum_buy']:
            return False

    return True

Exemplo n.º 23

0

Exibir arquivo

Arquivo: io.py Projeto: Grimbly/brickrake

def save_xml_per_vendor(folder, solution, stores):
  '''Save a BrickLink XML with a Wanted List for each vendor.

  For every store in the solution's allocation (in sorted order) the user
  is prompted for a wanted-list id, which is stored on each of that
  store's lots.  All lots are then written with ``save_xml`` to the path
  given as *folder*.
  '''
  stores_by_id = utils.groupby(stores, lambda x: x['store_id'])
  lots_by_store = utils.groupby(solution['allocation'],
                                lambda x: x['store_id'])

  # for each store
  for (store_id, lots) in sorted(lots_by_store.iteritems()):
    seller = stores_by_id[store_id][0]['seller_name']

    # get wanted list id
    wanted_list = raw_input(
        "Create a new 'Wanted List' named '%s' and type its ID here: "
        % (seller, ))

    # save that id onto the lot
    for lot in lots:
      lot['wanted_list_id'] = wanted_list

  # write file
  all_lots = utils.flatten(lots_by_store.values())
  with open(folder, 'w') as f:
    save_xml(f, all_lots)

Exemplo n.º 24

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def is_valid_solution(wanted_parts, allocation, stores=None):
  """Tell whether *allocation* satisfies *wanted_parts*.

  Checks, in this order:

  1) every wanted part is bought in sufficient quantity
  2) no lot buys more than its available quantity
  3) if *stores* is given: every used store is listed there and the money
     spent in it reaches the store's minimum purchase
  """
  def allocated_key(lot):
    return (lot['item_id'], lot['wanted_color_id'])

  def wanted_key(lot):
    return (lot['ItemID'], lot['ColorID'])

  # for each wanted part
  allocation_by_id = utils.groupby(allocation, allocated_key)
  for wanted in wanted_parts:
    # did we buy enough?
    purchases = allocation_by_id.get(wanted_key(wanted), [])
    purchased = sum(p['quantity'] for p in purchases)
    if purchased < wanted['Qty']:
      return False

  # for each bought lot
  for bought in allocation:
    # did we buy <= the amount available?
    if bought['quantity'] > bought['quantity_available']:
      return False

  if stores is None:
    return True

  # for each store
  allocation_by_store = utils.groupby(allocation, lambda x: x['store_id'])
  store_by_id = dict( (s['store_id'], s) for s in stores )
  for (store_id, lots) in allocation_by_store.iteritems():
    if store_id not in store_by_id:
      return False
    # did we buy at least the minimum purchase?
    store = store_by_id[store_id]
    spent = sum(lot['cost_per_unit'] * lot['quantity'] for lot in lots)
    if spent < store['minimum_buy']:
      return False

  return True

Exemplo n.º 25

0

Exibir arquivo

    def _checkMixedScheme(self, data):
        """Add a fixme to street-less addresses of cities with a mixed scheme.

        A city (``simc``) is mixed when the fraction of its addresses that
        have a street is strictly between 0 and 1.  The fixme reports that
        fraction as a percentage and the number of addresses with streets.
        """
        flags_by_city = groupby(data, lambda x: x.simc,
                                lambda x: bool(x.street))

        with_street = {}
        for city, flags in flags_by_city.items():
            with_street[city] = len(_filterOnes(flags))

        fraction = {}
        for city, flags in flags_by_city.items():
            fraction[city] = len(_filterOnes(flags)) / len(flags)

        mixed = {}
        for city, value in fraction.items():
            if 0 < value and value < 1:
                mixed[city] = value

        for address in data:
            if bool(address.street) or address.simc not in mixed:
                continue
            address.addFixme(
                'Mixed addressing scheme in city - with streets and without. %.1f%% (%d) with streets.'
                % (mixed[address.simc] * 100, with_street[address.simc]))

Exemplo n.º 26

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def covers(wanted_parts, available_parts):
  """True if the given stores can cover all desired items.

  An item is covered when the summed ``quantity_available`` of the
  available lots with the same (item id, colour id) is not below its
  ``Qty``.
  """
  stock_by_item = utils.groupby(
      available_parts, lambda x: (x['item_id'], x['wanted_color_id']))

  def is_covered(item):
    key = (item['ItemID'], item['ColorID'])
    quantity = item['Qty']
    lots = stock_by_item.get(key, [])
    inventory = sum(lot['quantity_available'] for lot in lots)
    return not inventory < quantity

  return all(is_covered(item) for item in wanted_parts)

Exemplo n.º 27

0

Exibir arquivo

def covers(wanted_parts, available_parts):
    """True if the given stores can cover all desired items.

    Available lots are indexed by (item_id, wanted_color_id); every wanted
    item needs at least ``Qty`` units available in total.
    """
    def lot_key(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    available_by_item = utils.groupby(available_parts, lot_key)

    for wanted in wanted_parts:
        key = (wanted['ItemID'], wanted['ColorID'])
        needed = wanted['Qty']
        in_stock = sum(
            lot['quantity_available']
            for lot in available_by_item.get(key, []))
        if in_stock < needed:
            return False
    return True

Exemplo n.º 28

0

Exibir arquivo

Arquivo: average.py Projeto: stjordanis/parsing-int-series

    def __init__(self, path):
        """Load the measurements at *path* and build ``self.report``.

        The data is grouped by the pair (sep_distribution,
        distribution_name); for each group a table is prepared and stored
        together with the separator title and the distribution title.
        """
        with open(path, 'rt') as f:
            self.raw_data = load(f)

        # (the unused ``bydistribution`` key function was removed)
        bysep = lambda item: (item.sep_distribution, item.distribution_name)

        self.report = []
        for (sep, distribution_name), collection in groupby(self.raw_data, bysep).iteritems():
            ret = self.prepare_table(collection)
            self.report.append((
                get_separator_title(sep),
                get_distribution_title(distribution_name),
                ret
            ))

Exemplo n.º 29

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

    def coverage(inventory):
      """Count how many wanted units *inventory* could supply.

      Only lots whose (item_id, wanted_color_id) is in ``wanted_by_item``
      count; per item the contribution is capped at the wanted ``Qty`` of
      the first wanted lot.
      """
      def lot_key(lot):
        return (lot['item_id'], lot['wanted_color_id'])

      # only worry about items wanted
      relevant = [lot for lot in inventory if lot_key(lot) in wanted_by_item]

      # count up how much there is of each (item_id, color_id) pair
      available = []
      for key, lots in utils.groupby(relevant, lot_key).iteritems():
        available.append((key, sum(e['quantity_available'] for e in lots)))

      # count how much of each item I'd buy
      tot = 0
      for (key, quantity) in available:
        if key in wanted_by_item:
          tot += min(wanted_by_item[key][0]['Qty'], quantity)

      return tot

Exemplo n.º 30

0

Exibir arquivo

        def coverage(inventory):
            """Return the number of wanted units obtainable from *inventory*.

            Lots are restricted to keys present in ``wanted_by_item``; for
            each key the available quantity is summed and capped at the
            ``Qty`` of the first wanted lot for that key.
            """
            kf = lambda x: (x['item_id'], x['wanted_color_id'])

            # only worry about items wanted
            candidates = [lot for lot in inventory if kf(lot) in wanted_by_item]

            # count up how much there is of each (item_id, color_id) pair
            stock = [
                (key, sum(e['quantity_available'] for e in lots))
                for key, lots in utils.groupby(candidates, kf).iteritems()
            ]

            # count how much of each item I'd buy
            tot = 0
            for (key, quantity) in stock:
                if key not in wanted_by_item:
                    continue
                tot += min(wanted_by_item[key][0]['Qty'], quantity)

            return tot

Exemplo n.º 31

0

Exibir arquivo

Arquivo: optimize.py Projeto: jstrafford4/dfs-optimizer

 def add_players(self, players, player_filter=None):
     """Register the player pool, filtered and indexed by position.

     Sets ``self.players`` (sorted descending by the attribute named by
     ``self.optimize_key``), ``self.players_filtered`` (filtered by
     *player_filter*, or by ``self.player_filter`` when it is None) and
     ``self.players_by_pos``.  Raises ``Exception`` when a position listed
     in ``self.restrictions`` has no player.
     """
     def score(p):
         return p.__dict__[self.optimize_key]

     # returns a new list in descending order by player[self.optimize_key]
     self.players = sorted(players, key=score, reverse=True)

     # filter out unwanted players based on function argument if provided, else class player filter
     chosen_filter = (player_filter if player_filter is not None
                      else self.player_filter)
     self.players_filtered = filter(chosen_filter, self.players)

     # returns object with keys for each position, values a list of players at that position
     self.players_by_pos = utils.groupby('position', self.players_filtered)

     # checks that we have players in the pool for each position required
     if all(k in self.players_by_pos for k in self.restrictions.keys()):
         return
     missing = [
         k
         for k in self.restrictions if k not in self.players_by_pos
     ]
     raise Exception(
         '[ERROR] Player list (players=%d) not sufficient to meet restrictions. Missing = %s.'
         % (len(players), missing))

Exemplo n.º 32

0

Exibir arquivo

Arquivo: inference.py Projeto: teymour-aldridge/unet-pytorch

def inference(img, state_path, device=0):
    """Run the U-Net over patches of *img* and return a uint8 mask.

    The model weights are loaded from *state_path* and the model is placed
    on CUDA device *device*.  Patch positions are laid out on a grid over
    0..2047 in both axes with a stride of 60; ``preprocess`` turns them into
    a patch stack, which is evaluated in groups produced by
    ``groupby(indices, 100, key='mini')`` (presumably mini-batches of 100
    -- confirm against that helper).  The per-pixel argmax of the model
    output is written into the result at the locations given by
    ``patch_interface``.

    Returns an array of ``img.shape`` with the predictions scaled by 255
    and cast to ``uint8``.
    """
    model = unet().cuda(device)
    state = torch.load(state_path)
    model.load_state_dict(state)

    binary = np.zeros(img.shape)

    stride, patch_size = 60, 60

    with cp.cuda.Device(device):
        img = cp.asarray(img.astype(np.uint16))

        # (x, y) position of every patch on the grid, one row per patch
        pos_x = np.arange(0, 2048, stride)
        pos_y = np.arange(0, 2048, stride)
        vx, vy = np.meshgrid(pos_x, pos_y)
        pos = cp.asarray(np.stack([vx, vy]).reshape((2, -1)).transpose([1, 0]))

        X, pos = preprocess(img, pos, half_size=patch_size // 2, device=device)

        X = X.transpose([2, 0, 1])

        indices = np.arange(len(X)).tolist()
        groups = groupby(indices, 100, key='mini')

        # The unused ``out_list`` accumulator and enumerate() index of the
        # earlier version were dropped.
        for group in groups:
            out = X[group]
            _pos = pos[group]

            out = Variable(
                torch.from_numpy(np.expand_dims(out, 1).astype(
                    np.float32)).cuda(device))
            R = model(out)
            R = R.max(1)[1]  # index of the highest-scoring class
            patch = R.cpu().numpy().transpose([1, 2, 0])
            binary[patch_interface(_pos[:, 0], _pos[:, 1],
                                   patch_size // 2)] = patch

    binary = (binary * 255).astype('uint8')
    return binary

Exemplo n.º 33

0

Exibir arquivo

    def __init__(self, path):
        """Load the measurements at *path* and build one report section per size.

        Items are grouped by ``size``; within each size they are sorted by
        (distribution_name, num_distribution, sep_distribution) and turned
        into ``(title, table)`` pairs.  ``self.report`` lists the sections
        in ascending size order.
        """
        with open(path, 'rt') as f:
            loaded = list(load(f))

        def sort_key(item):
            return (item.distribution_name, item.num_distribution,
                    item.sep_distribution)

        by_size = groupby(loaded, lambda item: item.size)
        self.report = []
        for size in sorted(by_size):
            members = by_size[size]
            members.sort(key=sort_key)

            section = [(self.get_title(item), self.prepare_table(item))
                       for item in members]

            self.report.append(('Input size %d bytes' % size, section))

Exemplo n.º 34

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def brute_force(wanted_parts, price_guide, k):
  """Enumerate all possible combinations of k stores.

  For every combination whose joint inventory covers *wanted_parts*, the
  minimum cost and the matching allocation are computed.  Returns a list of
  dicts with the keys 'cost', 'allocation' and 'store_ids'.
  """
  by_store = utils.groupby(price_guide, lambda x: x['store_id'])

  results = []
  for selected_stores in itertools.combinations(by_store.keys(), k):
    # get items sold by these stores only
    inventory = utils.flatten( by_store[s] for s in selected_stores )
    if not covers(wanted_parts, inventory):
      # these stores cannot fill the order
      continue
    # calculate minimum cost to buy everything using these stores
    cost, allocation = min_cost(wanted_parts, inventory)
    results.append(dict(cost=cost,
                        allocation=allocation,
                        store_ids=selected_stores))
  return results

Exemplo n.º 35

0

Exibir arquivo

def brute_force(wanted_parts, price_guide, k):
    """Try every combination of *k* stores from *price_guide*.

    Combinations whose combined lots do not cover *wanted_parts* are
    skipped; for the others the result of ``min_cost`` is recorded.  Returns
    a list of ``{'cost', 'allocation', 'store_ids'}`` dicts.
    """
    lots_by_store = utils.groupby(price_guide, lambda x: x['store_id'])

    solutions = []
    for store_ids in itertools.combinations(lots_by_store.keys(), k):
        # get items sold by these stores only
        inventory = utils.flatten(lots_by_store[s] for s in store_ids)
        if covers(wanted_parts, inventory):
            # calculate minimum cost to buy everything using these stores
            cost, allocation = min_cost(wanted_parts, inventory)
            solution = {}
            solution['cost'] = cost
            solution['allocation'] = allocation
            solution['store_ids'] = store_ids
            solutions.append(solution)
    return solutions

Exemplo n.º 36

0

Exibir arquivo

Arquivo: punktyadresowe_import.py Projeto: slachiewicz/osm-addr-tools

    def fetchTiles(self):
        """Fetch all tiles of the bounding box and return the newest addresses.

        Each tile is downloaded through an opener obtained from
        ``get_ssl_no_verify_opener`` and parsed as KML.  Placemark children
        are converted with ``_convertToAddress`` and kept when ``_isEligible``
        accepts them; a missing KML Document raises ``ValueError``.  For each
        point (``id_`` without its last dot-separated part) only the entry
        with the greatest ``id_`` is returned.
        """
        collected = []
        for tile in self.divideBbox(*self.getBbox2180()):
            url = GUGiK.__base_url+",".join(str(part) for part in tile)
            self.__log.info("Fetching from EMUIA: %s", url)

            opener = get_ssl_no_verify_opener()

            payload = opener.open(url).read()
            soup = lxml.etree.fromstring(payload)
            # be namespace aware
            doc = soup.find('{http://www.opengis.net/kml/2.2}Document')
            if doc is None:
                raise ValueError('No data returned from GUGiK possibly to wrong scale. Check __MAX_BBOX_X, __MAX_BBOX_Y, HEIGHT and WIDTH')
            for placemark in doc.iterchildren('{http://www.opengis.net/kml/2.2}Placemark'):
                address = self._convertToAddress(placemark)
                if self._isEligible(address):
                    collected.append(address)

        # take latest version for each point (version is last element after dot in id_)
        # NOTE(review): max() compares the whole id_ value, not a parsed
        # version number -- confirm that ordering is what is intended.
        def point_id(z):
            return z.id_.rsplit('.', 1)[0]

        newest = []
        for versions in groupby(collected, point_id).values():
            newest.append(max(versions, key=lambda z: z.id_))
        return newest

Exemplo n.º 37

0

Exibir arquivo

def min_cost(wanted_parts, available_parts):
    """Greedily fill every wanted part from the cheapest matching lots.

    Args:
        wanted_parts: iterable of dicts read via the keys 'ItemID',
            'ColorID', 'Qty' and 'ItemName'.
        available_parts: iterable of lot dicts read via the keys 'item_id',
            'wanted_color_id', 'color_id', 'store_id', 'quantity_available'
            and 'cost_per_unit'.

    Returns:
        A ``(cost, allocation)`` tuple. ``allocation`` is a list of dicts
        describing how much is taken from which lot; ``cost`` is the summed
        price, or ``float('inf')`` when some wanted part cannot be filled
        completely (a warning is printed for each such part).
    """
    lots_by_part = utils.groupby(
        available_parts, lambda x: (x['item_id'], x['wanted_color_id']))

    result = []
    cost = 0.0
    for item in wanted_parts:
        part_key = (item['ItemID'], item['ColorID'])
        # most expensive first, so that pop() hands out the cheapest lot
        matching = sorted(lots_by_part.get(part_key, []),
                          key=lambda x: -1 * x['cost_per_unit'])

        # take as much inventory as possible, starting with the lowest price,
        # until the requested quantity is filled
        n_remaining = item['Qty']
        while n_remaining > 0:
            if not matching:
                # parenthesised single-argument print works on Python 2 and 3
                print('WARNING: couldn\'t find enough inventory to purchase %s' % (
                    item['ItemName'], ))
                cost = float('inf')
                break

            lot = matching.pop()
            amount = min(n_remaining, lot['quantity_available'])
            result.append({
                'item_id': lot['item_id'],
                'color_id': lot['color_id'],
                'store_id': lot['store_id'],
                'quantity': amount,
                'cost_per_unit': lot['cost_per_unit']
            })
            n_remaining -= amount
            cost += amount * lot['cost_per_unit']

    return (cost, result)

Exemplo n.º 38

0

Exibir arquivo

Arquivo: io.py Projeto: Grimbly/brickrake

def load_xml(f):
  """Parse a BrickLink XML file into a list of wanted lots.

  Every ITEM element becomes a dict: child tags are renamed through
  TRANSLATIONS and their text converted through CONVERT (identity when no
  converter is registered for the tag). 'ItemName' is set to the item id and
  'ColorName' is looked up from the color id.

  Lots sharing the same (ItemID, ColorID) are consolidated: the first lot of
  each group is kept and its 'Qty' becomes the sum over the group.

  Args:
    f: file name or file object accepted by ``etree.parse``.

  Returns:
    A list of item dicts, one per distinct (ItemID, ColorID).
  """
  root = etree.parse(f)
  items = []
  for item in root.findall('.//ITEM'):
    item_dict = {}
    # iterating an element yields its direct children; getchildren() is
    # deprecated and no longer exists in xml.etree on Python 3.9+
    for child in item:
      tag = TRANSLATIONS.get(child.tag, child.tag)
      value = CONVERT.get(tag, lambda x: x)(child.text)
      item_dict[tag] = value
    item_dict['ItemName'] = item_dict['ItemID']
    item_dict['ColorName'] = color.name(item_dict['ColorID'])
    items.append(item_dict)

  # sometimes there are multiple wanted lots with the same ItemID and ColorID.
  # Consolidate them together now.
  by_item = utils.groupby(items, lambda x: (x['ItemID'], x['ColorID']))
  result = []
  # values() exists on both Python 2 and 3 (iteritems() is Python 2 only) and
  # the group key itself is not needed here
  for same in by_item.values():
    prototype = same[0]
    prototype['Qty'] = sum(e['Qty'] for e in same)
    result.append(prototype)
  return result

Exemplo n.º 39

0

Exibir arquivo

Arquivo: step2.py Projeto: aviranzerioniac/beautiful-tucan

def generate_page(data: List[Module2]) -> str:
    """Render the modules as HTML, one collapsible section per category.

    Modules are grouped by their "category" entry; every module is rendered
    with the module template, the rendered modules of one category are joined
    by blank lines and wrapped in the category template, and all category
    sections are concatenated into the returned string.
    """
    module_template = """
<div class=flex>
  <label><input id='checker-{{id}}' class=checker type=checkbox></label>
  <details class=module-wrapper>
    <summary id='module-{{id}}' class='module box-b box-b-{{id}}'>
      <span>{{credits}}cp</span>
      <span>{{title_short}}</span>
      <span title='{{title}}'>{{title}}</span>
      <span title='{{owner}}'>{{owner_short}}</span>
      <!-- <span title='{{language}}'>{{{language}}}</span> -->
      <div class=toggler-show></div>
    </summary>
    <div id='details-{{id}}' class=details>{{#details}}
      <b>{{title}}</b><br>
      {{#details}}{{{.}}}<br>{{/details}}
    <!-- "> --></a>{{/details}}</div>
  </details>
</div>"""

    category_template = """
<details class=category>
  <summary>
    <div class=toggler-show></div>
    <b>{{title}}</b>
  </summary>
  <clear></clear>
  {{{modules}}}
</details>
<br>"""

    def genmodule(x: Module2) -> str:
        # one module entry
        return stache(module_template, x)  # type: ignore

    def gencategory(title: str, modules: str) -> str:
        # one category section wrapping already rendered modules
        context = {"title": title, "modules": modules}
        return stache(category_template, context)  # type: ignore

    sections = []
    for category, members in utils.groupby(data, lambda x: x["category"]):
        rendered_modules = "\n\n".join(genmodule(member) for member in members)
        sections.append(gencategory(category, rendered_modules))
    return "".join(sections)

Exemplo n.º 40

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def min_cost(wanted_parts, available_parts):
  """Greedily fill every wanted part from the cheapest matching lots.

  Args:
    wanted_parts: iterable of dicts read via the keys 'ItemID', 'ColorID',
      'Qty' and 'ItemName'.
    available_parts: iterable of lot dicts read via the keys 'item_id',
      'wanted_color_id', 'color_id', 'store_id', 'quantity_available' and
      'cost_per_unit'.

  Returns:
    A ``(cost, allocation)`` tuple. ``allocation`` is a list of dicts
    describing how much is taken from which lot; ``cost`` is the summed
    price, or ``float('inf')`` when some wanted part cannot be filled
    completely (a warning is printed for each such part).
  """
  lots_by_part = utils.groupby(
      available_parts, lambda x: (x['item_id'], x['wanted_color_id']))

  result = []
  cost = 0.0
  for item in wanted_parts:
    part_key = (item['ItemID'], item['ColorID'])
    # most expensive first, so that pop() hands out the cheapest lot
    matching = sorted(lots_by_part.get(part_key, []),
                      key=lambda x: -1 * x['cost_per_unit'])

    # take as much inventory as possible, starting with the lowest price, until
    # the requested quantity is filled
    n_remaining = item['Qty']
    while n_remaining > 0:
      if not matching:
        # parenthesised single-argument print works on Python 2 and 3
        print('WARNING: couldn\'t find enough inventory to purchase %s' % (item['ItemName'],))
        cost = float('inf')
        break

      lot = matching.pop()
      amount = min(n_remaining, lot['quantity_available'])
      result.append({
        'item_id': lot['item_id'],
        'color_id': lot['color_id'],
        'store_id': lot['store_id'],
        'quantity': amount,
        'cost_per_unit': lot['cost_per_unit']
      })
      n_remaining -= amount
      cost += amount * lot['cost_per_unit']

  return (cost, result)

Exemplo n.º 41

0

Exibir arquivo

Arquivo: report.py Projeto: stjordanis/parsing-int-series

def main(path):
    """Print a table of speed-up statistics for the measurements in *path*.

    The file is read with ``load_file`` and its items are grouped by
    ``(size, distribution_name)``. For every group, in sorted key order,
    ``calculate_speedup_statistics`` supplies the size, the distribution name
    and per-procedure statistics; the first three values per procedure are
    printed under the "min", "avg" and "max" columns.

    Args:
        path: name of the text file holding the measurements.
    """
    with open(path, 'rt') as f:
        keyfun = lambda item: (item.size, item.distribution_name)
        data = groupby(load_file(f), keyfun)

    # three header rows: overall caption, procedure names, column names
    header1 = [("", 3),
               ("speedup over %s procedure" % reference_procedure,
                len(procedures) * 3)]
    header2 = [("", 3)]
    header3 = ["size [B]", "distribution", "samples"]
    for proc in procedures:
        header2.append((proc, 3))
        header3.extend(["min", "avg", "max"])

    table = Table()
    for header in (header1, header2, header3):
        table.add_header(header)

    for key in sorted(data):
        collection = data[key]
        size, name, stats = calculate_speedup_statistics(collection)

        row = [
            '%d' % size,
            get_distribution_title(name),
            '%d' % len(collection),
        ]
        for proc in procedures:
            row.extend('%0.2f' % stats[proc][i] for i in range(3))

        table.add_row(row)

    # parenthesised single-argument print works on Python 2 and 3
    print(table)

Exemplo n.º 42

0

Exibir arquivo

Arquivo: plot_fit.py Projeto: xrf/sp-energies-qd

def plot_fits(plot,
              data,
              get_fit_range,
              badness_threshold,
              x_col,
              x_label,
              y_col,
              y_label,
              absdydx_label,
              title_cols,
              get_title,
              get_fn,
              color_cols,
              get_color,
              get_color_label,
              get_dmc,
              dmc_label,
              dmc_yerr_col=None,
              maxfev=0):
    """Plot the data and its power-law fits on two stacked axes.

    The upper axes show |dy/dx| against x on log-log scales, the lower axes
    show y against x. One series is drawn per group of ``color_cols``; for
    every series a fit is attempted with ``do_fit`` inside the fit range and
    the fitted curves are drawn on both axes.

    Args:
        plot: when truthy, axis limits are synchronised with the stored plot
            settings and the figure is saved under the name from ``get_fn``.
        data: table indexed by column name (presumably a pandas DataFrame --
            TODO confirm). Grouping it by ``title_cols`` must yield exactly
            one group.
        get_fit_range: callable mapping a method name to a (low, high) pair.
        badness_threshold: forwarded to ``do_fit``.
        x_col, y_col: names of the x and y columns.
        x_label, y_label, absdydx_label: axis labels.
        title_cols: columns identifying the single plotted group; the group
            key is passed as keyword arguments to ``get_title``, ``get_fn``
            and ``get_dmc``.
        get_title, get_fn: callables producing the title and the file name.
        color_cols: columns to split the series by; the group key must
            contain a "method" entry.
        get_color, get_color_label: callables mapping a method name to the
            series color and legend label.
        get_dmc: callable returning the reference rows for the group; when
            non-empty, the first row's ``y_col`` is drawn as a horizontal
            line on the lower axes.
        dmc_label: legend label of the reference band.
        dmc_yerr_col: optional column with the reference uncertainty; when
            given, a band of that half-width is drawn around the line.
        maxfev: forwarded to ``do_fit``.

    Returns:
        A dict mapping each successfully fitted method to its fit result
        (the dict from ``do_fit`` plus a "num_points" entry).

    Raises:
        ValueError: when grouping by ``title_cols`` does not yield exactly
            one group (raised by the unpacking below).
    """
    # continuous x range for plotting continuous functions
    x_c = np.linspace(data[x_col].min() - 1, data[x_col].max() + 1, 250)
    [(title_key, gg)] = utils.groupby(data, title_cols)
    fig, ax = plt.subplots(2)
    fig.set_size_inches(8, 10)  # otherwise the text will get obscured
    y_range = np.array([np.nan, np.nan])
    fit_results = {}
    for color_key, g in utils.groupby(gg, color_cols):
        logging.info(f"plot_fits: method: {color_key['method']}")
        color = get_color(color_key["method"])
        label = get_color_label(color_key["method"])
        g = g.sort_values([x_col])
        d = g.rename(columns={x_col: "x", y_col: "y"})
        deriv_d = differentiate(d, "x", "y", "dydx")
        x = deriv_d["x"]
        dydx = deriv_d["dydx"]
        ax[0].plot(x, abs(dydx), "x", label=label, color=color)
        ax[1].plot(d["x"], d["y"], "x", label=label, color=color)
        utils.update_range(y_range, d["y"])

        # clip the requested fit range to the x values actually present
        fit_range = get_fit_range(color_key["method"])
        fit_range = (max(fit_range[0],
                         d["x"].min()), min(fit_range[1], d["x"].max()))
        if fit_range[1] < fit_range[0]:
            continue
        fit_range = fit_range + np.array([-0.2, 0.2])  # to make it look nicer
        ax[0].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        ax[1].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)

        d_subset = d[d["x"].between(*fit_range)]
        deriv_subset = deriv_d[deriv_d["x"].between(*fit_range)]
        if len(deriv_subset) < 2:
            continue

        fit = do_fit(d_subset,
                     deriv_subset,
                     badness_threshold=badness_threshold,
                     maxfev=maxfev)
        if fit is None:
            continue
        fit_result = {"num_points": len(d["x"])}
        fit_result.update(fit)
        fit_results[color_key["method"]] = fit_result

        # circle the points the logderiv stage flagged as outliers
        outliers = deriv_subset.loc[fit["logderiv"]["outliers"]]
        ax[0].plot(outliers["x"],
                   abs(outliers["dydx"]),
                   "o",
                   markerfacecolor="none",
                   label="",
                   color="red")
        # derivative of a * x**b + c for every stage, on the upper axes
        for stage, result in fit.items():
            if stage == "fixedab":
                continue  # fixedab yields the same plot here as logderiv
            a = result["coefficient"]
            b = result["exponent"]
            b_err = result.get("exponent_err", None)
            dydx_c = a * b * x_c**(b - 1.0)
            ax[0].plot(x_c,
                       abs(dydx_c),
                       linestyle=STAGE_TO_LINESTYLE[stage],
                       label=label + " " + fit_label(stage, b, b_err),
                       color=color)

        # the curve itself, only for stages that also fitted the constant
        for stage, result in fit.items():
            if "constant" not in result:
                continue
            a = result["coefficient"]
            b = result["exponent"]
            c = result["constant"]
            b_err = result.get("exponent_err", None)
            y_c = a * x_c**b + c
            ax[1].plot(x_c,
                       y_c,
                       linestyle=STAGE_TO_LINESTYLE[stage],
                       label=label + " " + fit_label(stage, b, b_err),
                       color=color)
            if b < 0:
                ax[1].axhline(c, linestyle=":", color=color)
            else:
                # logging.warn is a deprecated alias of logging.warning
                logging.warning(f"plot_fits: {label}: {stage}.b >= 0: "
                                "no asymptotic result")
            utils.update_range(y_range, c)

    g = get_dmc(**title_key)
    if len(g):
        y = g[y_col].iloc[0]
        if dmc_yerr_col is not None:
            y_err = g[dmc_yerr_col].iloc[0]
            ax[1].axhspan(y - y_err,
                          y + y_err,
                          alpha=0.4,
                          color="black",
                          label=dmc_label)
            utils.update_range(y_range, [y - y_err, y + y_err])
        # add an extra line to make sure it's visible
        ax[1].axhline(y, alpha=0.4, color="black")
        utils.update_range(y_range, [y])

    ax[0].set_xlabel(x_label)
    ax[0].set_ylabel(absdydx_label)
    ax[0].set_xscale("log")
    ax[0].set_yscale("log")
    ax[0].set_title(get_title(**title_key))
    # shrink the axes horizontally to leave room for the legend on the right
    box = ax[0].get_position()
    ax[0].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[0].legend(bbox_to_anchor=(1, 1.0))

    ax[1].legend()
    ax[1].set_xlabel(x_label)
    ax[1].set_ylabel(y_label)
    ax[1].set_ylim(*utils.expand_range(y_range, 0.05))
    box = ax[1].get_position()
    ax[1].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[1].legend(bbox_to_anchor=(1, 1.0))
    ax[1].get_xaxis().set_major_locator(
        matplotlib.ticker.MaxNLocator(integer=True))

    if plot:
        fn = get_fn(**title_key)
        settings_fn = os.path.join("plot_settings", fn + ".json")
        settings = utils.load_json(settings_fn) or {"ax1": {}, "ax2": {}}

        def save_settings():
            utils.save_json(settings_fn, settings)

        utils.sync_axes_lims(ax[0], settings["ax1"], save_settings)
        utils.sync_axes_lims(ax[1], settings["ax2"], save_settings)
        utils.savefig(fig, fn)
    return fit_results

Exemplo n.º 43

0

Exibir arquivo

Arquivo: optimize.py Projeto: karlwichorek/dfs-optimizer

 def add_players(self, players, player_filter=None):
     """Store the player pool, sorted, filtered and grouped by position.

     Sets ``self.players`` (sorted descending by the attribute named in
     ``self.optimize_key``), ``self.players_filtered`` (those accepted by
     the filter) and ``self.players_by_pos`` (the filtered players grouped
     by 'position').

     Args:
         players: the players to choose from.
         player_filter: optional predicate; ``self.player_filter`` is used
             when it is None.

     Raises:
         Exception: when some position named in ``self.restrictions`` has no
             filtered player.
     """
     self.players = sorted(players, key=lambda p:p.__dict__[self.optimize_key], reverse=True)
     chosen_filter = self.player_filter if player_filter is None else player_filter
     # materialise the result: on Python 3 filter() returns a one-shot
     # iterator that the grouping below would otherwise leave exhausted
     self.players_filtered = list(filter(chosen_filter, self.players))
     self.players_by_pos = utils.groupby('position', self.players_filtered)
     missing = [k for k in self.restrictions if k not in self.players_by_pos]
     if missing:
         raise Exception('[ERROR] Player list (players=%d) not sufficient to meet restrictions. Missing = %s.' % (len(players), missing))

Exemplo n.º 44

0

Exibir arquivo

Arquivo: transactions.py Projeto: gmarceau/pynab

def transactions_by_month(transactions):
    """Return an OrderedDict of the transactions grouped by month.

    The transactions are first put in date order, then grouped with
    ``transaction_month`` as the key function.
    """
    from collections import OrderedDict

    chronological = sorted(transactions, key=lambda t: t.date)
    monthly = groupby(chronological, key=transaction_month)
    return OrderedDict(monthly)

Exemplo n.º 45

0

Exibir arquivo

Arquivo: osmdb.py Projeto: slachiewicz/osm-addr-tools

    def get_closed_ways(self, ways):
        """Join ways end to end into closed rings and return them as Polygons.

        Starting from the first way, node ids are accumulated until the first
        and the last id are equal; the ring is then turned into a ``Polygon``
        built from the centers of the referenced nodes. While the chain is
        still open, a remaining way that touches either end of the chain is
        appended to it (reversing the way or the chain when needed).

        Returns:
            A list of ``Polygon`` objects; ``[]`` when *ways* is empty.

        Raises:
            ValueError: when the chain is not closed and no remaining way
                connects to either of its ends.
        """
        if not ways:
            return []
        # private copy: ways are removed from it as they are consumed
        ways = list(ways)
        # lookup tables from an end node id to the ways starting/ending there
        way_by_first_node = utils.groupby(ways, lambda x: x._raw['nodes'][0])
        way_by_last_node = utils.groupby(ways, lambda x: x._raw['nodes'][-1])
        ret = []
        cur_elem = ways[0]
        # node ids of the chain currently being assembled
        node_ids = []

        def _get_ids(elem):
            return elem['nodes']

        def _get_way(id_, dct):
            # first way registered under id_ that has not been consumed yet,
            # or None when there is none
            if id_ in dct:
                ret = tuple(filter(lambda x: x in ways, dct[id_]))
                if ret:
                    return ret[0]
            return None

        ids = _get_ids(cur_elem)
        while ways:
            #ids = list(y['ref'] for y in cur_elem._raw.find_all('nd', recursive=False))
            node_ids.extend(ids)
            ways.remove(cur_elem)
            if node_ids[0] == node_ids[-1]:
                # full circle, append to Polygons in ret
                ret.append(
                    Polygon(
                        (x.center.x, x.center.y) for x in (self.__osm_obj[('node', y)] for y in node_ids)
                    )
                )
                # start a fresh chain from the next unused way, if any
                if ways:
                    cur_elem = ways[0]
                    node_ids = []
                    ids = _get_ids(cur_elem)
            else:
                # not full circle
                if ways: # check if there is something to work on
                    last_id = node_ids[-1]
                    first_id = node_ids[0]
                    # a way starting at the chain's end: append it as is
                    if _get_way(last_id, way_by_first_node):
                        cur_elem = _get_way(last_id, way_by_first_node)
                        ids = _get_ids(cur_elem)

                    # a way ending at the chain's end: append it reversed
                    elif _get_way(last_id, way_by_last_node):
                        cur_elem = _get_way(last_id, way_by_last_node)
                        ids = list(reversed(_get_ids(cur_elem)))

                    # a way starting at the chain's start: flip the chain,
                    # then append the way as is
                    elif _get_way(first_id, way_by_first_node):
                        cur_elem = _get_way(first_id, way_by_first_node)
                        node_ids = list(reversed(node_ids))
                        ids = _get_ids(cur_elem)

                    # a way ending at the chain's start: flip the chain and
                    # append the way reversed
                    elif _get_way(first_id, way_by_last_node):
                        cur_elem = _get_way(first_id, way_by_last_node)
                        node_ids = list(reversed(node_ids))
                        ids = list(reversed(_get_ids(cur_elem)))
                    else:
                        # nothing connects to the open chain
                        raise ValueError
                else: # if ways
                    # the last chain stayed open and no ways are left
                    raise ValueError
        # end while
        return ret

Exemplo n.º 46

0

Exibir arquivo

def clean(module_id, entry, fields, regulation):
    """Normalise one module entry into a flat result dict.

    Derives a cleaned title and an abbreviation, reorders and post-processes
    ``entry["details"]`` (in place), collects the owners, picks a category,
    expands the exercise dates and merges everything into the returned dict.

    Args:
        module_id: module identifier; when falsy, the "TUCaN-Nummer" detail
            (or "") is used instead.
        entry: dict read via its "content", "details" and "credits" keys.
            The merge of ``entry`` and the cleaned dates must carry a
            "module_id" equal to the resolved id (asserted below).
        fields: nested mapping looked up as ``fields[regulation][module_id]``;
            the first element of the value is taken as the category.
        regulation: regulation name; a value containing "B.Sc." switches the
            category wording.

    Returns:
        The merged dict with "module_id" removed and "id", "title",
        "title_short", "owner", "owner_short", "credits" and "category" set.
    """
    def get_first(title: str, entry=entry):
        # 'details' value of the first detail with this title, else None
        tmp = [
            detail for detail in entry["details"] if detail["title"] == title
        ]
        return tmp[0].get('details') if len(tmp) > 0 else None

    def get_abbr(title):
        # choose the best one of three abbreviations
        # abbr1: upper-case letters and digits; abbr2: word initials;
        # abbr3: the "Kürzel" detail, preferred when it has 2 to 5 characters
        abbr1 = "".join(i for i in title if i.isupper() or i.isnumeric())
        abbr2 = "".join(i[0] if len(i) > 0 else ""
                        for i in title.strip().split(" "))
        abbr3 = (get_first("Kürzel") or "").strip().replace(" ", "")
        abbrs = ([abbr3, abbr1, abbr2] if 1 < len(abbr3) < 6 else sorted(
            (i for i in (abbr1, abbr2)), key=lambda x: abs(3.4 - len(x))))
        #print(abbrs)
        return abbrs[0]

    # module_id, title, abbr
    first_entry = list(entry['content'].values())[0]
    # the first 10 characters of the content title are dropped; what follows
    # is split at the first space and the second part is the title
    sort_title = first_entry['title'][10:]
    _, title = sort_title.split(" ", 1)
    if len(list(entry['content'].values())) > 1:
        title = get_first("Titel") or title
    orig_title = title
    module_id = module_id or get_first("TUCaN-Nummer") or ""
    title = utils.remove_bracketed_part(title)
    title = utils.remove_bracketed_part(title)
    title = utils.roman_to_latin_numbers(title)
    title = title.replace("Praktikum in der Lehre - ", "")
    abbr = get_abbr(title)

    # reorder details
    # details with these titles are moved behind a separator entry
    later_titles = {
        #"Unterrichtssprache", "Sprache",
        "Min. | Max. Teilnehmerzahl",
        "TUCaN-Nummer",
        "Kürzel",
        "Anzeige im Stundenplan",  # "Titel",
        "Lehrveranstaltungsart",
        "Veranstaltungsart",
        "Turnus",
        "Startsemester",
        "SWS",
        "Semesterwochenstunden",
        "Diploma Supplement",
        "Modulausschlüsse",
        "Modulvoraussetzungen",
        "Studiengangsordnungen",
        "Verwendbarkeit",
        "Anrechenbar für",
        "Orga-Einheit",
        "Gebiet",
        "Fach",
        "Modulverantwortliche",  # "Lehrende",
        "Dauer",
        "Anzahl Wahlkurse",
        "Notenverbesserung nach §25 (2)",
        "Wahlmöglichkeiten",
        "Credits",
        "Kurstermine",
    }
    early = [i for i in entry["details"] if i["title"] not in later_titles]
    late = [i for i in entry["details"] if i["title"] in later_titles]
    entry["details"] = (early + [{
        "details": "<br><hr><b>Andere Angaben aus Tucan und Inferno</b><br>",
        "title": ""
    }] + late)
    for detail in entry["details"]:
        if detail["details"].strip() != "":
            detail["details"] += "<br>"
        if detail['title'] == "Studiengangsordnungen":
            # split every non-blank line at its first "(" and merge the lines
            # sharing the same text before it into "prefix(a, b, ...)"
            regs = [(x.split("(",
                             1)) for x in sorted(detail['details'].replace(
                                 "<br>", "<br/>").split("<br/>")) if x.strip()]
            regs = utils.groupby(regs, key=lambda x: x[0])
            regs = [(k, list(v)) for k, v in regs]
            #            print(detail['details'].replace("<br>", "<br/>").split("<br/>"))
            #            print([ k +"("+ ", ".join(i[:-1] for _,i in v) + ")" for k,v in regs])
            detail['details'] = "<br/>".join(
                k + "(" + ", ".join(i[:-1] for _, i in sorted(v)) + ")"
                for k, v in regs) + "<br/>"

    # last name of owners
    # the OrderedDict keeps each owner once, in order of first appearance
    owner = "; ".join(
        collections.OrderedDict(
            (x, 1) for entry in entry['content'].values()
            for x in (get_first("Lehrende", entry)
                      or get_first("Modulverantwortlicher", entry) or "???"
                      ).split("; ")).keys()) or "???"
    short_owner = "; ".join(i.split()[-1] for i in owner.split("; "))

    # category
    # precedence: "-os" marker in the title, then the category from `fields`,
    # then a fallback derived from the first two characters of the module id
    isos = first_entry['title'].split(" ")[0].endswith("-os")
    category = fields.get(regulation, {}).get(module_id, ["", ""])[0]
    category = clean_category(category)
    if category == "C. Fachübergreifende Lehrveranstaltungen": category = ""
    category = (
        "B. Oberseminare"
        if isos else  # category == "B. Seminare" and entry["credits"] == 0
        category or {
            "01": "C. Nebenfach FB 01 (Wirtschaft & Recht; Entrepeneurship)",
            "02": "C. Nebenfach FB 02 (Philosophie)",
            "03": "C. Nebenfach FB 03 (Humanw.; Sportw.)",
            "04":
            "C. Nebenfach FB 04 (Logik; Numerik; Optimierung; Stochastik)",
            "05": "C. Nebenfach FB 05 (Elektrow.; Physik)",
            "11": "C. Nebenfach FB 11 (Geow.)",
            "13": "C. Nebenfach FB 13 (Bauinformatik; Verkehr)",
            "16": "C. Nebenfach FB 16 (Fahrzeugtechnik)",
            "18": "C. Nebenfach FB 18 (Elektrotechnik)",
            "41": "C. Sprachkurse",
        }.get(module_id[:2], "0. Pflichtveranstaltungen"))
    if "B.Sc." in regulation:
        category = category.replace("Nebenfach", "Fachübergreifend")
    else:
        category = category.replace("Pflichtveranstaltung",
                                    "Nicht einsortiert")

    # dates
    # helpers: parse / format "%Y-%m-%d" and shift such a date by n weeks
    pdt = lambda day: datetime.datetime.strptime(day, "%Y-%m-%d")
    fmtdt = lambda day: datetime.datetime.strftime(day, "%Y-%m-%d")
    shiftNweeks = lambda n, x: fmtdt(pdt(x) + datetime.timedelta(weeks=n))

    dates = {
        i  #+", "+ item['title'].split(" ", 1)[1]
        for item in entry['content'].values() for i in item.get('dates', [])
    }
    uedates = {
        i
        for item in entry['content'].values() for i in item.get('uedates', [])
    }
    uebung = "Übung " if len(uedates) != 1 else "Übungsstunde"
    #    uedates = {"\t".join([shiftNweeks(i, y.split("\t",1)[0])] + y.split("\t")[1:3] + [uebung + y.split("\t")[3]]).replace(orig_title, "")
    # each entry is tab-separated; fields 0 and 4 are parsed as dates and one
    # copy of the entry is emitted per week between them (both inclusive),
    # keeping fields 1-2 and replacing the rest by the exercise label
    uedates = {
        "\t".join([shiftNweeks(i,
                               y.split("\t", 1)[0])] + y.split("\t")[1:3] +
                  [uebung])
        for y in uedates for i in range(
            int((pdt(y.split("\t")[4]) - pdt(y.split("\t")[0])).days / 7 + 1))
    }
    dates = clean_dates(dates | uedates)

    # result
    result = utils.merge_dict(entry, dates)
    assert result['module_id'] == module_id
    del result['module_id']
    result = utils.merge_dict(
        result, {
            "id": module_id,
            "title": title,
            "title_short": abbr,
            "owner": owner,
            "owner_short": short_owner,
            "credits": str(entry["credits"]).zfill(2),
            'category': category,
        })
    return result

Exemplo n.º 47

0

Exibir arquivo

Arquivo: average.py Projeto: stjordanis/parsing-int-series

    def prepare_table(self, procedures):
        """Build a table of SSE speed-ups aggregated per (size, loops).

        The measurements are grouped by ``(size, loops, num_distribution)``.
        In every group the times of the "scalar", "sse" and "sse-block"
        procedures are looked up and the speed-ups of the two SSE variants
        over "scalar" are collected under the group's ``(size, loops)``.
        Groups in which all three times are below 10 are skipped.

        Args:
            procedures: iterable of measurement items exposing ``size``,
                ``loops``, ``num_distribution``, ``procedure`` and ``time``.

        Returns:
            A ``Table`` with one row per (size, loops), ordered by size,
            holding min/avg/max of both speed-ups.

        Raises:
            KeyError: when a group has no item for one of the three
                procedures.
        """
        keyfun = lambda item: (item.size, item.loops, item.num_distribution)
        tmp = groupby(procedures, keyfun)

        def get_time(items, procedure):
            # time of the first item measured with the given procedure
            for item in items:
                if item.procedure == procedure:
                    return item.time

            raise KeyError("Procedure '%s' not found" % procedure)

        data = {}
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        for (size, loops, _), items in tmp.items():
            t0 = get_time(items, "scalar")
            t1 = get_time(items, "sse")
            t2 = get_time(items, "sse-block")

            if t0 < 10 and t1 < 10 and t2 < 10:
                # don't fool people when all measurements are single-digit numbers
                continue

            speedup_sse = float(t0)/t1
            speedup_sse_block = float(t0)/t2

            # first list: SSE speed-ups, second list: SSE block speed-ups
            collected = data.setdefault((size, loops), [[], []])
            collected[0].append(speedup_sse)
            collected[1].append(speedup_sse_block)

        t = Table()
        t.add_header([("input", 2), ("SSE speed-up", 3), ("SSE block speed-up", 3)])
        t.add_header(["size [B]", "loops", "min", "avg", "max", "min", "avg", "max"])

        def stats(numbers):
            # (min, mean, max) of a non-empty list
            s = sum(numbers)
            n = len(numbers)
            return min(numbers), s/n, max(numbers)

        for size, loops in sorted(data, key=lambda pair: pair[0]):
            key = size, loops

            sse = stats(data[key][0])
            sse_block = stats(data[key][1])

            t.add_row([
                '{:,}'.format(size),
                '%d' % loops,

                '%0.2f' % sse[0],
                '%0.2f' % sse[1],
                '%0.2f' % sse[2],

                '%0.2f' % sse_block[0],
                '%0.2f' % sse_block[1],
                '%0.2f' % sse_block[2],
            ])

        return t

Exemplo n.º 48

0

Exibir arquivo

def gen_data(gdef, prof_data, op_table, devices, inter=2810, intra=2810):
    """Build device, link, NCCL-model and operator features for a topology.

    Devices whose names carry the same ``task:<n>/`` number are linked with
    the ``intra`` value, devices of different tasks with the ``inter`` value;
    every connection is recorded in both directions.

    Args:
        gdef: graph definition; its ``node`` entries are read via ``op`` and
            ``name``.
        prof_data: mapping indexed by ``(node name, nrep)`` for nrep in
            1, 2, 4, 8; the mean of each value is used.
        op_table: dict from op name to integer id; unseen ops are added to it
            in place.
        devices: sequence of ``(name, time_ratio, memory)`` tuples.
        inter: value used for links between different tasks.
        intra: value used for links inside one task.

    NOTE(review): the definition visible here ends after computing
    ``op_feats`` and contains no return statement.
    """
    # raw string: "\d" inside a plain literal is an invalid escape sequence
    task_pattern = re.compile(r"task:(\d+)/")

    edge_link = [], []
    link_feats = []
    device_feats = [[time_ratio / 10, memory / 10_000_000_000]
                    for _, time_ratio, memory in devices]
    # task number -> indices of the devices seen so far for that task
    tasks = {}
    for i, (name, *_) in enumerate(devices):
        task = task_pattern.search(name)[1]
        if task in tasks:
            # connect the new device with every earlier device of its task
            for other in tasks[task]:
                edge_link[0].append(i)
                edge_link[1].append(other)
                edge_link[0].append(other)
                edge_link[1].append(i)
                link_feats.append([0, intra / 100_000, math.log(intra) / 10])
                link_feats.append([0, intra / 100_000, math.log(intra) / 10])
            tasks[task].append(i)
        else:
            tasks[task] = [i]
    # connect every device with every device of every other task
    for task, devs in tasks.items():
        for dev in devs:
            for another_task, other_devs in tasks.items():
                if another_task != task:
                    for another_dev in other_devs:
                        edge_link[0].append(dev)
                        edge_link[1].append(another_dev)
                        edge_link[0].append(another_dev)
                        edge_link[1].append(dev)
                        link_feats.append(
                            [1, inter / 100_000,
                             math.log(inter) / 10])
                        link_feats.append(
                            [1, inter / 100_000,
                             math.log(inter) / 10])
    # bandwidth = [x for _, x, _ in link_feats]
    # tgroups = k_spanning_tree(g, bandwidth, 2) + k_spanning_tree(g, bandwidth, 4) + [[0]] + [list(range(len(devices)))]

    base_nccl_model = [
        0.043420241077615454, 368.2013618677043, 0.27766802543921265,
        211.91926070037152
    ]
    nccl_models = {}
    # task number -> device names of that task
    dgroups = groupby(devices,
                      key=lambda x: task_pattern.search(x[0])[1],
                      value=lambda x: x[0])

    # one model per task, keyed by the sorted, comma-joined device names
    for task, devs in dgroups.items():
        nccl_models[','.join(
            sorted(devs))] = [x * 2810 / intra for x in base_nccl_model]

    # one model per combination of two or more tasks
    for tasks in (t for i in range(2,
                                   len(dgroups) + 1)
                  for t in itertools.combinations(dgroups.keys(), i)):
        devs = [
            dgroups[t][0] for t in tasks
        ]  # the first listed device of each task stands for the task
        nccl_models[','.join(
            sorted(devs))] = [x * 2810 / inter for x in base_nccl_model]

    op_types = []
    for node in gdef.node:
        if node.op not in op_table:
            op_table[node.op] = len(op_table)
        op_types.append(op_table[node.op])
    op_feats = [[
        np.mean(prof_data[(node.name, nrep)]) / 10_000 for nrep in (1, 2, 4, 8)
    ] for node in gdef.node]

Exemplo n.º 49

0

Exibir arquivo

Arquivo: plot_fit.py Projeto: xrf/OngoingPapers

def plot_fits(plot,
              data,
              get_fit_range,
              badness_threshold,
              x_col,
              x_label,
              y_col,
              y_label,
              absdydx_label,
              title_cols,
              get_title,
              get_fn,
              color_cols,
              get_color,
              get_color_label,
              get_dmc,
              dmc_label,
              dmc_yerr_col=None):
    """Plot the data and its power-law fits on two stacked axes.

    The upper axes show |dy/dx| against x on log-log scales, the lower axes
    show y against x. One series is drawn per group of ``color_cols``; for
    every series a fit is attempted with ``do_fit`` inside the fit range and
    the fitted curves are drawn on both axes.

    Args:
        plot: when truthy, axis limits are synchronised with the stored plot
            settings and the figure is saved under the name from ``get_fn``.
        data: table indexed by column name (presumably a pandas DataFrame --
            TODO confirm). Grouping it by ``title_cols`` must yield exactly
            one group.
        get_fit_range: callable mapping a method name to a (low, high) pair.
        badness_threshold: forwarded to ``do_fit``.
        x_col, y_col: names of the x and y columns.
        x_label, y_label, absdydx_label: axis labels.
        title_cols: columns identifying the single plotted group; the group
            key is passed as keyword arguments to ``get_title``, ``get_fn``
            and ``get_dmc``.
        get_title, get_fn: callables producing the title and the file name.
        color_cols: columns to split the series by; the group key must
            contain a "method" entry.
        get_color, get_color_label: callables mapping a method name to the
            series color and legend label.
        get_dmc: callable returning the reference rows for the group; when
            non-empty, the first row's ``y_col`` is drawn as a horizontal
            line on the lower axes.
        dmc_label: legend label of the reference band.
        dmc_yerr_col: optional column with the reference uncertainty; when
            given, a band of that half-width is drawn around the line.

    Returns:
        A dict mapping each successfully fitted method to its fit result
        (the dict from ``do_fit`` plus a "num_points" entry).

    Raises:
        ValueError: when grouping by ``title_cols`` does not yield exactly
            one group (raised by the unpacking below).
    """
    # continuous x range for plotting continuous functions
    x_c = np.linspace(data[x_col].min() - 1, data[x_col].max() + 1, 250)
    [(title_key, gg)] = utils.groupby(data, title_cols)
    fig, ax = plt.subplots(2)
    fig.set_size_inches(8, 10) # otherwise the text will get obscured
    y_range = np.array([np.nan, np.nan])
    fit_results = {}
    for color_key, g in utils.groupby(gg, color_cols):
        logging.info(f"plot_fits: method: {color_key['method']}")
        color = get_color(color_key["method"])
        label = get_color_label(color_key["method"])
        g = g.sort_values([x_col])
        d = g.rename(columns={x_col: "x", y_col: "y"})
        deriv_d = differentiate(d, "x", "y", "dydx")
        x = deriv_d["x"]
        dydx = deriv_d["dydx"]
        ax[0].plot(x, abs(dydx), "x", label=label, color=color)
        ax[1].plot(d["x"], d["y"], "x", label=label, color=color)
        utils.update_range(y_range, d["y"])

        # clip the requested fit range to the x values actually present
        fit_range = get_fit_range(color_key["method"])
        fit_range = (max(fit_range[0], d["x"].min()),
                     min(fit_range[1], d["x"].max()))
        if fit_range[1] < fit_range[0]:
            continue
        fit_range = fit_range + np.array([-0.2, 0.2]) # to make it look nicer
        ax[0].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        ax[1].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)

        d_subset = d[d["x"].between(*fit_range)]
        deriv_subset = deriv_d[deriv_d["x"].between(*fit_range)]
        if len(deriv_subset) < 2:
            continue

        fit = do_fit(d_subset, deriv_subset,
                     badness_threshold=badness_threshold)
        if fit is None:
            continue
        fit_result = {
            "num_points": len(d["x"])
        }
        fit_result.update(fit)
        fit_results[color_key["method"]] = fit_result

        # circle the points the logderiv stage flagged as outliers
        outliers = deriv_subset.loc[fit["logderiv"]["outliers"]]
        ax[0].plot(outliers["x"], abs(outliers["dydx"]), "o",
                   markerfacecolor="none",
                   label="", color="red")
        # derivative of a * x**b + c for every stage, on the upper axes
        for stage, result in fit.items():
            if stage == "fixedab":
                continue # fixedab yields the same plot here as logderiv
            a = result["coefficient"]
            b = result["exponent"]
            b_err = result.get("exponent_err", None)
            dydx_c = a * b * x_c ** (b - 1.0)
            ax[0].plot(
                x_c, abs(dydx_c), linestyle=STAGE_TO_LINESTYLE[stage],
                label=label + " " + fit_label(stage, b, b_err),
                color=color)

        # the curve itself, only for stages that also fitted the constant
        for stage, result in fit.items():
            if "constant" not in result:
                continue
            a = result["coefficient"]
            b = result["exponent"]
            c = result["constant"]
            b_err = result.get("exponent_err", None)
            y_c = a * x_c ** b + c
            ax[1].plot(
                x_c, y_c, linestyle=STAGE_TO_LINESTYLE[stage],
                label=label + " " + fit_label(stage, b, b_err),
                color=color)
            if b < 0:
                ax[1].axhline(c, linestyle=":", color=color)
            else:
                # logging.warn is a deprecated alias of logging.warning
                logging.warning(f"plot_fits: {stage}.b >= 0: no asymptotic result")
            utils.update_range(y_range, c)

    g = get_dmc(**title_key)
    if len(g):
        y = g[y_col].iloc[0]
        if dmc_yerr_col is not None:
            y_err = g[dmc_yerr_col].iloc[0]
            ax[1].axhspan(y - y_err, y + y_err, alpha=0.4,
                        color="black", label=dmc_label)
            utils.update_range(y_range, [y - y_err, y + y_err])
        # add an extra line to make sure it's visible
        ax[1].axhline(y, alpha=0.4, color="black")
        utils.update_range(y_range, [y])

    ax[0].set_xlabel(x_label)
    ax[0].set_ylabel(absdydx_label)
    ax[0].set_xscale("log")
    ax[0].set_yscale("log")
    ax[0].set_title(get_title(**title_key))
    # shrink the axes horizontally to leave room for the legend on the right
    box = ax[0].get_position()
    ax[0].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[0].legend(bbox_to_anchor=(1, 1.0))

    ax[1].legend()
    ax[1].set_xlabel(x_label)
    ax[1].set_ylabel(y_label)
    ax[1].set_ylim(*utils.expand_range(y_range, 0.05))
    box = ax[1].get_position()
    ax[1].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[1].legend(bbox_to_anchor=(1, 1.0))
    ax[1].get_xaxis().set_major_locator(
        matplotlib.ticker.MaxNLocator(integer=True))

    if plot:
        fn = get_fn(**title_key)
        settings_fn = os.path.join("plot_settings", fn + ".json")
        settings = utils.load_json(settings_fn) or {"ax1": {}, "ax2": {}}
        def save_settings():
            utils.save_json(settings_fn, settings)
        utils.sync_axes_lims(ax[0], settings["ax1"], save_settings)
        utils.sync_axes_lims(ax[1], settings["ax2"], save_settings)
        utils.savefig(fig, fn)
    return fit_results

Exemplo n.º 50

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def gurobi(wanted_parts, available_parts, stores, shipping_cost=10.0):
  """Choose which lots to buy by solving a mixed-integer program with Gurobi.

  The model has one binary "store is used" variable per store (objective
  coefficient `shipping_cost`) and one continuous quantity variable per
  available lot (objective coefficient = the lot's unit cost).  Constraints:

    * a lot can only be bought from a store that is used, and never more than
      its available quantity;
    * for every wanted lot, the total bought across matching lots is at least
      the wanted quantity;
    * for every used store, the money spent there is at least the store's
      minimum buy.

  Args:
    wanted_parts: iterable of dicts read via the keys 'ItemID', 'ColorID'
      and 'Qty'.
    available_parts: iterable of dicts read via the keys 'store_id',
      'item_id', 'color_id', 'wanted_color_id', 'quantity_available' and
      'cost_per_unit'.
    stores: iterable of dicts read via the keys 'store_id' and 'minimum_buy'.
    shipping_cost: cost charged once for every store that is used.

  Returns:
    A one-element list holding a dict with keys 'cost', 'allocation' (the
    lots with a positive 'quantity' to buy) and 'store_ids', or an empty list
    when the solver found no solution.
  """
  from gurobipy import Model, GRB, LinExpr

  # key functions: kf1 for lot dicts, kf2 for wanted-part dicts
  kf1 = lambda x: (x['item_id'], x['wanted_color_id'])
  kf2 = lambda x: (x['ItemID'], x['ColorID'])

  available_by_store = utils.groupby(available_parts, lambda x: x['store_id'])
  store_by_id = dict((s['store_id'], s) for s in stores)

  m = Model()

  store_variables = {}     # store id to variable indicating store is used
  quantity_variables = []  # list of all lot variables + metadata

  # for every store (.items() works on both Python 2 and Python 3)
  for (store_id, inventory) in available_by_store.items():

    # a variable for if anything was bought from this store. if 1, then pay
    # shipping cost and all store inventory is available; if 0, then don't pay
    # for shipping and every lot in it has 0 quantity available
    store_variables[store_id] = m.addVar(0.0, 1.0, shipping_cost, GRB.BINARY,
                                         "use-store=%s" % (store_id,))

    for lot in inventory:
      quantity = lot['quantity_available']
      unit_cost = lot['cost_per_unit']

      # a variable for how much to buy of this lot
      v = m.addVar(0.0, quantity, unit_cost, GRB.CONTINUOUS,
                   "quantity-store=%s-item=%s-color=%s"
                   % (lot['store_id'], lot['item_id'], lot['color_id']))

      # keep a list of all lots
      quantity_variables.append({
        'store_id': lot['store_id'],
        'item_id': lot['item_id'],
        'wanted_color_id': lot['wanted_color_id'],
        'color_id': lot['color_id'],
        'variable': v,
        'quantity_available': quantity,
        'cost_per_unit': unit_cost
      })

  # actually put the variables into the model
  m.update()

  # for every lot in every store
  for lot in quantity_variables:
    use_store = store_variables[lot['store_id']]
    quantity = lot['quantity_available']
    v = lot['variable']

    # a constraint for how much can be bought: v - quantity * use_store <= 0
    m.addConstr(LinExpr([1.0, -1 * quantity], [v, use_store]),
                GRB.LESS_EQUAL, 0.0,
                "maxquantity-store=%s-item=%s-color-%d" % (lot['store_id'], lot['item_id'], lot['color_id']))

  # for every wanted lot
  variables_by_id = utils.groupby(quantity_variables, kf1)
  for lot in wanted_parts:
    # a constraint saying amount bought >= wanted amount
    # NOTE(review): if no store offers this (item, color) the lookup below
    # depends on what utils.groupby returns for a missing key -- confirm.
    # A list (not map()) is required so len() also works on Python 3.
    variables = [x['variable'] for x in variables_by_id[kf2(lot)]]
    constants = len(variables) * [1.0]
    m.addConstr(LinExpr(constants, variables),
                GRB.GREATER_EQUAL, lot['Qty'],
                "wantedamount-item=%s-color=%s" % (lot['ItemID'], lot['ColorID']))

  # for every store
  variables_by_store = utils.groupby(quantity_variables, lambda x: x['store_id'])
  for (store_id, lots) in variables_by_store.items():
    use_store = store_variables[store_id]
    minimum_purchase = store_by_id[store_id]['minimum_buy']

    # a constraint saying "if I purchased from this store, I bought the minimum amount or more"
    constants = [e['cost_per_unit'] for e in lots] + [-1 * minimum_purchase]
    variables = [e['variable'] for e in lots] + [use_store]
    m.addConstr(LinExpr(constants, variables),
                GRB.GREATER_EQUAL, 0.0,
                "minbuy-store=%d" % (store_id,))

  # minimize sum of costs of items bought + shipping costs
  m.setParam(GRB.param.MIPGap, 0.01)  # stop when duality gap <= 1%
  m.optimize()

  # Ask the solver whether it found any solution instead of reading ObjVal:
  # per the Gurobi attribute documentation ObjVal cannot be retrieved when no
  # solution is available, so testing it would raise rather than reach the
  # "No solution" branch.
  if m.SolCount == 0:
    print('No solution :(')
    return []

  # get results
  result = []
  for lot in quantity_variables:
    # get variable out (the solver object is not part of the returned lot)
    v = lot.pop('variable')

    # lot variables are continuous, so they might not actually be integral.
    # If they're not, check that they're "almost" integral, so we can just
    # round. Otherwise, print this warning.  According to theory the optimal
    # solution is for all continuous variables to be integral.
    if v.X != int(v.X) and abs(v.X - round(v.X)) > 1e-3:
      print('Uh oh. Variable %s has value %f. This is a little close for comfort.' % (v.VarName, v.X))

    # save quantity to buy if it's > 0
    lot['quantity'] = int(round(v.X))
    if lot['quantity'] > 0:
      result.append(lot)

  cost = sum(e['quantity'] * e['cost_per_unit'] for e in result)
  store_ids = list(set(e['store_id'] for e in result))
  return [{
    'cost': cost,
    'allocation': result,
    'store_ids': store_ids
  }]

Exemplo n.º 51

0

Exibir arquivo

Arquivo: minimizer.py Projeto: Grimbly/brickrake

def greedy(wanted_parts, price_guide):
  """Greedy Set-Cover algorithm to minimize number of stores purchased from.

  Repeatedly picks the store whose inventory covers the largest quantity of
  still-wanted parts, buys everything it can from that store (cheapest lots
  first), and removes the store from consideration.  Prices are disregarded
  when choosing between stores.  Stops when nothing is wanted any more, no
  stores are left, or the best remaining store covers nothing.

  Args:
    wanted_parts: list of dicts read via the keys 'ItemID', 'ColorID', 'Qty'
      and 'ItemName'.  It is deep-copied, so the caller's list is not
      modified.
    price_guide: iterable of lot dicts read via the keys 'store_id',
      'item_id', 'color_id', 'wanted_color_id', 'quantity_available' and
      'cost_per_unit'.

  Returns:
    A one-element list holding a dict with keys 'cost', 'allocation' (one
    entry per lot bought, with its 'quantity') and 'store_ids'.  A warning is
    printed if some wanted parts could not be bought.
  """
  result = []

  available_parts = utils.groupby(price_guide, lambda x: x['store_id'])
  available_parts = copy.deepcopy(available_parts)

  wanted_parts = copy.deepcopy(wanted_parts)
  wanted_by_item = utils.groupby(wanted_parts, lambda x: (x['ItemID'], x['ColorID']))

  # key identifying which wanted (item, color) a lot satisfies
  lot_key = lambda x: (x['item_id'], x['wanted_color_id'])

  # calculate how many parts a vendor can cover.  Defined once, outside the
  # loop; it reads `wanted_by_item` at call time, so it always sees the
  # most recent rebinding made at the bottom of the loop.
  def coverage(inventory):
    # only worry about items wanted
    relevant = [x for x in inventory if lot_key(x) in wanted_by_item]

    # count up how much there is of each (item_id, color_id) pair, then
    # count how much of each item I'd buy
    tot = 0
    for (k, lots) in utils.groupby(relevant, lot_key).items():
      stock = sum(e['quantity_available'] for e in lots)
      tot += min(wanted_by_item[k][0]['Qty'], stock)

    return tot

  # while we don't have all the parts we need
  while len(wanted_parts) > 0 and len(available_parts) > 0:
    # .items() works on both Python 2 and Python 3
    coverages = [(k, v, coverage(v)) for (k, v) in available_parts.items()]
    coverages = sorted(coverages, key=lambda x: x[2])

    # use the store that has the most inventory (last after ascending sort)
    next_store, inventory, n_parts = coverages.pop()
    if n_parts == 0:
      break

    # update the quantities in the wanted parts list
    by_item = utils.groupby(inventory, lot_key)
    new_wanted_parts = []
    for item in wanted_parts:
      # get all lots from next_store matching item
      item_id = item['ItemID']
      color_id = item['ColorID']
      wanted_qty = item['Qty']

      # sorted most expensive first, so pop() yields the cheapest lot
      available = by_item.get((item_id, color_id), [])
      available = sorted(available, key=lambda x: -1 * x['cost_per_unit'])

      # keep buying up lots until the wanted_qty is full or the store is bought
      # out
      while wanted_qty > 0 and len(available) > 0:
        lot = available.pop()

        amount_to_buy = min(lot['quantity_available'], wanted_qty)

        result.append({
          'store_id': lot['store_id'],
          'item_id': item_id,
          'wanted_color_id': lot['wanted_color_id'],
          'color_id': lot['color_id'],
          'quantity_available': lot['quantity_available'],
          'cost_per_unit': lot['cost_per_unit'],
          'quantity': amount_to_buy,
        })

        wanted_qty -= amount_to_buy

      # this store couldn't fill out our order
      if wanted_qty > 0:
        item['Qty'] = wanted_qty
        new_wanted_parts.append(item)

    # update wanted parts list, remove store from inventory
    wanted_parts = new_wanted_parts
    wanted_by_item = utils.groupby(wanted_parts,
                                   lambda x: (x['ItemID'], x['ColorID']))
    del available_parts[next_store]

  if len(wanted_parts) > 0:
    print('WARNING: there wasn\'t enough availability to buy the following items:')
    print(", ".join(e['ItemName'] for e in wanted_parts))

  cost = sum(e['quantity'] * e['cost_per_unit'] for e in result)
  store_ids = list(set(e['store_id'] for e in result))
  return [{
    'cost': cost,
    'allocation': result,
    'store_ids': store_ids
  }]