def _checkDuplicatesInImport(self, data):
    """Drop near-identical duplicate addresses from *data* and flag the rest.

    Addresses are grouped by (city, simc, house number without spaces and
    upper-cased, street).  Within a group, whenever two entries lie closer
    than 2 (the original comment says metres) the first of the pair is
    removed from *data* in place.  The groups are then rebuilt; every entry
    of a group that still has more than one member receives a fixme sharing
    one freshly generated UUID, plus a second fixme when any two members are
    more than 100 apart.

    :param data: mutable list of address objects; modified in place.
    """
    def address_key(entry):
        return (entry.city, entry.simc,
                entry.housenumber.replace(' ', '').upper(), entry.street)

    # remove duplicates closer than 2m
    # An entry can take part in several close pairs (a-b, a-c, ...).  Keep
    # track of what was already removed so that ``data.remove`` is never
    # called twice for the same object, which would raise ValueError.
    removed = set()
    for _, occurances in groupby(data, address_key).items():
        if len(occurances) < 2:
            continue
        for a, b in itertools.combinations(occurances, 2):
            if id(a) in removed or id(b) in removed:
                continue
            if distance(a.center, b.center) < 2:
                self.__log.info("Removing duplicate address: %s", a)
                data.remove(a)
                removed.add(id(a))

    # mark duplicates
    for _, occurances in groupby(data, address_key).items():
        if len(occurances) < 2:
            continue
        self.__log.warning("Duplicate addresses in import: %s", occurances[0])
        uid = uuid.uuid4()
        for entry in occurances:
            entry.addFixme('Duplicate address in import (id: %s)' % (uid,))
        too_far_apart = any(
            distance(a.center, b.center) > 100
            for a, b in itertools.combinations(occurances, 2)
        )
        if too_far_apart:
            self.__log.warning(
                "Address points doesn't fit into 100m circle. Points count: %d",
                len(occurances))
            for entry in occurances:
                entry.addFixme('(distance over 100m, points: %d)' % (len(occurances),))
def __init__(self, interp):
    """Store *interp* and index its inferred edges on two levels.

    ``self.items`` maps the first element of each edge (the "head" in the
    original comment) to a second grouping keyed by the edge's second
    element (the "ruleindex").
    """
    self.interp = interp
    self.edges = infer_edges(self.interp)

    def head_of(edge):
        return edge[0]

    def rule_index_of(edge):
        return edge[1]

    # group edges by head then ruleindex
    by_head = groupby(head_of, self.edges)
    for head in by_head:
        # only values are replaced, so iterating the mapping stays safe
        by_head[head] = groupby(rule_index_of, by_head[head])
    self.items = by_head
def liquid_position(budget, account_filter_keywords=None):
    """Return the per-month balances of the relevant accounts in *budget*.

    An account is relevant when it is not hidden and neither its
    ``accountName`` nor its ``accountType`` matches any of the filter
    keywords (each keyword is used as a regular-expression alternative).

    :param budget: object exposing ``accounts``, ``transactions`` and
        ``monthlyBudgets``.
    :param account_filter_keywords: iterable of regex fragments; when empty
        or ``None`` the module-level ``non_liquid_account_keywords`` is used.
    :returns: ``OrderedDict`` keyed by the first seven characters of each
        monthly budget's ``month`` in sorted order.  Each value holds one
        balance per account name plus a ``'total'`` entry; leading months
        whose total is 0 are dropped.
    """
    import re
    from collections import OrderedDict
    from itertools import dropwhile

    if not account_filter_keywords:
        account_filter_keywords = non_liquid_account_keywords
    # Compile the alternation once instead of re-parsing it for every account.
    account_filter = re.compile(
        '|'.join("({})".format(item) for item in account_filter_keywords))

    def is_relevant_account(account):
        return not (account.hidden
                    or account_filter.search(account.accountName)
                    or account_filter.search(account.accountType))

    relevant_accounts = {
        acc.entityId for acc in budget.accounts if is_relevant_account(acc)
    }
    by_account = {
        acc: play_transactions(txs, accountId=acc, decorate=True)
        for acc, txs in groupby(budget.transactions,
                                key=lambda t: t.accountId).items()
        if acc in relevant_accounts
    }

    months = sorted(mb.month[0:7] for mb in budget.monthlyBudgets)
    result = OrderedDict((m, Object()) for m in months)
    for acc, txs in by_account.items():
        by_month = groupby(txs, key=transaction_month)
        account_name = acc.lookup().accountName
        balance = 0
        for month in months:
            month_txs = by_month.get(month)
            if month_txs:
                # the last transaction of the month carries the closing balance
                balance = month_txs[-1].balance
            # months without transactions carry the previous balance forward
            result[month][account_name] = balance

    for month_result in result.values():
        month_result['total'] = sum(month_result.values())
    # NOTE(review): relies on Object exposing the 'total' item as attribute too.
    return OrderedDict(dropwhile(lambda m: m[1].total == 0, result.items()))
def view_title(request, title_id):
    """Build the context dictionary for the page of one title.

    Looks up the title (404 when missing) together with its relations, cast
    and alternative names.  When the first parent relation is of kind ``'T'``
    the parent relations are additionally grouped by their ``tvseason``.
    ``userdata`` is ``None`` for anonymous users.
    """
    title = get_object_or_404(models.Title, pk=title_id)

    # relations in which the title is the child / the parent respectively
    as_member = models.Relation.objects.filter(title=title.id)
    as_parent = models.Relation.objects.filter(parent=title.id)

    seasons = {}
    if as_parent and as_parent[0].relation == 'T':
        seasons = utils.groupby(as_parent, key=lambda rel: getattr(rel, 'tvseason'))

    directors = title.cast_set.filter(role='D')
    actors = title.cast_set.filter(role='A').order_by('id')
    akas = models.Aka.objects.filter(title=title.id)

    userdata = None if request.user.is_anonymous() else title.get_userdata(request.user)

    return {
        'title': title,
        'relations_member': as_member,
        'relations_parent': as_parent,
        'relations_season': seasons,
        'directors': directors,
        'userdata': userdata,
        'actors': actors,
        'akas': akas,
    }
def group_with_topk_nodes():
    """Return node groups built around the highest-cost nodes.

    Base groups come from ``TGE(...).get_groups()``; they are refined by
    ``group_around_topk_costs`` into one id per position, and the positions
    are finally collected per id.  Reads ``gdef``, ``devices`` and
    ``prof_data`` from the enclosing scope.

    :returns: list with one entry per group id (presumably lists of node
        indices; depends on the ``groupby`` helper -- verify).
    """
    from utils import group_around_topk_costs
    from tge import TGE

    device_names = [name for name, _, _ in devices]
    planner = TGE(gdef, device_names)
    base_groups = planner.get_groups()
    id_list = group_around_topk_costs(gdef, base_groups, prof_data, 19)
    grouped = groupby(enumerate(id_list), key=cadr, value=car)
    return list(grouped.values())
def inner_join(courses: Iterable[Course], modules: Iterable[Module]
               ) -> Iterable[Module]:
    """Join courses onto the modules they belong to and post-process them.

    Every course is attached to each of its ``item['modules']`` ids that is
    present in *modules*; the per-module groups are merged with
    ``merge_course``.  Afterwards:

    * modules whose title (``details[1]["details"]``) contains
      ``" nur Teilnahme"`` are dropped;
    * modules with more than one content entry, all of whose ids end in
      ``-ps``, ``-se`` or ``-ku``, are replaced by one module per entry,
      named ``<module id>-NN``.

    :returns: dict mapping module id to the merged module.
    """
    modules = {item['module_id']: item for item in modules}
    courses = ((module_id, item)
               for item in courses
               for module_id in item['modules']
               if module_id in modules)
    result = {k: merge_course(g, modules[k])
              for k, g in utils.groupby(courses, key=lambda x: x[0])}

    split_suffixes = ("-ps", "-se", "-ku")
    for k, v in list(result.items()):
        # ``details[1]`` is read below, so two entries are required; the
        # former ``< 1`` guard let single-entry lists through to an IndexError.
        if len(v["details"]) < 2:
            continue
        modtitle = v["details"][1]["details"]
        if " nur Teilnahme" in modtitle:
            del result[k]
            continue

        content = v["content"]
        splittable = len(content) > 1 and all(
            c.split(" ", 1)[0].endswith(split_suffixes) for c in content)
        if not splittable:
            continue

        # replace the module by one synthetic module per content entry
        del result[k]
        for i, c in enumerate(content):
            course_id, name = c.split(" ", 1)
            newtitle = course_id + " " + modtitle + ". " + name
            newmodid = k + "-" + str(i).zfill(2)
            result[newmodid] = {
                **v,
                "module_id": newmodid,
                "content": {newtitle: {**content[c], "title": newtitle}},
            }
    return result
def full_transposon_treatment(seq,overlap,gap,minlength,fastaout,evalue=None, fname=None):
    '''This is where it all comes together.

    This takes a sequence of hits, assumed to constitute an entire blast
    search between one transposon and one fly genome. (See note below.) It
    performs the main process of this module -- i.e., creating the input for
    a multiple-alignment -- and dumps that information in FASTA format to
    *fastaout*, which must be a writeable fasta object (see module *fasta*).
    The caller is responsible for closing it, if appropriate (as it is in
    almost all cases).

    Hits are grouped by their ``SSEQID``; each group is split into islands
    with *gap*, each island is classified with *overlap*, and nests are
    stratified with *minlength*.  *evalue* is accepted but not used here.

    Raises ``Error`` when both *seq* and *fname* are given.

    NOTE: Generally it is best to have *seq* come from the function
    hitsfromcsv(). This can be done implicitly by giving None as the first
    argument, in which case *fname* is passed to hitsfromcsv().
    '''
    if None not in (seq,fname):
        raise Error("Cannot give both seq and fname arguments")
    elif seq is None:
        seq = hitsfromcsv(fname)
    for s,hits in utils.groupby(seq,key=_attrget('SSEQID')).iteritems():
        for island in makeislands(hits,gap):
            singles,nests = classifyrecords(island,overlap)
            nests = [stratify(N,minlength) for N in nests]
            if singles or any(nests):
                fastaout.writeentries(resolve_query_overlap(singles,nests,overlap))
            # NOTE(review): the flattened source does not show which statement
            # this ``else`` belongs to; it is attached to the ``if`` here.
            else:
                raise Error('No records result from file {!r}'.format(fname))
def fit(self, X, Y):
    """Learn one mean target value per distinct value of *X*.

    Pairs up *X* and *Y*, averages the ``Y`` values of each ``X`` category
    into ``self.category_means`` and finally applies the entries of
    ``self.overrides_dict`` on top.
    """
    def category_of(pair):
        return pair[0]

    def target_of(pair):
        return pair[1]

    means = self.category_means = {}
    targets_by_category = groupby(zip(X, Y), keyfunc=category_of, mapfunc=target_of)
    means.update((category, mean(targets))
                 for category, targets in targets_by_category.items())
    # overrides win over computed means
    means.update(self.overrides_dict.items())
def fetchTiles(self):
    """Download all tiles of the bounding box and return their addresses.

    The bounding box from ``getBbox2180()`` is split with ``divideBbox``;
    each tile is fetched as KML, its ``Placemark`` elements are converted
    with ``_convertToAddress`` and filtered with ``_isEligible``.  When one
    point occurs in several versions only the entry with the greatest
    ``id_`` is kept.

    :raises ValueError: when a response contains no KML ``Document``.
    """
    document_tag = '{http://www.opengis.net/kml/2.2}Document'
    placemark_tag = '{http://www.opengis.net/kml/2.2}Placemark'

    bbox = self.getBbox2180()
    ret = []
    for tile in self.divideBbox(*bbox):
        url = GUGiK.__base_url + ",".join(map(str, tile))
        self.__log.info("Fetching from EMUIA: %s", url)
        response = urlopen(url)
        try:
            payload = response.read()
        finally:
            # always release the connection, even when reading fails
            response.close()
        soup = lxml.etree.fromstring(payload)
        doc = soup.find(document_tag)  # be namespace aware
        if doc is None:
            raise ValueError(
                'No data returned from GUGiK possibly to wrong scale. Check __MAX_BBOX_X, __MAX_BBOX_Y, HEIGHT and WIDTH'
            )
        ret.extend(
            filter(self._isEligible,
                   map(self._convertToAddress, doc.iterchildren(placemark_tag))))

    # take latest version for each point (version is last element after dot in id_)
    # NOTE(review): ids are compared as strings, so version "10" sorts before
    # "9" -- confirm versions cannot reach two digits.
    versions_by_point = groupby(ret, lambda z: z.id_.rsplit('.', 1)[0])
    return [max(v, key=lambda z: z.id_) for v in versions_by_point.values()]
def _checkDuplicatesInImport(self, data):
    """Merge flagged duplicates that lie next to each other.

    Runs the inherited check first, then groups every entry whose ``fixmes``
    contain ``'Duplicate address in import'`` by (city, normalised house
    number, street).  For each pair of consecutive group members closer than
    10, the first one is removed from *data* and the second one is moved to
    the midpoint of both positions.
    """
    super(GUGiK, self)._checkDuplicatesInImport(data)

    def is_flagged(entry):
        return 'Duplicate address in import' in entry.fixmes

    def address_key(entry):
        return (entry.city, entry.housenumber.replace(' ', '').upper(), entry.street)

    flagged_by_address = groupby(filter(is_flagged, data), address_key)
    for _, occurances in flagged_by_address.items():
        if len(occurances) <= 1:
            continue
        # walk over neighbouring pairs (0,1), (1,2), ...
        for addr1, addr2 in zip(occurances, occurances[1:]):
            if distance(addr1.center, addr2.center) < 10:
                # addresses are closer than 10m
                # remove first, move second to average of these two
                self.__log.info(
                    "Merging duplicate addresses: %s, position: %s and %s" %
                    (addr1, addr1.getLatLon(), addr2.getLatLon()))
                data.remove(addr1)
                first = addr1.getLatLon()
                second = addr2.getLatLon()
                addr2.location = {
                    'lat': (first[0] + second[0]) / 2,
                    'lon': (first[1] + second[1]) / 2
                }
def load_bsx(f):
    """Parse all items from a Brickstore Parts List XML file (*.bsx)

    Every ``Item`` element becomes a dict of its child tags; values are
    converted with the matching ``CONVERT`` entry when there is one.  Lots
    sharing the same ``(ItemID, ColorID)`` are consolidated into the first
    such lot, whose ``Qty`` becomes the sum over the group.

    Parameters
    ----------
    f : file-like object
        file containing XML contents

    Returns
    -------
    list of dict
        one consolidated lot per ``(ItemID, ColorID)`` pair
    """
    def identity(value):
        return value

    root = etree.parse(f)
    items = []
    for item in root.findall('.//Item'):
        # Iterate the element itself: Element.getchildren() is deprecated and
        # no longer exists in recent xml.etree versions.
        items.append(dict(
            (child.tag, CONVERT.get(child.tag, identity)(child.text))
            for child in item))

    # sometimes there are multiple wanted lots with the same ItemID and ColorID.
    # Consolidate them together now.
    by_item = utils.groupby(items, lambda x: (x['ItemID'], x['ColorID']))
    result = []
    for same in by_item.values():
        prototype = same[0]
        prototype['Qty'] = sum(e['Qty'] for e in same)
        result.append(prototype)
    return result
def process_data(self, data):
    """Store the collected totals of the samples taken on ``self.weekday``.

    *data* is grouped by ``x.time.weekday()``; the group for
    ``self.weekday`` is passed through ``utils.collect_total(..., True)``
    and the result is kept in ``self.data``.
    """
    # all data is not grouped?
    by_weekday = utils.groupby(data, xkey=lambda x: x.time.weekday())
    # NOTE(review): a removed loop assigned every entry to itself (a no-op)
    # with the comment "watts on average" -- if averaging was intended it
    # was never implemented; confirm collect_total already does it.
    self.data = utils.collect_total(by_weekday[self.weekday], True)
def _checkDuplicatesInImport(self, data):
    """Flag every address that occurs more than once in *data*.

    Entries are considered equal when city, house number (spaces removed,
    upper-cased) and street match.  Each member of such a group receives the
    fixme ``'Duplicate address in import'`` and the group key is logged.
    """
    def address_key(entry):
        normalised_number = entry.housenumber.replace(' ', '').upper()
        return (entry.city, normalised_number, entry.street)

    for addr, occurances in groupby(data, address_key).items():
        if len(occurances) > 1:
            self.__log.warning("Duplicate addresses in import: %s", addr)
            for entry in occurances:
                entry.addFixme('Duplicate address in import')
def split_by_distribution(self, collection):
    """Group *collection* by ``distribution_name``.

    :returns: list of ``(distribution title, split_by_parameters result)``
        pairs, one per distribution name.
    """
    def sections():
        by_name = groupby(collection, lambda item: item.distribution_name)
        for name, members in by_name.iteritems():
            tables = self.split_by_parameters(name, members)
            yield get_distribution_title(name), tables

    return list(sections())
def prepare_table(self, procedures):
    """Build a table comparing the scalar, SSE and SSE-block timings.

    *procedures* are grouped by ``(size, loops)``; each group must contain
    the procedures ``"scalar"``, ``"sse"`` and ``"sse-block"``.  Rows are
    sorted by size.  Speed-ups are relative to the scalar time and are shown
    as ``'---'`` when all three times are below 10 or when the divisor is 0.

    :raises KeyError: when a group lacks one of the three procedures.
    :returns: the populated ``Table``.
    """
    def time_of(items, procedure):
        for item in items:
            if item.procedure == procedure:
                return item.time
        raise KeyError("Procedure '%s' not found" % procedure)

    def speedup(reference, measured):
        # a zero measurement cannot be divided by; show the placeholder
        if measured == 0:
            return '---'
        return '%0.2f' % (float(reference) / measured)

    grouped = groupby(procedures, lambda item: (item.size, item.loops))
    data = []
    for (size, loops), items in grouped.items():
        data.append((
            size,
            loops,
            time_of(items, "scalar"),
            time_of(items, "sse"),
            time_of(items, "sse-block"),
        ))
    data.sort(key=lambda row: row[0])  # sort by size

    t = Table()
    t.add_header([("input", 2), "scalar", ("SSE", 2), ("SSE block", 2)])
    t.add_header([
        "size [B]", "loops", "time [us]", "time [us]", "speed-up",
        "time [us]", "speed-up"
    ])
    for size, loops, t0, t1, t2 in data:
        if t0 < 10 and t1 < 10 and t2 < 10:
            # don't fool people when all measurements are single-digit numbers
            speedup_sse = '---'
            speedup_sse_block = '---'
        else:
            speedup_sse = speedup(t0, t1)
            speedup_sse_block = speedup(t0, t2)
        t.add_row([
            '{:,}'.format(size),
            '%d' % loops,
            '%d' % t0,
            '%d' % t1,
            speedup_sse,
            '%d' % t2,
            speedup_sse_block,
        ])
    return t
def unsatisified(wanted_list, allocation):
    """What do we still need to buy?

    Sums the wanted ``Qty`` per ``(ItemID, ColorID)`` and subtracts the
    ``quantity`` of every allocated lot, matched on
    ``(item_id, wanted_color_id)``.  *wanted_list* is only read, so no copy
    of it is made.

    :returns: dict mapping ``(item id, color id)`` to the quantity still
        missing; fully covered parts are omitted.
    :raises KeyError: when *allocation* contains a part that is not wanted.
    """
    remaining = {}
    for lot in wanted_list:
        key = (lot['ItemID'], lot['ColorID'])
        remaining[key] = remaining.get(key, 0) + lot['Qty']
    for item in allocation:
        remaining[(item['item_id'], item['wanted_color_id'])] -= item['quantity']
    return dict((k, v) for (k, v) in remaining.items() if v > 0)
def __init__(self, path):
    """Load the measurements stored at *path* and build ``self.report``.

    The raw data is grouped by ``sep_distribution``; each group is further
    broken down by ``split_by_distribution`` and stored together with the
    separator's title.
    """
    with open(path, 'rt') as f:
        self.raw_data = load(f)

    self.report = []
    # group by separators distribution
    by_separator = groupby(self.raw_data, lambda item: item.sep_distribution)
    for sep, members in by_separator.iteritems():
        sections = self.split_by_distribution(members)
        self.report.append((get_separator_title(sep), sections))
def _checkMixedScheme(self, data):
    """Flag street-less addresses in places that also use street names.

    For every ``simc`` the share of entries having a street is computed.
    Where that share lies strictly between 0 and 1, each entry without a
    street gets a fixme stating the percentage and count of entries with
    streets.
    """
    has_street_by_simc = groupby(data, lambda x: x.simc, lambda x: bool(x.street))

    with_street_count = {}
    mixed_share = {}
    for simc, flags in has_street_by_simc.items():
        count = len(_filterOnes(flags))
        with_street_count[simc] = count
        share = count / len(flags)
        if 0 < share and share < 1:
            mixed_share[simc] = share

    for entry in data:
        if bool(entry.street) or entry.simc not in mixed_share:
            continue
        entry.addFixme('Mixed addressing scheme in city - with streets and without. %.1f%% (%d) with streets.' % (mixed_share[entry.simc]*100, with_street_count[entry.simc]))
def unsatisified(wanted_list, allocation):
    """What do we still need to buy?

    The wanted ``Qty`` is totalled per ``(ItemID, ColorID)``; the
    ``quantity`` of every allocated lot is then deducted using its
    ``(item_id, wanted_color_id)``.  The input lists are only read, which
    is why the former deep copy of *wanted_list* is not needed.

    :returns: dict of ``(item id, color id) -> missing quantity`` holding
        only parts with a positive remainder.
    :raises KeyError: when an allocated lot refers to a part not wanted.
    """
    outstanding = {}
    for wanted in wanted_list:
        part = (wanted['ItemID'], wanted['ColorID'])
        outstanding[part] = outstanding.get(part, 0) + wanted['Qty']

    for bought in allocation:
        part = (bought['item_id'], bought['wanted_color_id'])
        outstanding[part] -= bought['quantity']

    return dict((part, qty) for (part, qty) in outstanding.items() if qty > 0)
def split_by_parameters(self, distribution_name, collection):
    """Build one table per ``num_distribution`` value of *collection*.

    :returns: list of ``(title, table)`` pairs ordered by the weight that
        ``get_num_distribution_parameters`` reports for each value.
    """
    rows = []
    by_param = groupby(collection, lambda item: item.num_distribution)
    for key, members in by_param.iteritems():
        table = self.prepare_table(members)
        params = get_num_distribution_parameters(distribution_name, key)
        rows.append((params.title, table, params.weight))

    # the weight only drives the ordering and is dropped from the result
    rows.sort(key=lambda row: row[-1])
    return [(title, table) for title, table, _ in rows]
def is_valid_solution(wanted_parts, allocation, stores=None):
    """Check if the allocation is a valid solution

    1) all wanted parts are bought
    2) the amount to buy isn't more than is available
    3) we make the minimum purchase for all used stores (only checked when
       *stores* is given; a store missing from *stores* is invalid)
    """
    def allocation_key(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    def wanted_key(lot):
        return (lot['ItemID'], lot['ColorID'])

    # 1) did we buy enough of each wanted part?
    bought_by_part = utils.groupby(allocation, allocation_key)
    for wanted in wanted_parts:
        lots = bought_by_part.get(wanted_key(wanted), [])
        if sum(e['quantity'] for e in lots) < wanted['Qty']:
            return False

    # 2) did we buy <= the amount available in every lot?
    if any(lot['quantity'] > lot['quantity_available'] for lot in allocation):
        return False

    if stores is None:
        return True

    # 3) did we reach the minimum purchase at every store used?
    lots_by_store = utils.groupby(allocation, lambda x: x['store_id'])
    store_by_id = dict((e['store_id'], e) for e in stores)
    for (store_id, lots) in lots_by_store.iteritems():
        if store_id not in store_by_id:
            return False
        spent = sum(e['cost_per_unit'] * e['quantity'] for e in lots)
        if spent < store_by_id[store_id]['minimum_buy']:
            return False
    return True
def save_xml_per_vendor(folder, solution, stores):
    '''Save a BrickLink XML with a Wanted List for each vendor

    For every store in the allocation (in sorted store id order) the user is
    asked interactively for a wanted-list id, which is stored on each lot
    bought from that store.  All lots are then written to the file *folder*.
    '''
    def store_id_of(entry):
        return entry['store_id']

    stores = utils.groupby(stores, store_id_of)
    allocation = utils.groupby(solution['allocation'], store_id_of)

    # for each store
    for store_id in sorted(allocation):
        seller = stores[store_id][0]['seller_name']

        # get wanted list id
        prompt = ("Create a new 'Wanted List' named '%s' and" +
                  " type its ID here: ") % (seller, )
        wanted_list = raw_input(prompt)

        # save that id onto the lot
        for lot in allocation[store_id]:
            lot['wanted_list_id'] = wanted_list

    # write file
    all_lots = utils.flatten(allocation.values())
    with open(folder, 'w') as f:
        save_xml(f, all_lots)
def is_valid_solution(wanted_parts, allocation, stores=None):
    """Check if the allocation is a valid solution

    1) all wanted parts are bought
    2) the amount to buy isn't more than is available
    3) we make the minimum purchase for all used stores

    Rule 3 is skipped when *stores* is ``None``; buying from a store that is
    not listed in *stores* makes the solution invalid.
    """
    part_of_allocation = lambda x: (x['item_id'], x['wanted_color_id'])
    part_of_wanted = lambda x: (x['ItemID'], x['ColorID'])

    # rule 1: enough of every wanted part
    by_part = utils.groupby(allocation, part_of_allocation)
    for want in wanted_parts:
        quantity_bought = sum(
            e['quantity'] for e in by_part.get(part_of_wanted(want), []))
        if quantity_bought < want['Qty']:
            return False

    # rule 2: never more than the lot offers
    for bought in allocation:
        if bought['quantity'] > bought['quantity_available']:
            return False

    # rule 3: minimum purchase per store
    if stores is not None:
        by_store = utils.groupby(allocation, lambda x: x['store_id'])
        known_stores = dict((e['store_id'], e) for e in stores)
        for (store_id, lots) in by_store.iteritems():
            if store_id not in known_stores:
                return False
            total = sum(e['cost_per_unit'] * e['quantity'] for e in lots)
            if total < known_stores[store_id]['minimum_buy']:
                return False
    return True
def _checkMixedScheme(self, data):
    """Mark addresses without a street where streets are otherwise in use.

    Entries are grouped by ``simc``.  A group is "mixed" when some, but not
    all, of its entries have a street; every street-less entry of a mixed
    group receives a fixme with the percentage and number of entries that do
    have one.
    """
    street_flags = groupby(data, lambda x: x.simc, lambda x: bool(x.street))

    counts = {}
    ratios = {}
    for simc, flags in street_flags.items():
        counts[simc] = len(_filterOnes(flags))
        ratio = counts[simc] / len(flags)
        if 0 < ratio and ratio < 1:
            ratios[simc] = ratio

    for entry in data:
        if not bool(entry.street) and entry.simc in ratios:
            entry.addFixme(
                'Mixed addressing scheme in city - with streets and without. %.1f%% (%d) with streets.'
                % (ratios[entry.simc] * 100, counts[entry.simc]))
def covers(wanted_parts, available_parts):
    """True if the given stores can cover all desired items

    A wanted part is covered when the summed ``quantity_available`` of all
    lots with a matching ``(item_id, wanted_color_id)`` is not below its
    ``Qty``.
    """
    stock = utils.groupby(
        available_parts, lambda x: (x['item_id'], x['wanted_color_id']))

    def short_of(item):
        lots = stock.get((item['ItemID'], item['ColorID']), [])
        return sum(e['quantity_available'] for e in lots) < item['Qty']

    return not any(short_of(item) for item in wanted_parts)
def __init__(self, path):
    """Load the measurements stored at *path* and build ``self.report``.

    The raw data is grouped by ``(sep_distribution, distribution_name)``;
    ``self.report`` receives one ``(separator title, distribution title,
    table)`` triple per group.
    """
    with open(path, 'rt') as f:
        self.raw_data = load(f)

    def by_separator_and_distribution(item):
        return (item.sep_distribution, item.distribution_name)

    self.report = []
    grouped = groupby(self.raw_data, by_separator_and_distribution)
    for (sep, distribution_name), collection in grouped.iteritems():
        table = self.prepare_table(collection)
        self.report.append((
            get_separator_title(sep),
            get_distribution_title(distribution_name),
            table,
        ))
def coverage(inventory):
    """Return how many wanted pieces *inventory* could supply.

    Only lots whose ``(item_id, wanted_color_id)`` is a key of the enclosing
    ``wanted_by_item`` are considered.  For each such part the contribution
    is the smaller of the wanted ``Qty`` (taken from the first wanted lot)
    and the total ``quantity_available``.
    """
    def part_of(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    # only worry about items wanted
    relevant = [lot for lot in inventory if part_of(lot) in wanted_by_item]

    tot = 0
    for part, lots in utils.groupby(relevant, part_of).iteritems():
        # how much there is of this (item_id, color_id) pair ...
        available = sum(e['quantity_available'] for e in lots)
        # ... capped at how much of it I'd buy
        tot += min(wanted_by_item[part][0]['Qty'], available)
    return tot
def coverage(inventory):
    """Count the wanted pieces that the lots in *inventory* can provide.

    Lots for parts missing from the enclosing ``wanted_by_item`` are ignored.
    Per remaining ``(item_id, wanted_color_id)`` the available quantities are
    summed and capped at the ``Qty`` of the first wanted lot for that part.
    """
    key_of = lambda x: (x['item_id'], x['wanted_color_id'])

    # only worry about items wanted
    wanted_lots = [lot for lot in inventory if key_of(lot) in wanted_by_item]

    # total stock per part, capped by the quantity I'd actually buy
    return sum(
        min(wanted_by_item[key][0]['Qty'],
            sum(e['quantity_available'] for e in lots))
        for key, lots in utils.groupby(wanted_lots, key_of).iteritems()
    )
def add_players(self, players, player_filter=None):
    """Register the player pool and index the eligible players by position.

    Sets ``self.players`` (sorted descending by the attribute named in
    ``self.optimize_key``), ``self.players_filtered`` (those accepted by
    *player_filter*, or by ``self.player_filter`` when none is given) and
    ``self.players_by_pos`` (eligible players grouped by ``'position'``).

    :raises Exception: when a position required by ``self.restrictions``
        has no eligible player.
    """
    # new list in descending order by player[self.optimize_key]
    self.players = sorted(players,
                          key=lambda p: p.__dict__[self.optimize_key],
                          reverse=True)

    # Filter out unwanted players.  The result is materialised because on
    # Python 3 filter() yields a one-shot iterator that the grouping below
    # would otherwise leave exhausted on the instance.
    chosen_filter = self.player_filter if player_filter is None else player_filter
    self.players_filtered = list(filter(chosen_filter, self.players))

    # keys for each position, values a list of players at that position
    self.players_by_pos = utils.groupby('position', self.players_filtered)

    # every required position needs at least one player in the pool
    missing = [k for k in self.restrictions if k not in self.players_by_pos]
    if missing:
        raise Exception(
            '[ERROR] Player list (players=%d) not sufficient to meet restrictions. Missing = %s.'
            % (len(self.players), missing))
def inference(img, state_path, device=0):
    """Run the U-Net over *img* patch by patch and return a uint8 mask.

    The model weights are loaded from *state_path* onto CUDA device
    *device*.  Patch positions are laid out on a grid with a stride of 60
    over the range 0..2048 in both axes, the patches are produced by
    ``preprocess`` and fed to the model in groups; the per-pixel arg-max of
    the model output is written into an array of ``img.shape`` which is
    finally scaled by 255 and cast to ``uint8``.

    NOTE(review): the grid extent 2048 is hard-coded -- presumably the
    expected image size; confirm.
    """
    model = unet().cuda(device)
    state = torch.load(state_path)
    model.load_state_dict(state)
    binary = np.zeros(img.shape)
    stride, patch_size = 60, 60
    # NOTE(review): the flattened source does not show where this ``with``
    # block ends; it is closed after ``preprocess`` here -- confirm.
    with cp.cuda.Device(device):
        img = cp.asarray(img.astype(np.uint16))
        pos_x = np.arange(0, 2048, stride)
        pos_y = np.arange(0, 2048, stride)
        vx, vy = np.meshgrid(pos_x, pos_y)
        # one (x, y) row per grid position
        pos = cp.asarray(np.stack([vx, vy]).reshape((2, -1)).transpose([1, 0]))
        X, pos = preprocess(img, pos, half_size=patch_size // 2, device=device)
    # presumably moves the patch axis to the front -- verify against preprocess
    X = X.transpose([2, 0, 1])
    indices = np.arange(len(X)).tolist()
    # presumably splits the indices into mini-batches of 100 -- verify
    groups = groupby(indices, 100, key='mini')
    out_list = []  # NOTE(review): never used below
    for index, group in enumerate(groups):
        out = X[group]
        _pos = pos[group]
        out = Variable(
            torch.from_numpy(np.expand_dims(out, 1).astype(
                np.float32)).cuda(device))
        R = model(out)
        # index of the maximum along dimension 1
        R = R.max(1)[1]
        patch = R.cpu().numpy().transpose([1, 2, 0])
        binary[patch_interface(_pos[:, 0], _pos[:, 1],
                               patch_size // 2)] = patch
    binary = (binary * 255).astype('uint8')
    return binary
def __init__(self, path):
    """Load the measurements stored at *path* and build ``self.report``.

    Records are grouped by ``size``.  For every size, in ascending order,
    the records are sorted by (distribution name, num distribution, sep
    distribution) and turned into ``(title, table)`` pairs; the report holds
    one ``('Input size N bytes', pairs)`` entry per size.
    """
    with open(path, 'rt') as f:
        records = list(load(f))

    def ordering(item):
        return (item.distribution_name, item.num_distribution,
                item.sep_distribution)

    by_size = groupby(records, lambda item: item.size)
    self.report = []
    for size in sorted(by_size):
        members = by_size[size]
        members.sort(key=ordering)
        tables = [(self.get_title(item), self.prepare_table(item))
                  for item in members]
        self.report.append(('Input size %d bytes' % size, tables))
def brute_force(wanted_parts, price_guide, k):
    """Enumerate all possible combinations of k stores

    For each combination whose joint inventory covers *wanted_parts* the
    cheapest allocation is computed with ``min_cost``.

    :returns: list of dicts with the keys ``'cost'``, ``'allocation'`` and
        ``'store_ids'`` (the tuple of selected store ids).
    """
    by_store = utils.groupby(price_guide, lambda x: x['store_id'])

    results = []
    for selected_stores in itertools.combinations(by_store.keys(), k):
        # get items sold by these stores only
        inventory = utils.flatten(by_store[s] for s in selected_stores)
        if not covers(wanted_parts, inventory):
            # this combination cannot fill the order
            continue
        # calculate minimum cost to buy everything using these stores
        cost, allocation = min_cost(wanted_parts, inventory)
        results.append(
            dict(cost=cost, allocation=allocation, store_ids=selected_stores))
    return results
def brute_force(wanted_parts, price_guide, k):
    """Enumerate all possible combinations of k stores

    Every k-subset of the stores in *price_guide* is tried.  Subsets that
    cannot cover *wanted_parts* are skipped; for the others the result of
    ``min_cost`` is recorded.

    :returns: list of ``{'cost', 'allocation', 'store_ids'}`` dicts.
    """
    def store_id_of(entry):
        return entry['store_id']

    by_store = utils.groupby(price_guide, store_id_of)
    results = []
    for selected_stores in itertools.combinations(by_store.keys(), k):
        # items sold by these stores only
        inventory = utils.flatten(by_store[s] for s in selected_stores)
        if covers(wanted_parts, inventory):
            # minimum cost to buy everything using these stores
            solution = min_cost(wanted_parts, inventory)
            results.append({
                'cost': solution[0],
                'allocation': solution[1],
                'store_ids': selected_stores,
            })
    return results
def fetchTiles(self):
    """Download all tiles of the bounding box and return their addresses.

    The bounding box from ``getBbox2180()`` is split with ``divideBbox``;
    each tile is fetched as KML through the opener returned by
    ``get_ssl_no_verify_opener()``.  ``Placemark`` elements are converted
    with ``_convertToAddress`` and filtered with ``_isEligible``.  When one
    point occurs in several versions only the entry with the greatest
    ``id_`` is kept.

    :raises ValueError: when a response contains no KML ``Document``.
    """
    document_tag = '{http://www.opengis.net/kml/2.2}Document'
    placemark_tag = '{http://www.opengis.net/kml/2.2}Placemark'

    bbox = self.getBbox2180()
    # one opener serves every tile; it does not depend on the loop variable
    opener = get_ssl_no_verify_opener()
    ret = []
    for tile in self.divideBbox(*bbox):
        url = GUGiK.__base_url+",".join(map(str, tile))
        self.__log.info("Fetching from EMUIA: %s", url)
        response = opener.open(url)
        try:
            payload = response.read()
        finally:
            # always release the connection, even when reading fails
            response.close()
        soup = lxml.etree.fromstring(payload)
        doc = soup.find(document_tag)  # be namespace aware
        if doc is None:
            raise ValueError('No data returned from GUGiK possibly to wrong scale. Check __MAX_BBOX_X, __MAX_BBOX_Y, HEIGHT and WIDTH')
        ret.extend(
            filter(self._isEligible,
                   map(self._convertToAddress, doc.iterchildren(placemark_tag))))

    # take latest version for each point (version is last element after dot in id_)
    # NOTE(review): ids are compared as strings, so version "10" sorts before
    # "9" -- confirm versions cannot reach two digits.
    versions_by_point = groupby(ret, lambda z: z.id_.rsplit('.', 1)[0])
    return [max(v, key=lambda z: z.id_) for v in versions_by_point.values()]
def min_cost(wanted_parts, available_parts):
    """Greedily minimize the cost of all wanted parts

    For each wanted part the matching lots (same ``item_id`` and
    ``wanted_color_id``) are consumed cheapest first until ``Qty`` is
    filled.  When the inventory runs out a warning is printed and the total
    cost becomes infinite.

    :returns: ``(cost, allocation)`` where *allocation* is a list of dicts
        with the keys ``item_id``, ``color_id``, ``store_id``, ``quantity``
        and ``cost_per_unit``.
    """
    lots_by_part = utils.groupby(
        available_parts, lambda x: (x['item_id'], x['wanted_color_id']))
    result = []
    cost = 0.0
    for item in wanted_parts:
        matching = lots_by_part.get((item['ItemID'], item['ColorID']), [])
        # most expensive first, so that pop() hands out the cheapest lot left
        matching = sorted(matching, key=lambda x: x['cost_per_unit'],
                          reverse=True)

        # take as much inventory as possible, starting with the lowest price,
        # until the requested quantity is filled
        n_remaining = item['Qty']
        while n_remaining > 0:
            if not matching:
                # print() with one argument behaves the same on Python 2 and 3
                print('WARNING: couldn\'t find enough inventory to purchase %s' % (
                    item['ItemName'], ))
                cost = float('inf')
                break
            lot = matching.pop()
            amount = min(n_remaining, lot['quantity_available'])
            result.append({
                'item_id': lot['item_id'],
                'color_id': lot['color_id'],
                'store_id': lot['store_id'],
                'quantity': amount,
                'cost_per_unit': lot['cost_per_unit']
            })
            n_remaining -= amount
            cost += amount * lot['cost_per_unit']
    return (cost, result)
def load_xml(f):
    """Parse a BrickLink XML file

    Every ``ITEM`` element becomes a dict: child tags are renamed through
    ``TRANSLATIONS`` and their text converted with the matching ``CONVERT``
    entry when there is one.  ``ItemName`` is set to the item id and
    ``ColorName`` is looked up with ``color.name``.  Lots sharing the same
    ``(ItemID, ColorID)`` are consolidated into the first such lot, whose
    ``Qty`` becomes the sum over the group.

    :param f: file-like object (or filename) accepted by ``etree.parse``.
    :returns: list of consolidated lot dicts.
    """
    def identity(value):
        return value

    root = etree.parse(f)
    items = []
    for item in root.findall('.//ITEM'):
        item_dict = {}
        # Iterate the element itself: Element.getchildren() is deprecated and
        # no longer exists in recent xml.etree versions.
        for child in item:
            tag = TRANSLATIONS.get(child.tag, child.tag)
            item_dict[tag] = CONVERT.get(tag, identity)(child.text)
        item_dict['ItemName'] = item_dict['ItemID']
        item_dict['ColorName'] = color.name(item_dict['ColorID'])
        items.append(item_dict)

    # sometimes there are multiple wanted lots with the same ItemID and ColorID.
    # Consolidate them together now.
    by_item = utils.groupby(items, lambda x: (x['ItemID'], x['ColorID']))
    result = []
    for same in by_item.values():
        prototype = same[0]
        prototype['Qty'] = sum(e['Qty'] for e in same)
        result.append(prototype)
    return result
def generate_page(data: List[Module2]) -> str:
    """Render all modules as HTML, one collapsible section per category.

    Modules are grouped by their ``"category"``; each module is rendered
    with the module template and the modules of a category, joined by blank
    lines, are wrapped in the category template.  The rendered categories
    are concatenated in grouping order.
    """
    module_template = """ <div class=flex> <label><input id='checker-{{id}}' class=checker type=checkbox></label> <details class=module-wrapper> <summary id='module-{{id}}' class='module box-b box-b-{{id}}'> <span>{{credits}}cp</span> <span>{{title_short}}</span> <span title='{{title}}'>{{title}}</span> <span title='{{owner}}'>{{owner_short}}</span> <!-- <span title='{{language}}'>{{{language}}}</span> --> <div class=toggler-show></div> </summary> <div id='details-{{id}}' class=details>{{#details}} <b>{{title}}</b><br> {{#details}}{{{.}}}<br>{{/details}} <!-- "> --></a>{{/details}}</div> </details> </div>"""
    category_template = """ <details class=category> <summary> <div class=toggler-show></div> <b>{{title}}</b> </summary> <clear></clear> {{{modules}}} </details> <br>"""

    def genmodule(x: Module2) -> str:
        return stache(module_template, x)  # type: ignore

    def gencategory(title: str, modules: str) -> str:
        context = {"title": title, "modules": modules}
        return stache(category_template, context)  # type: ignore

    sections = []
    for category, members in utils.groupby(data, lambda x: x["category"]):
        rendered_modules = "\n\n".join(genmodule(m) for m in members)
        sections.append(gencategory(category, rendered_modules))
    return "".join(sections)
def min_cost(wanted_parts, available_parts):
    """Greedily minimize the cost of all wanted parts

    Lots are matched to a wanted part on ``(item_id, wanted_color_id)`` and
    used up from the cheapest to the most expensive until the wanted ``Qty``
    is reached.  If the lots run out first, a warning is printed and the
    returned cost is infinite.

    :returns: ``(cost, allocation)``; every allocation entry is a dict with
        ``item_id``, ``color_id``, ``store_id``, ``quantity`` and
        ``cost_per_unit``.
    """
    def part_of(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    def unit_cost(lot):
        return lot['cost_per_unit']

    stock = utils.groupby(available_parts, part_of)
    result = []
    cost = 0.0
    for item in wanted_parts:
        candidates = stock.get((item['ItemID'], item['ColorID']), [])
        # descending by price: the cheapest lot sits at the end for pop()
        candidates = sorted(candidates, key=unit_cost, reverse=True)

        # take as much inventory as possible, starting with the lowest price,
        # until the requested quantity is filled
        n_remaining = item['Qty']
        while n_remaining > 0:
            if not candidates:
                # print() with one argument behaves the same on Python 2 and 3
                print('WARNING: couldn\'t find enough inventory to purchase %s' % (item['ItemName'],))
                cost = float('inf')
                break
            cheapest = candidates.pop()
            amount = min(n_remaining, cheapest['quantity_available'])
            result.append({
                'item_id': cheapest['item_id'],
                'color_id': cheapest['color_id'],
                'store_id': cheapest['store_id'],
                'quantity': amount,
                'cost_per_unit': cheapest['cost_per_unit']
            })
            n_remaining -= amount
            cost += amount * cheapest['cost_per_unit']
    return (cost, result)
def main(path):
    """Print a table of speed-up statistics for the measurements in *path*.

    Records are grouped by ``(size, distribution_name)``.  For each group,
    in sorted key order, one row is emitted with the size, the distribution
    title, the number of samples and the first three statistics of every
    entry in the module-level ``procedures`` (labelled min/avg/max).
    """
    with open(path, 'rt') as f:
        data = groupby(load_file(f),
                       lambda item: (item.size, item.distribution_name))

    header1 = [("", 3),
               ("speedup over %s procedure" % reference_procedure,
                len(procedures) * 3)]
    header2 = [("", 3)]
    header3 = ["size [B]", "distribution", "samples"]
    for proc in procedures:
        header2.append((proc, 3))
        header3.extend(["min", "avg", "max"])

    table = Table()
    table.add_header(header1)
    table.add_header(header2)
    table.add_header(header3)

    for key in sorted(data):
        collection = data[key]
        size, name, stats = calculate_speedup_statistics(collection)
        row = [
            '%d' % size,
            get_distribution_title(name),
            '%d' % len(collection),
        ]
        for proc in procedures:
            row.append('%0.2f' % stats[proc][0])
            row.append('%0.2f' % stats[proc][1])
            row.append('%0.2f' % stats[proc][2])
        table.add_row(row)

    # print() with one argument behaves the same on Python 2 and 3
    print(table)
def plot_fits(plot, data, get_fit_range, badness_threshold, x_col, x_label,
              y_col, y_label, absdydx_label, title_cols, get_title, get_fn,
              color_cols, get_color, get_color_label, get_dmc, dmc_label,
              dmc_yerr_col=None, maxfev=0):
    """Plot *data* with power-law fits and return the fit results.

    Two stacked axes are drawn: the upper one shows ``|dy/dx|`` on log-log
    scales, the lower one shows ``y``; both against ``x``.  *data* must form
    exactly one group under *title_cols* (otherwise the unpacking fails) and
    is split further by *color_cols*; each sub-group is plotted, fitted with
    ``do_fit`` inside the range given by *get_fit_range* and overlaid with
    the fitted curves.  A reference value from *get_dmc* is drawn as a
    horizontal line (with an error band when *dmc_yerr_col* is given).

    When *plot* is true the figure is saved under the name from *get_fn* and
    its axis limits are synchronised with a JSON settings file.

    :returns: dict mapping each group's ``"method"`` to its ``do_fit``
        result extended by ``"num_points"``; groups without a usable fit
        are absent.
    """
    # continuous x range for plotting continuous functions
    x_c = np.linspace(data[x_col].min() - 1, data[x_col].max() + 1, 250)
    [(title_key, gg)] = utils.groupby(data, title_cols)
    fig, ax = plt.subplots(2)
    fig.set_size_inches(8, 10)  # otherwise the text will get obscured
    y_range = np.array([np.nan, np.nan])
    fit_results = {}
    for color_key, g in utils.groupby(gg, color_cols):
        logging.info(f"plot_fits: method: {color_key['method']}")
        color = get_color(color_key["method"])
        label = get_color_label(color_key["method"])
        g = g.sort_values([x_col])
        d = g.rename(columns={x_col: "x", y_col: "y"})
        deriv_d = differentiate(d, "x", "y", "dydx")
        x = deriv_d["x"]
        dydx = deriv_d["dydx"]
        ax[0].plot(x, abs(dydx), "x", label=label, color=color)
        ax[1].plot(d["x"], d["y"], "x", label=label, color=color)
        utils.update_range(y_range, d["y"])

        # clip the requested fit range to the data that is actually present
        fit_range = get_fit_range(color_key["method"])
        fit_range = (max(fit_range[0], d["x"].min()),
                     min(fit_range[1], d["x"].max()))
        if fit_range[1] < fit_range[0]:
            continue
        fit_range = fit_range + np.array([-0.2, 0.2])  # to make it look nicer
        ax[0].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        ax[1].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        d_subset = d[d["x"].between(*fit_range)]
        deriv_subset = deriv_d[deriv_d["x"].between(*fit_range)]
        if len(deriv_subset) < 2:
            continue

        fit = do_fit(d_subset, deriv_subset,
                     badness_threshold=badness_threshold,
                     maxfev=maxfev)
        if fit is None:
            continue
        fit_result = {"num_points": len(d["x"])}
        fit_result.update(fit)
        fit_results[color_key["method"]] = fit_result

        # circle the points that the "logderiv" stage reported as outliers
        outliers = deriv_subset.loc[fit["logderiv"]["outliers"]]
        ax[0].plot(outliers["x"], abs(outliers["dydx"]), "o",
                   markerfacecolor="none", label="", color="red")

        # derivative of a*x**b + c for every stage on the upper axes
        for stage, result in fit.items():
            if stage == "fixedab":
                continue  # fixedab yields the same plot here as logderiv
            a = result["coefficient"]
            b = result["exponent"]
            b_err = result.get("exponent_err", None)
            dydx_c = a * b * x_c**(b - 1.0)
            ax[0].plot(x_c, abs(dydx_c), linestyle=STAGE_TO_LINESTYLE[stage],
                       label=label + " " + fit_label(stage, b, b_err),
                       color=color)

        # the curve itself on the lower axes, for stages that have a constant
        for stage, result in fit.items():
            if "constant" not in result:
                continue
            a = result["coefficient"]
            b = result["exponent"]
            c = result["constant"]
            b_err = result.get("exponent_err", None)
            y_c = a * x_c**b + c
            ax[1].plot(x_c, y_c, linestyle=STAGE_TO_LINESTYLE[stage],
                       label=label + " " + fit_label(stage, b, b_err),
                       color=color)
            if b < 0:
                # with a negative exponent the curve tends towards c
                ax[1].axhline(c, linestyle=":", color=color)
            else:
                # logging.warn is a deprecated alias of logging.warning
                logging.warning(f"plot_fits: {label}: {stage}.b >= 0: "
                                "no asymptotic result")
            # NOTE(review): the flattened source does not show whether this
            # call belongs to the else branch; it runs for both here.
            utils.update_range(y_range, c)

    g = get_dmc(**title_key)
    if len(g):
        y = g[y_col].iloc[0]
        if dmc_yerr_col is not None:
            y_err = g[dmc_yerr_col].iloc[0]
            ax[1].axhspan(y - y_err, y + y_err, alpha=0.4, color="black",
                          label=dmc_label)
            utils.update_range(y_range, [y - y_err, y + y_err])
        # add an extra line to make sure it's visible
        ax[1].axhline(y, alpha=0.4, color="black")
        utils.update_range(y_range, [y])

    ax[0].set_xlabel(x_label)
    ax[0].set_ylabel(absdydx_label)
    ax[0].set_xscale("log")
    ax[0].set_yscale("log")
    ax[0].set_title(get_title(**title_key))
    # narrow the axes to 60% width so the legend fits beside them
    box = ax[0].get_position()
    ax[0].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[0].legend(bbox_to_anchor=(1, 1.0))
    ax[1].legend()
    ax[1].set_xlabel(x_label)
    ax[1].set_ylabel(y_label)
    ax[1].set_ylim(*utils.expand_range(y_range, 0.05))
    box = ax[1].get_position()
    ax[1].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[1].legend(bbox_to_anchor=(1, 1.0))
    ax[1].get_xaxis().set_major_locator(
        matplotlib.ticker.MaxNLocator(integer=True))

    if plot:
        fn = get_fn(**title_key)
        settings_fn = os.path.join("plot_settings", fn + ".json")
        settings = utils.load_json(settings_fn) or {"ax1": {}, "ax2": {}}
        fit_results_fn = os.path.join("fit_results", fn + ".json")

        def save_settings():
            utils.save_json(settings_fn, settings)

        utils.sync_axes_lims(ax[0], settings["ax1"], save_settings)
        utils.sync_axes_lims(ax[1], settings["ax2"], save_settings)
        utils.savefig(fig, fn)
    return fit_results
def add_players(self, players, player_filter=None):
    """Store the candidate players, filter them and index them by position.

    Sets three attributes on ``self``:

    * ``players`` -- ``players`` sorted in descending order of the value
      stored under ``self.optimize_key`` in each player's ``__dict__``.
    * ``players_filtered`` -- the sorted players accepted by the filter.
    * ``players_by_pos`` -- result of ``utils.groupby('position', ...)`` on
      the filtered players.

    Args:
        players: Sized collection of player objects.
        player_filter: Optional predicate; ``self.player_filter`` is used
            when this is ``None``.

    Raises:
        Exception: If a key of ``self.restrictions`` is absent from
            ``players_by_pos`` after filtering.
    """
    self.players = sorted(
        players,
        key=lambda p: p.__dict__[self.optimize_key],
        reverse=True,
    )
    predicate = self.player_filter if player_filter is None else player_filter
    # Materialise the result: on Python 3 ``filter`` returns a one-shot
    # iterator, which the groupby call below would drain and leave
    # ``self.players_filtered`` empty for every later reader.
    self.players_filtered = list(filter(predicate, self.players))
    self.players_by_pos = utils.groupby('position', self.players_filtered)

    missing = [k for k in self.restrictions if k not in self.players_by_pos]
    if missing:
        raise Exception(
            '[ERROR] Player list (players=%d) not sufficient to meet '
            'restrictions. Missing = %s.' % (len(players), missing))
def transactions_by_month(transactions):
    """Group transactions by month after ordering them by date.

    The transactions are sorted on their ``date`` attribute, grouped with
    ``groupby`` using ``transaction_month`` as the key, and the grouping is
    wrapped in an ``OrderedDict``.
    """
    from collections import OrderedDict

    def by_date(tx):
        return tx.date

    chronological = sorted(transactions, key=by_date)
    monthly = groupby(chronological, key=transaction_month)
    return OrderedDict(monthly)
def get_closed_ways(self, ways):
    """Chain ways end-to-end into closed rings and return them as Polygons.

    Starting from the first way, node ids are accumulated; whenever the
    accumulated chain starts and ends on the same node a ``Polygon`` is
    built from the centres of those nodes (looked up in ``self.__osm_obj``)
    and a new chain is started from the next unused way.  Otherwise the
    chain is extended with an unused way that touches either end of it.

    Returns:
        list of ``Polygon`` objects; ``[]`` when ``ways`` is empty.

    Raises:
        ValueError: if a chain is still open and no unused way can extend it.
    """
    if not ways:
        return []

    ways = list(ways)
    way_by_first_node = utils.groupby(ways, lambda w: w._raw['nodes'][0])
    way_by_last_node = utils.groupby(ways, lambda w: w._raw['nodes'][-1])

    def _get_ids(elem):
        return elem['nodes']

    def _get_way(id_, dct):
        # first way registered under id_ that has not been consumed yet
        if id_ not in dct:
            return None
        for candidate in dct[id_]:
            if candidate in ways:
                return candidate
        return None

    polygons = []
    node_ids = []
    cur_elem = ways[0]
    ids = _get_ids(cur_elem)

    while ways:
        node_ids.extend(ids)
        ways.remove(cur_elem)

        if node_ids[0] == node_ids[-1]:
            # the chain closed on itself: emit it and restart with a fresh way
            polygons.append(
                Polygon(
                    (node.center.x, node.center.y)
                    for node in (self.__osm_obj[('node', nid)] for nid in node_ids)
                )
            )
            if ways:
                cur_elem = ways[0]
                node_ids = []
                ids = _get_ids(cur_elem)
            continue

        # chain still open
        if not ways:
            raise ValueError

        tail_id = node_ids[-1]
        head_id = node_ids[0]
        # Tried in priority order:
        # (node to attach at, lookup table, reverse chain?, reverse new way?)
        attempts = (
            (tail_id, way_by_first_node, False, False),
            (tail_id, way_by_last_node, False, True),
            (head_id, way_by_first_node, True, False),
            (head_id, way_by_last_node, True, True),
        )
        for anchor, table, flip_chain, flip_next in attempts:
            found = _get_way(anchor, table)
            if found:
                cur_elem = found
                if flip_chain:
                    node_ids = list(reversed(node_ids))
                ids = _get_ids(cur_elem)
                if flip_next:
                    ids = list(reversed(ids))
                break
        else:
            raise ValueError

    return polygons
def clean(module_id, entry, fields, regulation):
    """Normalise one scraped module entry into the final result dict.

    The function derives a cleaned title and a short abbreviation, reorders
    ``entry["details"]`` (mutating ``entry`` in place), condenses the
    "Studiengangsordnungen" detail, and computes owner names, category and
    dates.  The pieces are combined with ``utils.merge_dict``.

    Args:
        module_id: Module id; when falsy, the "TUCaN-Nummer" detail (or "")
            is used instead.
        entry: Dict read via the keys ``"details"``, ``"content"`` and
            ``"credits"``; the merged result must contain a ``module_id``
            equal to the resolved module id.
        fields: Nested mapping looked up as ``fields[regulation][module_id]``
            whose first element is the raw category.
        regulation: Regulation name; checked for the substring ``"B.Sc."``.

    Returns:
        The merged dict with ``module_id`` removed and ``id``, ``title``,
        ``title_short``, ``owner``, ``owner_short``, ``credits`` and
        ``category`` set.

    Raises:
        AssertionError: if the merged result's ``module_id`` differs from the
            resolved module id.
    """

    def get_first(title: str, entry=entry):
        # 'details' value of the first detail with this title, else None
        tmp = [
            detail for detail in entry["details"] if detail["title"] == title
        ]
        return tmp[0].get('details') if len(tmp) > 0 else None

    def get_abbr(title):
        # choose the best one of three abbreviations
        # abbr1: upper-case letters and digits of the title
        abbr1 = "".join(i for i in title if i.isupper() or i.isnumeric())
        # abbr2: first character of every space-separated word
        abbr2 = "".join(i[0] if len(i) > 0 else ""
                        for i in title.strip().split(" "))
        # abbr3: the explicit "Kürzel" detail with spaces removed
        abbr3 = (get_first("Kürzel") or "").strip().replace(" ", "")
        # prefer abbr3 when it has 2-5 characters; otherwise take whichever
        # derived abbreviation has a length closest to 3.4
        abbrs = ([abbr3, abbr1, abbr2] if 1 < len(abbr3) < 6 else sorted(
            (i for i in (abbr1, abbr2)), key=lambda x: abs(3.4 - len(x))))
        #print(abbrs)
        return abbrs[0]

    # module_id, title, abbr
    first_entry = list(entry['content'].values())[0]
    # drop the first 10 characters, then everything up to the first space
    sort_title = first_entry['title'][10:]
    _, title = sort_title.split(" ", 1)
    if len(list(entry['content'].values())) > 1:
        title = get_first("Titel") or title
    orig_title = title
    module_id = module_id or get_first("TUCaN-Nummer") or ""
    # applied twice on purpose in the original; presumably to strip two
    # bracketed parts -- confirm against utils.remove_bracketed_part
    title = utils.remove_bracketed_part(title)
    title = utils.remove_bracketed_part(title)
    title = utils.roman_to_latin_numbers(title)
    title = title.replace("Praktikum in der Lehre - ", "")
    abbr = get_abbr(title)

    # reorder details
    # NOTE(review): the line structure of this literal was reconstructed;
    # confirm that "Sprache" is meant to be an active member.
    later_titles = {
        #"Unterrichtssprache",
        "Sprache",
        "Min. | Max. Teilnehmerzahl",
        "TUCaN-Nummer",
        "Kürzel",
        "Anzeige im Stundenplan",
        # "Titel",
        "Lehrveranstaltungsart",
        "Veranstaltungsart",
        "Turnus",
        "Startsemester",
        "SWS",
        "Semesterwochenstunden",
        "Diploma Supplement",
        "Modulausschlüsse",
        "Modulvoraussetzungen",
        "Studiengangsordnungen",
        "Verwendbarkeit",
        "Anrechenbar für",
        "Orga-Einheit",
        "Gebiet",
        "Fach",
        "Modulverantwortliche",
        # "Lehrende",
        "Dauer",
        "Anzahl Wahlkurse",
        "Notenverbesserung nach §25 (2)",
        "Wahlmöglichkeiten",
        "Credits",
        "Kurstermine",
    }
    # details not listed above come first, then a separator, then the rest
    early = [i for i in entry["details"] if i["title"] not in later_titles]
    late = [i for i in entry["details"] if i["title"] in later_titles]
    entry["details"] = (early + [{
        "details": "<br><hr><b>Andere Angaben aus Tucan und Inferno</b><br>",
        "title": ""
    }] + late)
    for detail in entry["details"]:
        if detail["details"].strip() != "":
            detail["details"] += "<br>"
        if detail['title'] == "Studiengangsordnungen":
            # split each line at its first "(" into (name, rest), group by
            # name and re-join the rests (minus their last character) inside
            # one pair of parentheses per name
            regs = [(x.split("(", 1)) for x in sorted(detail['details'].replace(
                "<br>", "<br/>").split("<br/>")) if x.strip()]
            regs = utils.groupby(regs, key=lambda x: x[0])
            regs = [(k, list(v)) for k, v in regs]
            # print(detail['details'].replace("<br>", "<br/>").split("<br/>"))
            # print([ k +"("+ ", ".join(i[:-1] for _,i in v) + ")" for k,v in regs])
            detail['details'] = "<br/>".join(
                k + "(" + ", ".join(i[:-1] for _, i in sorted(v)) + ")"
                for k, v in regs) + "<br/>"

    # last name of owners
    # OrderedDict keys de-duplicate the names while keeping first-seen order
    owner = "; ".join(
        collections.OrderedDict(
            (x, 1) for entry in entry['content'].values()
            for x in (get_first("Lehrende", entry) or
                      get_first("Modulverantwortlicher", entry) or "???"
                      ).split("; ")).keys()) or "???"
    short_owner = "; ".join(i.split()[-1] for i in owner.split("; "))

    # category
    isos = first_entry['title'].split(" ")[0].endswith("-os")
    category = fields.get(regulation, {}).get(module_id, ["", ""])[0]
    category = clean_category(category)
    if category == "C. Fachübergreifende Lehrveranstaltungen":
        category = ""
    # fall back to a category keyed on the first two characters of module_id
    category = (
        "B. Oberseminare" if isos else
        # category == "B. Seminare" and entry["credits"] == 0
        category or {
            "01": "C. Nebenfach FB 01 (Wirtschaft & Recht; Entrepeneurship)",
            "02": "C. Nebenfach FB 02 (Philosophie)",
            "03": "C. Nebenfach FB 03 (Humanw.; Sportw.)",
            "04": "C. Nebenfach FB 04 (Logik; Numerik; Optimierung; Stochastik)",
            "05": "C. Nebenfach FB 05 (Elektrow.; Physik)",
            "11": "C. Nebenfach FB 11 (Geow.)",
            "13": "C. Nebenfach FB 13 (Bauinformatik; Verkehr)",
            "16": "C. Nebenfach FB 16 (Fahrzeugtechnik)",
            "18": "C. Nebenfach FB 18 (Elektrotechnik)",
            "41": "C. Sprachkurse",
        }.get(module_id[:2], "0. Pflichtveranstaltungen"))
    if "B.Sc." in regulation:
        category = category.replace("Nebenfach", "Fachübergreifend")
    else:
        category = category.replace("Pflichtveranstaltung", "Nicht einsortiert")

    # dates
    pdt = lambda day: datetime.datetime.strptime(day, "%Y-%m-%d")
    fmtdt = lambda day: datetime.datetime.strftime(day, "%Y-%m-%d")
    shiftNweeks = lambda n, x: fmtdt(pdt(x) + datetime.timedelta(weeks=n))
    dates = {
        i  #+", "+ item['title'].split(" ", 1)[1]
        for item in entry['content'].values() for i in item.get('dates', [])
    }
    uedates = {
        i
        for item in entry['content'].values() for i in item.get('uedates', [])
    }
    uebung = "Übung " if len(uedates) != 1 else "Übungsstunde"
    # uedates = {"\t".join([shiftNweeks(i, y.split("\t",1)[0])] + y.split("\t")[1:3] + [uebung + y.split("\t")[3]]).replace(orig_title, "")
    # each uedate is tab-separated with a start date in field 0 and an end
    # date in field 4; expand it into one entry per week in that span
    uedates = {
        "\t".join([shiftNweeks(i,
                               y.split("\t", 1)[0])] + y.split("\t")[1:3] +
                  [uebung])
        for y in uedates for i in range(
            int((pdt(y.split("\t")[4]) - pdt(y.split("\t")[0])).days / 7 + 1))
    }
    dates = clean_dates(dates | uedates)

    # result
    result = utils.merge_dict(entry, dates)
    assert result['module_id'] == module_id
    del result['module_id']
    result = utils.merge_dict(
        result, {
            "id": module_id,
            "title": title,
            "title_short": abbr,
            "owner": owner,
            "owner_short": short_owner,
            "credits": str(entry["credits"]).zfill(2),
            'category': category,
        })
    return result
def prepare_table(self, procedures):
    """Build a table of SSE speed-ups relative to the scalar procedure.

    Measurements are grouped by ``(size, loops, num_distribution)``.  For
    each group the times of the ``"scalar"``, ``"sse"`` and ``"sse-block"``
    procedures are looked up and the speed-ups ``scalar/sse`` and
    ``scalar/sse-block`` are collected per ``(size, loops)``.  The table
    lists min / average / max of both speed-ups, ordered by size.

    Args:
        procedures: Iterable of measurement items exposing ``size``,
            ``loops``, ``num_distribution``, ``procedure`` and ``time``.

    Returns:
        A ``Table`` with two header rows and one row per ``(size, loops)``.

    Raises:
        KeyError: if a group lacks one of the three procedures.
    """

    def group_key(item):
        return (item.size, item.loops, item.num_distribution)

    def time_of(items, procedure):
        # time of the first measurement in the group for this procedure
        for item in items:
            if item.procedure == procedure:
                return item.time
        raise KeyError("Procedure '%s' not found" % procedure)

    def stats(numbers):
        return min(numbers), sum(numbers) / len(numbers), max(numbers)

    data = {}
    # .items() instead of the Python-2-only .iteritems()
    for (size, loops, _), items in groupby(procedures, group_key).items():
        t0 = time_of(items, "scalar")
        t1 = time_of(items, "sse")
        t2 = time_of(items, "sse-block")

        if t0 < 10 and t1 < 10 and t2 < 10:
            # don't fool people when all measurements are single-digit numbers
            continue

        speedups = data.setdefault((size, loops), [[], []])
        speedups[0].append(float(t0) / t1)
        speedups[1].append(float(t0) / t2)

    table = Table()
    table.add_header([("input", 2), ("SSE speed-up", 3), ("SSE block speed-up", 3)])
    table.add_header(["size [B]", "loops", "min", "avg", "max", "min", "avg", "max"])

    for size, loops in sorted(data, key=lambda key: key[0]):
        sse = stats(data[(size, loops)][0])
        sse_block = stats(data[(size, loops)][1])
        table.add_row([
            '{:,}'.format(size),
            '%d' % loops,
            '%0.2f' % sse[0],
            '%0.2f' % sse[1],
            '%0.2f' % sse[2],
            '%0.2f' % sse_block[0],
            '%0.2f' % sse_block[1],
            '%0.2f' % sse_block[2],
        ])

    return table
def gen_data(gdef, prof_data, op_table, devices, inter=2810, intra=2810):
    """Compute device, link, NCCL-model and operator features.

    Args:
        gdef: Graph definition; ``gdef.node`` is iterated and each node's
            ``name`` and ``op`` are read.
        prof_data: Mapping indexed by ``(node.name, nrep)`` for ``nrep`` in
            ``(1, 2, 4, 8)``; the mean of each value is taken.
        op_table: Dict from op type to integer id.  Mutated in place: unseen
            op types get the next free id.
        devices: Sequence of ``(name, time_ratio, memory)`` tuples; every
            name must contain ``task:<digits>/``.
        inter: Value used for links between devices of different tasks.
        intra: Value used for links between devices of the same task.

    NOTE(review): the body as visible builds its features but contains no
    ``return`` statement -- confirm nothing was lost from the end.
    """
    task_pattern = re.compile(r"task:(\d+)/")

    def task_of(name):
        return task_pattern.search(name)[1]

    # presumably the bandwidth at which base_nccl_model was measured (it
    # equals the defaults of inter/intra) -- TODO confirm
    reference_bw = 2810

    edge_link = [], []
    link_feats = []

    def add_link(a, b, kind, bandwidth):
        # record the link in both directions, one feature row per direction
        edge_link[0].append(a)
        edge_link[1].append(b)
        edge_link[0].append(b)
        edge_link[1].append(a)
        for _ in range(2):
            link_feats.append(
                [kind, bandwidth / 100_000, math.log(bandwidth) / 10])

    device_feats = [[time_ratio / 10, memory / 10_000_000_000]
                    for _, time_ratio, memory in devices]

    # device indices per task; intra-task links are added as devices arrive
    tasks = {}
    for i, (name, *_) in enumerate(devices):
        task = task_of(name)
        if task in tasks:
            for other in tasks[task]:
                add_link(i, other, 0, intra)
            tasks[task].append(i)
        else:
            tasks[task] = [i]

    # inter-task links: every ordered pair of tasks is visited, so each
    # cross-task device pair is added once from either side
    for task, devs in tasks.items():
        for dev in devs:
            for another_task, other_devs in tasks.items():
                if another_task != task:
                    for another_dev in other_devs:
                        add_link(dev, another_dev, 1, inter)

    # bandwidth = [x for _, x, _ in link_feats]
    # tgroups = k_spanning_tree(g, bandwidth, 2) + k_spanning_tree(g, bandwidth, 4) + [[0]] + [list(range(len(devices)))]

    base_nccl_model = [
        0.043420241077615454, 368.2013618677043, 0.27766802543921265,
        211.91926070037152
    ]
    nccl_models = {}
    dgroups = groupby(devices,
                      key=lambda x: task_of(x[0]),
                      value=lambda x: x[0])

    for task, devs in dgroups.items():
        nccl_models[','.join(sorted(devs))] = [
            x * reference_bw / intra for x in base_nccl_model
        ]

    # every combination of two or more tasks; the loop variable no longer
    # shadows the ``tasks`` dict built above
    for task_combo in (combo for size in range(2, len(dgroups) + 1)
                       for combo in itertools.combinations(dgroups.keys(), size)):
        # the first (alphabet order) device is the leader of the task
        devs = [dgroups[t][0] for t in task_combo]
        nccl_models[','.join(sorted(devs))] = [
            x * reference_bw / inter for x in base_nccl_model
        ]

    op_types = []
    for node in gdef.node:
        if node.op not in op_table:
            op_table[node.op] = len(op_table)
        op_types.append(op_table[node.op])

    op_feats = [[
        np.mean(prof_data[(node.name, nrep)]) / 10_000
        for nrep in (1, 2, 4, 8)
    ] for node in gdef.node]
def plot_fits(plot, data, get_fit_range, badness_threshold, x_col, x_label,
              y_col, y_label, absdydx_label, title_cols, get_title, get_fn,
              color_cols, get_color, get_color_label, get_dmc, dmc_label,
              dmc_yerr_col=None):
    """Fit power laws per method and draw derivative and value plots.

    ``data`` must form exactly one group under ``title_cols``.  For every
    ``color_cols`` group inside it the raw points and their numerical
    derivative are plotted, ``do_fit`` is run on the points inside the
    method's fit range, and the fitted curves are overlaid: ``|dy/dx|`` on
    the upper (log-log) axes, ``y`` on the lower axes.  A reference value
    from ``get_dmc`` is drawn on the lower axes when available.

    Args:
        plot: When truthy, axis limits are synced with the JSON file under
            ``plot_settings`` and the figure is saved via ``utils.savefig``.
        data: DataFrame holding ``x_col``, ``y_col`` and the grouping columns.
        get_fit_range: ``method -> (lo, hi)`` range of x used for fitting.
        badness_threshold: Forwarded to ``do_fit``.
        x_col, y_col: Column names of the abscissa and ordinate.
        x_label, y_label, absdydx_label: Axis labels.
        title_cols, color_cols: Grouping columns for ``utils.groupby``.
        get_title, get_fn: Called with the title key as keyword arguments.
        get_color, get_color_label: ``method -> colour`` / ``-> label``.
        get_dmc: Called with the title key; returns a frame, possibly empty.
        dmc_label: Legend label of the reference band.
        dmc_yerr_col: Optional column with the reference uncertainty.

    Returns:
        dict mapping each fitted method to ``{"num_points": ...}`` updated
        with the stages returned by ``do_fit``.
    """
    # continuous x range for plotting continuous functions
    x_c = np.linspace(data[x_col].min() - 1, data[x_col].max() + 1, 250)
    [(title_key, gg)] = utils.groupby(data, title_cols)
    fig, ax = plt.subplots(2)
    fig.set_size_inches(8, 10)  # otherwise the text will get obscured
    y_range = np.array([np.nan, np.nan])
    fit_results = {}

    for color_key, g in utils.groupby(gg, color_cols):
        method = color_key["method"]
        logging.info(f"plot_fits: method: {method}")
        color = get_color(method)
        label = get_color_label(method)
        g = g.sort_values([x_col])
        d = g.rename(columns={x_col: "x", y_col: "y"})
        deriv_d = differentiate(d, "x", "y", "dydx")
        x = deriv_d["x"]
        dydx = deriv_d["dydx"]
        ax[0].plot(x, abs(dydx), "x", label=label, color=color)
        ax[1].plot(d["x"], d["y"], "x", label=label, color=color)
        utils.update_range(y_range, d["y"])

        # clip the requested fit range to the data actually present
        fit_range = get_fit_range(method)
        fit_range = (max(fit_range[0], d["x"].min()),
                     min(fit_range[1], d["x"].max()))
        if fit_range[1] < fit_range[0]:
            continue
        fit_range = fit_range + np.array([-0.2, 0.2])  # to make it look nicer
        ax[0].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        ax[1].axvspan(fit_range[0], fit_range[1], alpha=0.05, color=color)
        d_subset = d[d["x"].between(*fit_range)]
        deriv_subset = deriv_d[deriv_d["x"].between(*fit_range)]
        if len(deriv_subset) < 2:
            continue

        fit = do_fit(d_subset, deriv_subset,
                     badness_threshold=badness_threshold)
        if fit is None:
            continue
        fit_result = {
            "num_points": len(d["x"])
        }
        fit_result.update(fit)
        fit_results[method] = fit_result

        # circle the points the log-derivative fit flagged as outliers
        outliers = deriv_subset.loc[fit["logderiv"]["outliers"]]
        ax[0].plot(outliers["x"], abs(outliers["dydx"]), "o",
                   markerfacecolor="none", label="", color="red")

        for stage, result in fit.items():
            if stage == "fixedab":
                continue  # fixedab yields the same plot here as logderiv
            a = result["coefficient"]
            b = result["exponent"]
            b_err = result.get("exponent_err", None)
            dydx_c = a * b * x_c ** (b - 1.0)
            ax[0].plot(
                x_c, abs(dydx_c), linestyle=STAGE_TO_LINESTYLE[stage],
                label=label + " " + fit_label(stage, b, b_err),
                color=color)

        for stage, result in fit.items():
            if "constant" not in result:
                continue
            a = result["coefficient"]
            b = result["exponent"]
            c = result["constant"]
            b_err = result.get("exponent_err", None)
            y_c = a * x_c ** b + c
            ax[1].plot(
                x_c, y_c, linestyle=STAGE_TO_LINESTYLE[stage],
                label=label + " " + fit_label(stage, b, b_err),
                color=color)
            if b < 0:
                # a * x**b vanishes for large x, so c is the asymptote
                ax[1].axhline(c, linestyle=":", color=color)
            else:
                # logging.warn is a deprecated alias; use logging.warning
                logging.warning(
                    f"plot_fits: {stage}.b >= 0: no asymptotic result")
            utils.update_range(y_range, c)

    g = get_dmc(**title_key)
    if len(g):
        y = g[y_col].iloc[0]
        if dmc_yerr_col is not None:
            y_err = g[dmc_yerr_col].iloc[0]
            ax[1].axhspan(y - y_err, y + y_err, alpha=0.4, color="black",
                          label=dmc_label)
            utils.update_range(y_range, [y - y_err, y + y_err])
        # add an extra line to make sure it's visible
        ax[1].axhline(y, alpha=0.4, color="black")
        utils.update_range(y_range, [y])

    ax[0].set_xlabel(x_label)
    ax[0].set_ylabel(absdydx_label)
    ax[0].set_xscale("log")
    ax[0].set_yscale("log")
    ax[0].set_title(get_title(**title_key))
    # shrink the axes to leave room for the legend on the right
    box = ax[0].get_position()
    ax[0].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[0].legend(bbox_to_anchor=(1, 1.0))
    ax[1].legend()
    ax[1].set_xlabel(x_label)
    ax[1].set_ylabel(y_label)
    ax[1].set_ylim(*utils.expand_range(y_range, 0.05))
    box = ax[1].get_position()
    ax[1].set_position([box.x0, box.y0, box.width * 0.6, box.height])
    ax[1].legend(bbox_to_anchor=(1, 1.0))
    ax[1].get_xaxis().set_major_locator(
        matplotlib.ticker.MaxNLocator(integer=True))

    if plot:
        fn = get_fn(**title_key)
        settings_fn = os.path.join("plot_settings", fn + ".json")
        settings = utils.load_json(settings_fn) or {"ax1": {}, "ax2": {}}

        def save_settings():
            utils.save_json(settings_fn, settings)

        utils.sync_axes_lims(ax[0], settings["ax1"], save_settings)
        utils.sync_axes_lims(ax[1], settings["ax2"], save_settings)
        utils.savefig(fig, fn)
    return fit_results
def gurobi(wanted_parts, available_parts, stores, shipping_cost=10.0):
    """Choose which lots to buy by solving a mixed-integer program in Gurobi.

    One binary variable per store says whether the store is used (costing
    ``shipping_cost``); one continuous variable per lot says how much of it
    is bought (costing ``cost_per_unit`` each).  Constraints: a lot can only
    be bought from a used store and up to its available quantity, every
    wanted part must be covered, and a used store must receive at least its
    ``minimum_buy``.

    Args:
        wanted_parts: Iterable of dicts with ``ItemID``, ``ColorID``, ``Qty``.
        available_parts: Iterable of lot dicts with ``store_id``,
            ``item_id``, ``color_id``, ``wanted_color_id``,
            ``quantity_available`` and ``cost_per_unit``.
        stores: Iterable of dicts with ``store_id`` and ``minimum_buy``.
        shipping_cost: Objective cost charged per store used.

    Returns:
        ``[{'cost': ..., 'allocation': [...], 'store_ids': [...]}]`` when the
        objective value is finite, otherwise ``[]``.
    """
    from gurobipy import Model, GRB, LinExpr

    def lot_key(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    def wanted_key(part):
        return (part['ItemID'], part['ColorID'])

    available_by_store = utils.groupby(available_parts, lambda x: x['store_id'])
    store_by_id = dict((s['store_id'], s) for s in stores)

    m = Model()
    store_variables = {}     # store id to variable indicating store is used
    quantity_variables = []  # list of all lot variables + metadata

    # for every store
    # (.items() rather than the Python-2-only .iteritems())
    for (store_id, inventory) in available_by_store.items():
        # a variable for if anything was bought from this store. if 1, then pay
        # shipping cost and all store inventory is available; if 0, then don't pay
        # for shipping and every lot in it has 0 quantity available
        store_variables[store_id] = m.addVar(
            0.0, 1.0, shipping_cost, GRB.BINARY,
            "use-store=%s" % (store_id,))

        for lot in inventory:
            store_id = lot['store_id']
            quantity = lot['quantity_available']
            unit_cost = lot['cost_per_unit']
            item_id = lot['item_id']
            color_id = lot['color_id']

            # a variable for how much to buy of this lot
            v = m.addVar(
                0.0, quantity, unit_cost, GRB.CONTINUOUS,
                "quantity-store=%s-item=%s-color=%s" % (store_id, item_id, color_id))

            # keep a list of all lots
            quantity_variables.append({
                'store_id': store_id,
                'item_id': lot['item_id'],
                'wanted_color_id': lot['wanted_color_id'],
                'color_id': lot['color_id'],
                'variable': v,
                'quantity_available': quantity,
                'cost_per_unit': unit_cost
            })

    # actually put the variables into the model
    m.update()

    # for every lot in every store
    for lot in quantity_variables:
        use_store = store_variables[lot['store_id']]
        quantity = lot['quantity_available']
        v = lot['variable']

        # a constraint for how much can be bought
        # (%s for the colour id, like the other names: ids need not be ints)
        m.addConstr(
            LinExpr([1.0, -1 * quantity], [v, use_store]),
            GRB.LESS_EQUAL, 0.0,
            "maxquantity-store=%s-item=%s-color-%s" % (lot['store_id'], lot['item_id'], lot['color_id']))

    # for every wanted lot
    variables_by_id = utils.groupby(quantity_variables, lot_key)
    for lot in wanted_parts:
        # a constraint saying amount bought >= wanted amount
        # (a list, because len() of a Python 3 ``map`` object fails)
        variables = [x['variable'] for x in variables_by_id[wanted_key(lot)]]
        constants = len(variables) * [1.0]
        m.addConstr(
            LinExpr(constants, variables),
            GRB.GREATER_EQUAL, lot['Qty'],
            "wantedamount-item=%s-color=%s" % (lot['ItemID'], lot['ColorID']))

    # for every store
    variables_by_store = utils.groupby(quantity_variables, lambda x: x['store_id'])
    for (store_id, lots) in variables_by_store.items():
        use_store = store_variables[store_id]
        minimum_purchase = store_by_id[store_id]['minimum_buy']

        # a constraint saying "if I purchased from this store, I bought the minimum amount or more"
        constants = [lot['cost_per_unit'] for lot in lots] + [-1 * minimum_purchase]
        variables = [lot['variable'] for lot in lots] + [use_store]
        m.addConstr(
            LinExpr(constants, variables),
            GRB.GREATER_EQUAL, 0.0,
            "minbuy-store=%s" % (store_id,))

    # minimize sum of costs of items bought + shipping costs
    m.setParam(GRB.param.MIPGap, 0.01)  # stop when duality gap <= 1%
    m.optimize()

    # get results
    if m.ObjVal < float('inf'):
        result = []
        for lot in quantity_variables:
            # get variable out
            v = lot['variable']
            del lot['variable']

            # lot variables are continuous, so they might not actually be integral.
            # If they're not, check that they're "almost" integral, so we can just
            # round. Otherwise, print this warning. According to theory the optimal
            # solution is for all continuous variables to be integral.
            if v.X != int(v.X) and abs(v.X - round(v.X)) > 1e-3:
                print('Uh oh. Variable %s has value %f. This is a little close for comfort.' % (v.VarName, v.X))

            # save quantity to buy if it's > 0
            lot['quantity'] = int(round(v.X))
            if lot['quantity'] > 0:
                result.append(lot)

        cost = sum(e['quantity'] * e['cost_per_unit'] for e in result)
        store_ids = list(set(e['store_id'] for e in result))
        return [{
            'cost': cost,
            'allocation': result,
            'store_ids': store_ids
        }]
    else:
        print('No solution :(')
        return []
def greedy(wanted_parts, price_guide):
    """Greedy Set-Cover algorithm to minimize number of stores purchased from.

    Disregards prices when choosing stores: each round picks the store that
    can cover the most still-wanted pieces, buys its matching lots (cheapest
    lot first within that store), and drops the store.  Both inputs are
    deep-copied, so the caller's data is not modified.

    Args:
        wanted_parts: List of dicts with ``ItemID``, ``ColorID``, ``Qty``
            and ``ItemName``.
        price_guide: Iterable of lot dicts with ``store_id``, ``item_id``,
            ``color_id``, ``wanted_color_id``, ``quantity_available`` and
            ``cost_per_unit``.

    Returns:
        A one-element list ``[{'cost', 'allocation', 'store_ids'}]``.  A
        warning is printed when some wanted parts could not be fully bought.
    """

    def lot_key(lot):
        return (lot['item_id'], lot['wanted_color_id'])

    def part_key(part):
        return (part['ItemID'], part['ColorID'])

    def coverage(inventory, wanted_by_item):
        # how many wanted pieces this store's inventory could supply
        # only worry about items wanted
        relevant = [lot for lot in inventory if lot_key(lot) in wanted_by_item]
        total = 0
        # count up how much there is of each (item_id, color_id) pair
        for key, lots in utils.groupby(relevant, lot_key).items():
            stocked = sum(lot['quantity_available'] for lot in lots)
            # count how much of each item I'd buy
            total += min(wanted_by_item[key][0]['Qty'], stocked)
        return total

    result = []
    available_parts = utils.groupby(price_guide, lambda x: x['store_id'])
    available_parts = copy.deepcopy(available_parts)
    wanted_parts = copy.deepcopy(wanted_parts)
    wanted_by_item = utils.groupby(wanted_parts, part_key)

    # while we don't have all the parts we need
    while wanted_parts and available_parts:
        # calculate how many parts each vendor can cover
        # (.items() rather than the Python-2-only .iteritems())
        coverages = [(store, inventory, coverage(inventory, wanted_by_item))
                     for (store, inventory) in available_parts.items()]

        # use the store that has the most inventory; taking the last element
        # of the stable sort keeps the original choice among ties
        next_store, inventory, n_parts = sorted(coverages, key=lambda c: c[2])[-1]
        if n_parts == 0:
            break

        # update the quantities in the wanted parts list
        by_item = utils.groupby(inventory, lot_key)
        new_wanted_parts = []
        for item in wanted_parts:
            # get all lots from next_store matching item
            item_id = item['ItemID']
            color_id = item['ColorID']
            wanted_qty = item['Qty']

            # most expensive first, so pop() hands out the cheapest lot
            available = sorted(by_item.get((item_id, color_id), []),
                               key=lambda x: -1 * x['cost_per_unit'])

            # keep buying up lots until the wanted_qty is full or the store is
            # bought out
            while wanted_qty > 0 and available:
                lot = available.pop()
                amount_to_buy = min(lot['quantity_available'], wanted_qty)
                result.append({
                    'store_id': lot['store_id'],
                    'item_id': item_id,
                    'wanted_color_id': lot['wanted_color_id'],
                    'color_id': lot['color_id'],
                    'quantity_available': lot['quantity_available'],
                    'cost_per_unit': lot['cost_per_unit'],
                    'quantity': amount_to_buy,
                })
                wanted_qty -= amount_to_buy

            # this store couldn't fill out our order
            if wanted_qty > 0:
                item['Qty'] = wanted_qty
                new_wanted_parts.append(item)

        # update wanted parts list, remove store from inventory
        wanted_parts = new_wanted_parts
        wanted_by_item = utils.groupby(wanted_parts, part_key)
        del available_parts[next_store]

    if wanted_parts:
        print("WARNING: there wasn't enough availability to buy the following items:")
        print(", ".join(e['ItemName'] for e in wanted_parts))

    cost = sum(e['quantity'] * e['cost_per_unit'] for e in result)
    store_ids = list(set(e['store_id'] for e in result))
    return [{
        'cost': cost,
        'allocation': result,
        'store_ids': store_ids
    }]