def get_contributors(project):
    """ get the list of contributors and the tasks they worked on """
    # filter on tasks with state DONE
    filter = and_(
        TaskState.project_id == project.id,
        TaskState.state == TaskState.state_done
    )
    tasks = DBSession.query(TaskState.task_id, User.username) \
        .join(TaskState.user) \
        .filter(filter) \
        .order_by(TaskState.user_id) \
        .all()

    contributors = {}
    for user, done_tasks in itertools.groupby(tasks, key=lambda t: t.username):
        if user not in contributors:
            contributors[user] = {}
        contributors[user]['done'] = [task[0] for task in done_tasks]

    assigned = DBSession.query(Task.id, User.username) \
        .join(Task.assigned_to) \
        .filter(
            Task.project_id == project.id,
            Task.assigned_to_id != None  # noqa
        ) \
        .order_by(Task.assigned_to_id)
    for user, assigned_tasks in itertools.groupby(assigned, key=lambda t: t.username):
        if user not in contributors:
            contributors[user] = {}
        contributors[user]['assigned'] = [task[0] for task in assigned_tasks]

    return contributors
def _groupby_x_axis_and_vqs(self):
    """Returns a list of lists of lists in which all items in the same outer
    list share the same xAxis and, within each such list, all items in the
    same sub-list come from the same ValueQuerySet.

    Here is an example of what this function would return. ::

        [
         [[(term-1-A-1, opts-1-A-1), (term-1-A-2, opts-1-A-2), ...],
          [(term-1-B-1, opts-1-B-1), (term-1-B-2, opts-1-B-2), ...],
          ...],
         [[(term-2-A-1, opts-2-A-1), (term-2-A-2, opts-2-A-2), ...],
          [(term-2-B-1, opts-2-B-1), (term-2-B-2, opts-2-B-2), ...],
          ...],
         ...
        ]

    In the above example,

    - term-1-*-* all have the same xAxis.
    - term-*-A-* are all from the same ValueQuerySet (table).
    """
    dss = self.datasource.series
    x_axis_vqs_groups = defaultdict(dict)
    sort_fn = lambda tk_td4: tk_td4[1].get('xAxis', 0)
    so = sorted(list(self.series_options.items()), key=sort_fn)
    x_axis_groups = groupby(so, sort_fn)
    for (x_axis, itr1) in x_axis_groups:
        sort_fn = lambda tk_td: dss[tk_td[1]['_x_axis_term']]['_data']
        itr1 = sorted(itr1, key=sort_fn)
        for _vqs_num, (_data, itr2) in enumerate(groupby(itr1, sort_fn)):
            x_axis_vqs_groups[x_axis][_vqs_num] = _x_vqs = {}
            for tk, td in itr2:
                _x_vqs.setdefault(td['_x_axis_term'], []).append(tk)
    return x_axis_vqs_groups
def finalScores(gene, firstGroup, secondGroup, patientCountFirst, patientCountSecond):
    # These two blocks collapse multiple mutations in the same gene so that
    # they are counted as a single mutation per variant.
    firstGroupFiltered = []
    secondGroupFiltered = []
    for variantList in firstGroup:
        variantsInOneFile = []
        for variant in variantList:
            if variant[1] == gene:
                variantsInOneFile.append((variant[0], variant[-1] * variant[-2] * variant[-3]))
        firstGroupFiltered.append(variantsInOneFile)
    for variantList in secondGroup:
        variantsInOneFile = []
        for variant in variantList:
            if variant[1] == gene:
                variantsInOneFile.append((variant[0], variant[-1] * variant[-2] * variant[-3]))
        secondGroupFiltered.append(variantsInOneFile)

    firstCounter = 0
    for variantList in firstGroupFiltered:
        for key, group in itertools.groupby(variantList, lambda x: x[0]):
            firstCounter += max([i[1] for i in group])

    secondCounter = 0
    for variantList in secondGroupFiltered:
        for key, group in itertools.groupby(variantList, lambda x: x[0]):
            secondCounter += max([i[1] for i in group])

    return (firstCounter / patientCountFirst - secondCounter / patientCountSecond)
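# A minimal, self-contained sketch of the scoring idiom used in finalScores above:
# consecutive entries sharing a variant key are collapsed with max() so repeated
# mutations of the same variant are only counted once. The tuples below are
# hypothetical (variant_id, score) pairs, not data from the original project.
import itertools

variants_in_one_file = [("var1", 0.2), ("var1", 0.9), ("var2", 0.5)]
total = 0
for key, group in itertools.groupby(variants_in_one_file, lambda x: x[0]):
    total += max(score for _, score in group)
print(total)  # 0.9 + 0.5 = 1.4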
def get_eligible_certificates(exclude=None):
    """
    Finds all certificates that are eligible for certificate expiration.
    :param exclude:
    :return:
    """
    certificates = defaultdict(dict)
    certs = get_certificates(exclude=exclude)

    # group by owner
    for owner, items in groupby(certs, lambda x: x.owner):
        notification_groups = []

        for certificate in items:
            notifications = needs_notification(certificate)

            if notifications:
                for notification in notifications:
                    notification_groups.append((notification, certificate))

        # group by notification
        for notification, items in groupby(notification_groups, lambda x: x[0].label):
            certificates[owner][notification] = list(items)

    return certificates
def filterRepeatPair(matchPair):
    newMatchPair = []
    # Format : [[3, 5, 2486, 2532, 2486, 'Read48_d'] ]
    inNoList = []
    outNoList = []

    matchPair.sort(key=itemgetter(0))
    for key, items in groupby(matchPair, itemgetter(0)):
        ct = 0
        anotherSideList = []
        for eachitem in items:
            ct = ct + 1
            anotherSideList.append(eachitem[1])
        if len(set(anotherSideList)) > 1:
            inNoList.append(key)

    matchPair.sort(key=itemgetter(1))
    for key, items in groupby(matchPair, itemgetter(1)):
        ct = 0
        anotherSideList = []
        for eachitem in items:
            ct = ct + 1
            anotherSideList.append(eachitem[0])
        if len(set(anotherSideList)) > 1:
            outNoList.append(key)

    for eachitem in matchPair:
        if not eachitem[0] in inNoList and not eachitem[1] in outNoList:
            newMatchPair.append(eachitem)

    return newMatchPair
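# Hypothetical usage sketch for filterRepeatPair. The rows follow the commented
# format above; only the first two columns (in-node, out-node) matter here.
# Pair (3, 5) survives because node 3 maps only to node 5 and node 5 is reached
# only from node 3, while the two pairs starting at node 7 are dropped as repeats.
match_pair = [
    [3, 5, 2486, 2532, 2486, 'Read48_d'],
    [7, 9, 100, 150, 100, 'Read12_p'],
    [7, 11, 200, 260, 200, 'Read13_p'],
]
print(filterRepeatPair(match_pair))  # -> [[3, 5, 2486, 2532, 2486, 'Read48_d']]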
def personal_view(self, request, week): items = m.DishOrderDayItem.objects\ .filter(order__week=week, count__gt=0)\ .select_related(depth=4)\ .order_by( 'order__user__profile__office__id', 'order__user__first_name', 'order__user__pk', 'dish_day__day__pk', 'dish_day__dish__pk', ) offices = [] items = list(items) self.fix_profiles([i.order for i in items]) for office, usersseq in groupby(list(items), lambda i: i.order.user.profile.office): users = [] for user, seq in groupby(usersseq, lambda i: i.order.user): seq = list(seq) users.append(( user, group_by_materialize(groupby(seq, lambda i: i.dish_day.day)), )) offices.append(( office, users )) return direct_to_template(request, 'dinner/report_personal.html', { 'week': week, 'offices': offices, })
def GetGoAnnotation(seqids): db = pymysql.connect(host = "mysql-amigo.ebi.ac.uk", user = "******", passwd = "amigo", db = "go_latest", port = 4085) cur = db.cursor() cur.execute( """ SELECT term.name, term.acc, term.term_type FROM gene_product INNER JOIN dbxref ON (gene_product.dbxref_id=dbxref.id) INNER JOIN species ON (gene_product.species_id=species.id) INNER JOIN association ON (gene_product.id=association.gene_product_id) INNER JOIN evidence ON (association.id=evidence.association_id) INNER JOIN term ON (association.term_id=term.id) WHERE dbxref.xref_key = %s; """, seqids) List = list() GO = list() f= cur.fetchall() for i in f: List.append(i[0] + ":" + i[2]) GO.append(i[1]) List = list(map(itemgetter(0), groupby(List))) GO = list(map(itemgetter(0), groupby(GO))) db.close() return[seqids,List,GO]
def mapper():
    for user, data in itertools.groupby(get_data(sys.stdin, timecol=1), operator.itemgetter(0)):
        '''for each user'''
        # print '--------', user
        for day, group in itertools.groupby(data, lambda k: datetime.fromtimestamp(long(k[1])).strftime('%Y%m%d')):
            # print day
            process_line(user, group)
def record(self, records, otsv, rtsv, blocking_validation): def records_by_chunk(batch_size, records): offset = 0 while len(records[offset:]) > 0: yield records[offset:offset+batch_size] offset += batch_size def get_data_collection(label, action): if label in self.preloaded_data_collections: return self.preloaded_data_collections[label] else: dc_conf = {'label' : label, 'action': action} return self.kb.factory.create(self.kb.DataCollection, dc_conf) if len(records) == 0: msg = 'No records are going to be imported' self.logger.critical(msg) raise core.ImporterValidationError(msg) study = self.find_study(records) self.data_sample_klass = self.find_data_sample_klass(records) self.preload_data_samples() self.preload_data_collections() asetup = self.get_action_setup('importer.data_collection-%f' % time.time(), json.dumps(self.action_setup_conf)) device = self.get_device('importer-%s.data_collection' % version, 'CRS4', 'IMPORT', version) conf = { 'setup': asetup, 'device': device, 'actionCategory': self.kb.ActionCategory.PROCESSING, 'operator': self.operator, 'context': study, } action = self.kb.factory.create(self.kb.Action, conf).save() def keyfunc(r): return r['label'] sub_records = [] data_collections = {} records = sorted(records, key=keyfunc) for k, g in it.groupby(records, keyfunc): data_collections[k] = get_data_collection(k, action) good_records, bad_records = self.do_consistency_checks(data_collections[k], list(g)) sub_records.append(good_records) for br in bad_records: rtsv.writerow(br) if blocking_validation and len(bad_records) >= 1: self.kb.delete(action) raise core.ImporterValidationError('%d invalid records' % len(bad_records)) records = sum(sub_records, []) if len(records) == 0: self.kb.delete(action) msg = 'No records are going to be imported' self.logger.warning(msg) sys.exit(0) records = sorted(records, key=keyfunc) for k, g in it.groupby(records, keyfunc): dc = data_collections[k] if not dc.is_mapped(): dc.save() for i, c in enumerate(records_by_chunk(self.batch_size, list(g))): self.logger.info('start processing chunk %s-%d' % (k, i)) self.process_chunk(otsv, study, dc, c) self.logger.info('done processing chunk %s-%d' % (k,i))
def dispatch_request(self): template = "university/index.html" if request.headers.get("X-Pjax", None): template = "university/_charts.html" marks = ( Mark.query.join(Student, Group) .filter(Group.year == Group.current_year()) .with_entities(Mark.value, Group.id.label("group_id"), Student.id.label("student_id"), Student.sex) .order_by("group_id", "value", "sex") ) marks = { groupd_id: { "marks": { v: { "marks_count": len(marks), "marks_summ": sum(i.value if i.value <= Mark.MARK_INCREDIBLE else 1 for i in marks), } for v, marks in {v: list(marks) for v, marks in groupby(items, lambda x: x.value)}.items() if Mark.MARK_ABSENT <= v <= Mark.MARK_INCREDIBLE or v == Mark.MARK_SHINING }, "marks_count": len(items), "marks_count_positive": len(list(i for i in items if Mark.MARK_BASE < i.value <= Mark.MARK_INCREDIBLE)), "marks_summ": sum(i.value for i in items), "marks_summ_positive": sum(i.value for i in items if Mark.MARK_BASE < i.value <= Mark.MARK_INCREDIBLE), } for groupd_id, items in {id: list(items) for id, items in groupby(marks, lambda x: x.group_id)}.items() } for group_id, info in marks.items(): info["max_summ"] = max(i["marks_summ"] for i in info["marks"].values()) return render_template(template, **{"marks": marks})
def resultAbs(x6):
    ordered = sorted(x6, key=operator.itemgetter(1))
    # first item of the group with the largest value, and of the group with the smallest
    max_item = [list(group) for key, group in itertools.groupby(ordered, operator.itemgetter(1))][-1][0]
    min_item = [list(group) for key, group in itertools.groupby(ordered, operator.itemgetter(1))][0][0]
    print(max_item[0], "%.2f" % max_item[1], sep=",")
    print(min_item[0], "%.2f" % min_item[1], sep=",")
def fasta_iter(fasta_name):
    '''
    given a fasta file, yield tuples of header, sequence

    modified from Brent Pedersen
    Correct Way To Parse A Fasta File In Python
    https://www.biostars.org/p/710/
    '''
    if (fasta_name[-3:] == '.gz') or (fasta_name[-5:] == '.gzip'):
        with gzip.open(fasta_name, 'rb') as f:
            data = (x[1] for x in groupby(f, lambda line: line.decode('utf-8')[0] == ">"))
            for header in data:
                header = header.__next__().decode('utf-8')[1:].strip()
                seq = "".join(s.decode('utf-8').strip() for s in data.__next__())
                yield (header, seq)
    else:
        with open(fasta_name) as f:
            # ditch the boolean (x[0]) and just keep the header or sequence since
            # we know they alternate.
            data = (x[1] for x in groupby(f, lambda line: line[0] == ">"))
            for header in data:
                # drop the ">"
                header = header.__next__()[1:].strip()
                # join all sequence lines to one.
                seq = "".join(s.strip() for s in data.__next__())
                yield (header, seq)
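# Hypothetical usage of fasta_iter above; 'example.fa' is an illustrative path.
# Each yielded tuple is (header without '>', sequence with newlines removed).
for header, seq in fasta_iter('example.fa'):
    print(header, len(seq))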
def get_exclusions(request, naics_code, link_page=None, all_langs=False): with request.connmgr.get_connection() as conn: cursor = conn.execute('EXEC dbo.sp_NAICS_Exclusion_l ?,?', str(naics_code), all_langs) exclusions = cursor.fetchall() cursor.nextset() uses = cursor.fetchall() cursor.close() uses = dict((k, list(v)) for k,v in groupby(uses, attrgetter('Exclusion_ID'))) output = [] for establishment, exclusions in groupby(exclusions, attrgetter('Establishment')): if establishment: output.extend([Markup('<p>'), _('Establishments primarily engaged in:', request), Markup('</p>')]) output.append(Markup('<ul>')) for exclusion in exclusions: use_instead = "; ".join(link_code(request, x.Code, x.Code, link_page) + ' ' + escape(x.Classification) for x in (uses.get(exclusion.Exclusion_ID) or [])) if use_instead: use_instead = use_instead.join([" (", ")"]) output.extend([Markup('<li>'), escape(exclusion.Description), use_instead,Markup('</li>')]) output.append(Markup('</ul>')) return Markup(''.join(output))
def main():
    for fingerprint, fgroup in groupby(read_input(sys.stdin), itemgetter(1)):
        names = []
        for name, ngroup in groupby(fgroup, itemgetter(2)):
            names.append(name.strip())
        if len(names) > 1:
            print '%s\t%s' % (fingerprint.strip(), names)
def do_controller_specific_work(self): if self.user is None: raise Authorization_Exception("You must be logged in to list your targets.") targets = self.user.targets events = sorted(map(lambda target: target.event, targets), key = lambda event: event.section_id) targets_by_event_id = {} for target in targets: targets_by_event_id[target.event_id] = target.target_id events_by_section_id = dict((k, list(g)) for k, g in groupby(events, lambda event: event.section_id)) db_session = DB_Session_Factory.get_db_session() sections = db_session.query(Section).filter(Section.section_id.in_(events_by_section_id.keys())).order_by(Section.course_id).all() sections_by_course_id = dict((k, list(g)) for k, g in groupby(sections, lambda section: section.course_id)) courses = db_session.query(Course).filter(Course.course_id.in_(sections_by_course_id.keys())).all() response = [] for course in courses: course_dict = course.for_api() sections_list = [] for section in sections_by_course_id.get(course.course_id): section_dict = section.for_api() events = [] for event_in_this_section in events_by_section_id[section.section_id]: event_dict = event_in_this_section.for_api() target_id_number = targets_by_event_id.get(event_in_this_section.event_id, None) if target_id_number is not None: event_dict['target_id'] = str(target_id_number) events.append(event_dict) section_dict['events'] = events sections_list.append(section_dict) course_dict['course_sections'] = sections_list response.append(course_dict) return HTTP_Response('200 OK', {'targets' : response})
def grouped_totals(entries): select = {"day": {"date": """DATE_TRUNC('day', end_time)"""}, "week": {"date": """DATE_TRUNC('week', end_time)"""}} weekly = entries.extra(select=select["week"]).values('date', 'billable') weekly = weekly.annotate(hours=Sum('hours')).order_by('date') daily = entries.extra(select=select["day"]).values('date', 'project__name', 'billable') daily = daily.annotate(hours=Sum('hours')).order_by('date', 'project__name') weeks = {} for week, week_entries in groupby(weekly, lambda x: x['date']): try: if timezone.is_naive(week): week = timezone.make_aware(week, timezone.get_current_timezone()) except AttributeError: week = datetime.datetime.combine(week, timezone.get_current_timezone()) weeks[week] = get_hours(week_entries) days = [] last_week = None for day, day_entries in groupby(daily, lambda x: x['date']): week = get_week_start(day) if last_week and week > last_week: yield last_week, weeks.get(last_week, {}), days days = [] days.append((day, daily_summary(day_entries))) last_week = week yield week, weeks.get(week, {}), days
def report(request): variation_list = ProductVariation.objects.order_by( "product__vendor", "product__position", "product__title" ).select_related("product__vendor") total_price = 0 total_count = 0 report = [] for vendor, vrs in groupby(variation_list, lambda x: x.product.vendor): vendor_vars = list(vrs) prod_details = [] vendor_price = 0 vendor_count = 0 for prod, vrs in groupby(vendor_vars, lambda x: x.product): cur_vars = list(vrs) stock_price = 0 stock_count = 0 for vr in cur_vars: if (vr.num_in_stock > 0) and (vr.unit_price): stock_price += vr.unit_price * vr.num_in_stock stock_count += vr.num_in_stock vrs_sum = (stock_count, stock_price) vendor_price += stock_price vendor_count += stock_count prod_details.append({"product": prod, "variations": cur_vars, "summary": vrs_sum}) vendor_sum = (vendor_count, vendor_price) total_price += vendor_price total_count += vendor_count report.append({"vendor": vendor, "prod_details": prod_details, "summary": vendor_sum}) total_sum = (total_count, total_price) context = {"report": report, "variation_list": variation_list, "total_sum": total_sum} return direct_to_template(request, "admin/shop/report.html", context)
def collapse_cell_addresses(cells, input_ranges=()):
    """ Collapse a collection of cell co-ordinates down into an optimal
        range or collection of ranges.

        E.g. Cells A1, A2, A3, B1, B2 and B3 should have the data-validation
        object applied, attempt to collapse down to a single range, A1:B3.

        Currently only collapsing contiguous vertical ranges (i.e. above
        example results in A1:A3 B1:B3). More work to come.
    """
    keyfunc = lambda x: x[0]

    # Get the raw coordinates for each cell given
    raw_coords = [coordinate_from_string(cell) for cell in cells]

    # Group up as {column: [list of rows]}
    grouped_coords = OrderedDict((k, [c[1] for c in g]) for k, g in
                                 groupby(sorted(raw_coords, key=keyfunc), keyfunc))
    ranges = list(input_ranges)

    # For each column, find contiguous ranges of rows
    for column in grouped_coords:
        rows = sorted(grouped_coords[column])
        grouped_rows = [[r[1] for r in list(g)] for k, g in
                        groupby(enumerate(rows), lambda x: x[0] - x[1])]
        for rows in grouped_rows:
            if len(rows) == 0:
                pass
            elif len(rows) == 1:
                ranges.append("%s%d" % (column, rows[0]))
            else:
                ranges.append("%s%d:%s%d" % (column, rows[0], column, rows[-1]))

    return " ".join(ranges)
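# Hedged usage sketch for collapse_cell_addresses, assuming coordinate_from_string
# behaves as in openpyxl (returns a (column_letter, row_number) tuple). The cell
# names are illustrative: contiguous rows within a column collapse to a range,
# isolated cells stay as single addresses.
print(collapse_cell_addresses(["A1", "A2", "A3", "B1", "B3"]))
# expected: "A1:A3 B1 B3"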
def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) queryset = Account.objects.filter(owner_id=self.request.user.pk) queryset = queryset.aggregate(total=Sum('amount')) amount = total_amount = queryset['total'] transactions = Transaction.objects.filter( account__owner_id=self.request.user.pk).order_by('-date') by_weeks_keys = [] by_weeks_values = [] for k, v in groupby(transactions, _week_keyfunc): by_weeks_keys.append(k.split('-')[1]) by_weeks_values.append(str(amount)) amount -= sum(x.amount for x in v) by_months_keys = [] by_months_values = [] for k, v in groupby(transactions, lambda x: format(x.date, "%Y-%m")): by_months_keys.append(k.split('-')[1]) by_months_values.append(str(sum(x.amount for x in v))) graph_weekly = { 'keys': '[{}]'.format(','.join(by_weeks_keys[::-1])), 'values': '[{}]'.format(','.join(by_weeks_values[::-1])), } graph_monthly = { 'keys': '[{}]'.format(','.join(by_months_keys[::-1])), 'values': '[{}]'.format(','.join(by_months_values[::-1])), } context['graph_weekly'] = graph_weekly context['graph_monthly'] = graph_monthly return context
def blastall_v_regions(myFastq1,myFastq2,myRef,outputfile,eVal,blastallDir): fns={} chunk=10**4 with open(myFastq1, 'r') as datafile1: groups = groupby(datafile1, key=lambda k, line=count(): next(line) // chunk) for k, group in groups: with tempfile.NamedTemporaryFile(delete=False, dir=tempfile.mkdtemp(),prefix='{}_'.format(str(k))) as outfile: outfile.write(''.join(group)) fns[k]=outfile.name blastn_cline = blastallDir+"blastall -p blastn -o "+str(outfile.name)+".blast.out -i "+str(outfile.name)+" -d "+myRef+" -e "+str(eVal)+" -m 8 -b 1" os.system(blastn_cline+" > /dev/null 2>&1") os.system("cat "+str(outfile.name)+".blast.out >> "+outputfile) os.remove(str(outfile.name)+".blast.out") os.remove(str(outfile.name)) testvar=commands.getstatusoutput("dirname "+str(outfile.name)) os.system("rm -r "+testvar[1]) fns={} with open(myFastq2, 'r') as datafile2: groups = groupby(datafile2, key=lambda k, line=count(): next(line) // chunk) for k, group in groups: with tempfile.NamedTemporaryFile(delete=False, dir=tempfile.mkdtemp(),prefix='{}_'.format(str(k))) as outfile: outfile.write(''.join(group)) fns[k]=outfile.name blastn_cline = blastallDir+"blastall -p blastn -o "+str(outfile.name)+".blast.out -i "+str(outfile.name)+" -d "+myRef+" -e "+str(eVal)+" -m 8 -b 1" os.system(blastn_cline+" > /dev/null 2>&1") os.system("cat "+str(outfile.name)+".blast.out >> "+outputfile) os.remove(str(outfile.name)+".blast.out") os.remove(str(outfile.name)) testvar=commands.getstatusoutput("dirname "+str(outfile.name)) os.system("rm -r "+testvar[1])
def invoice(cls, commissions): pool = Pool() Invoice = pool.get('account.invoice') key = lambda c: c._group_to_invoice_key() commissions.sort(key=key) invoices = [] to_write = [] for key, commissions in groupby(commissions, key=key): commissions = list(commissions) key = dict(key) invoice = cls._get_invoice(key) invoice.save() invoices.append(invoice) key = lambda c: c._group_to_invoice_line_key() commissions.sort(key=key) for key, commissions in groupby(commissions, key=key): commissions = [c for c in commissions if not c.invoice_line] key = dict(key) invoice_line = cls._get_invoice_line(key, invoice, commissions) invoice_line.save() to_write.extend([commissions, { 'invoice_line': invoice_line.id, }]) if to_write: cls.write(*to_write) Invoice.update_taxes(invoices)
def count_concepts(concepts):
    # count concepts by location and time
    counts = []
    if settings.DATE_GROUP_TYPE == 'year':
        entries = [(concept[0], c[0], c[1].split('-')[0]) for concept in concepts for c in concept[-1]]
    elif settings.DATE_GROUP_TYPE == 'month':
        entries = [(concept[0], c[0], c[1].rsplit('-', 1)[0]) for concept in concepts for c in concept[-1]]

    # group by location
    entries.sort(key=lambda x: x[1])
    for loc, group in itertools.groupby(entries, lambda x: x[1]):
        concepts = list([(g[0], g[2]) for g in group])

        # group by concepts
        concepts.sort(key=lambda x: x[0])
        concept_counts = []
        for concept, group2 in itertools.groupby(concepts, lambda x: x[0]):
            # Group by dates
            dates = [g[1] for g in group2]
            dates.sort()
            date_counts = []
            for date, group3 in itertools.groupby(dates):
                date_counts.append((date, len(list(group3))))
            date_counts.sort(key=lambda x: x[0])
            concept_counts.append((concept, date_counts))

        counts.append((loc, dict(concept_counts)))

    return dict(counts)
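# Hypothetical input shape for count_concepts above, assuming
# settings.DATE_GROUP_TYPE == 'year': each concept is a tuple whose first item
# is the concept name and whose last item is a list of (location, ISO-date) pairs.
# The names and dates are made up for illustration.
concepts = [
    ("flooding", [("Nairobi", "2014-03-01"), ("Nairobi", "2014-06-10")]),
    ("drought", [("Mombasa", "2013-11-02")]),
]
# count_concepts(concepts) ->
# {"Mombasa": {"drought": [("2013", 1)]}, "Nairobi": {"flooding": [("2014", 2)]}}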
def get_events(self, course_set=False):
    # Create a dictionary of months in the semester that contains
    # defaultdicts of lists
    start = datetime.datetime.combine(self.start, datetime.time(0, 0))
    end = datetime.datetime.combine(self.end, datetime.time(0, 0))
    occurrences = []
    if not course_set:
        course_set = self.course_set.all()
    # Gather all the occurrences
    for course in course_set:
        for event in course.schedule.all():
            occurrences.append(
                [
                    (single_occurrence, event)
                    for single_occurrence in event.recurrences.occurrences(dtstart=start, dtend=end)
                ]
            )
    months = dict([(month, list(events))
                   for month, events in groupby(chain(*occurrences), lambda a: a[0].month)])
    for month, e in months.items():
        months[month] = dict([(day, list(events))
                              for day, events in groupby(e, lambda a: a[0].day)])
    return months
def read_groups(aligns, platform):
    platform = platform.lower()

    if platform == '454':
        get_read_start = read_start_454
    elif platform in ('', 'neutral'):
        get_read_start = read_start_generic
    else:
        raise ValueError('Unknown platform specified: %s' % platform)

    for tid, contig_aligns in groupby(aligns, attrgetter('tid')):
        if tid == -1:
            continue

        fwd = []
        rev = []
        for align in contig_aligns:
            read_start = get_read_start(align)
            if not align.is_reverse:
                fwd.append((read_start, align))
            else:
                rev.append((read_start, align))

        fwd.sort()
        rev.sort()

        for pos, group in groupby(fwd, itemgetter(0)):
            yield [align for pos, align in group]

        for pos, group in groupby(rev, itemgetter(0)):
            yield [align for pos, align in group]
def get_context_data(self, **kwargs): context = super(IllinoisPostListView, self).get_context_data(**kwargs) context['all_posts'] = [] memberships = \ MembershipExtra.objects.order_by('election__name')\ .select_related('base')\ .select_related('election')\ .prefetch_related('base__on_behalf_of')\ .prefetch_related('base__person')\ .prefetch_related('base__post')\ .prefetch_related('base__post__extra') mem_grouper = lambda x: x.election post_grouper = lambda x: x.base.post.extra.slug for election, memberships in itertools.groupby(memberships, key=mem_grouper): person_posts = OrderedDict() memberships = sorted(memberships, key=post_grouper) for post_slug, post_group in itertools.groupby(memberships, key=post_grouper): post_group = list(post_group) post = post_group[0].base.post person_posts[post] = [] for membership in post_group: person_posts[post].append([membership.base.person, membership.base.on_behalf_of]) context['all_posts'].append((election, person_posts)) return context
def run(self, edit, within_lines=True):
    [*old_selections] = selections = self.view.sel()
    selections.clear()

    def selection_to_line(selection):
        return self.view.rowcol(selection.begin())[0]

    # Default, no grouping
    grouper = (lambda _: True)

    # Group by lines
    def line_grouper(selection):
        return self.view.rowcol(selection.begin())[0]

    if within_lines:
        # Is there no line with multiple selections?
        for category, group in groupby(old_selections, key=line_grouper):
            if sum(1 for _ in group) > 1:
                grouper = line_grouper

    for category, group in groupby(old_selections, key=grouper):
        first = next(group)
        for last in chain([first], group):
            pass
        selections.add(sublime.Region(first.begin(), last.end()))
def report_bad_cycles(quality_csv, bad_cycles_csv, bad_tiles_csv=None): reader = csv.DictReader(quality_csv) writer = csv.DictWriter(bad_cycles_csv, ['tile', 'cycle', 'errorrate'], lineterminator=os.linesep) writer.writeheader() if bad_tiles_csv is None: tile_writer = None else: tile_writer = csv.DictWriter(bad_tiles_csv, ['tile', 'bad_cycles'], lineterminator=os.linesep) tile_writer.writeheader() for tile, tile_cycles in itertools.groupby(reader, itemgetter('tile')): bad_cycle_count = 0 for _direction, cycles in itertools.groupby(tile_cycles, direction_grouper): is_bad = False for cycle in cycles: errorrate = cycle['errorrate'] is_bad = (is_bad or errorrate is None or errorrate == '' or float(errorrate) >= BAD_ERROR_RATE) if is_bad: writer.writerow(cycle) bad_cycle_count += 1 if tile_writer is not None: tile_writer.writerow(dict(tile=tile, bad_cycles=bad_cycle_count))
def combined_releases(releaser):
    credits = releaser.credits().select_related('nick', 'production__default_screenshot')\
        .prefetch_related('production__author_nicks__releaser',
                          'production__author_affiliation_nicks__releaser',
                          'production__platforms', 'production__types')\
        .defer('production__notes', 'production__author_nicks__releaser__notes',
               'production__author_affiliation_nicks__releaser__notes')\
        .order_by('-production__release_date_date', 'production__title',
                  'production__id', 'nick__name', 'nick__id')

    # reorganise credits queryset into a list of
    # (production, [ (nick, [credits_for_that_nick]) ]) records
    credits_by_production = groupby(credits, lambda credit: credit.production)

    # credits_by_production = list of (production, [credits]) records
    credits_by_production_nick = []
    for (production, credits) in credits_by_production:
        for (nick, credits) in groupby(credits, lambda credit: credit.nick):
            record = (production, nick, list(credits))
            credits_by_production_nick.append(record)

    production_ids = [production.id for production, nick, credits in credits_by_production_nick]

    productions = releaser.productions().select_related('default_screenshot')\
        .exclude(id__in=production_ids)\
        .prefetch_related('author_nicks__releaser', 'author_affiliation_nicks__releaser',
                          'platforms', 'types')\
        .defer('notes', 'author_nicks__releaser__notes',
               'author_affiliation_nicks__releaser__notes')\
        .order_by('-release_date_date', '-title')

    credits_with_prods = credits_by_production_nick + [(prod, None, None) for prod in productions]
    credits_with_prods.sort(
        key=lambda item: (item[0].release_date_date is None, item[0].release_date_date),
        reverse=True)

    return {
        'releaser': releaser,
        'credits': credits_with_prods,
        'show_screenshots': True,
        'show_prod_types': True,
    }
def generate_paragraph_report(report_config, submitted_to, submitted_by): reporting_equipments = [] for equipment in report_config["equipments"]: # all this query does is it takes the list of paragraphs and puts them into dictionaries keyed # first by paragraph type, then by component num, then by category. paragraphs = dict( (paragraph_type, dict( (component_num, dict( (category_name, sorted(category_paragraph_list, key=lambda x: x["sort_order"])) for category_name, category_paragraph_list in groupby(sorted(component_paragraph_list, key=lambda x: x["category_name"]), lambda x: x["category_name"]))) for component_num, component_paragraph_list in groupby(sorted(type_paragraph_list, key=lambda x: x["component_num"]), lambda x: x["component_num"]))) for paragraph_type, type_paragraph_list in groupby(sorted(equipment["paragraphs"], key=lambda x: x["type"]), lambda x: x["type"])) reporting_equipments.append({ "equipment": equipment["equipment"], "paragraphs": paragraphs }) report_options = { "equipments": reporting_equipments, "reportdate": str(datetime.now().strftime("%m/%d/%Y")), "submittedto": submitted_to, "submittedby": submitted_by, "reportname": "Syrx" } # this is a list of all of the static files necessary to generate the pdf package_subfile = "paragraph" file_paths_to_copy = [ pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "cover.html")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "cover-populate.js")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "footer.html")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "footer-populate.js")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "header.html")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "header-populate.js")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "report.html")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "report-populate.js")), pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "template.css")), pkg_resources.resource_filename(report_templates_package, os.path.join("images", "PathianLogoSmall.png")), pkg_resources.resource_filename(report_templates_package, os.path.join("scripts", "jquery.min.js")), pkg_resources.resource_filename(report_templates_package, os.path.join("scripts", "wkhtmltopdf.substitutions.js")) ] temp_dir = create_temp_dir(report_options, file_paths_to_copy) pdfname = "paragraph_report-" + datetime.now().strftime("%Y-%m-%d %H %M %S") + ".pdf" pdfpath = os.path.join(temp_dir, pdfname) header_html_path = os.path.join(temp_dir, "header.html") footer_html_path = os.path.join(temp_dir, "footer.html") cover_html_path = os.path.join(temp_dir, "cover.html") report_html_path = os.path.join(temp_dir, "report.html") subprocess.call(['wkhtmltopdf', '-B', '2cm', '-L', '2cm', '-T', '2cm', '-R', '2cm', '--header-html', header_html_path, '--footer-html', footer_html_path, 'cover', cover_html_path, report_html_path, pdfpath]) return pdfpath
def group_values(lines):
    records = []
    for _, records in itertools.groupby(lines, lambda row: row[0:2]):
        for __, by_value in itertools.groupby(records, lambda row: row[-3:]):
            recs = list(by_value)  # consume the iterator so we can grab positionally
            first = recs[0]

            record = Record()
            record.name = first[0]
            record.type = first[1]

            if first[2].startswith('ALIAS'):
                _, alias_hosted_zone_id, alias_dns_name = first[2].split(':')
                record.alias_hosted_zone_id = alias_hosted_zone_id
                record.alias_dns_name = alias_dns_name
            else:
                record.resource_records = [r[2] for r in recs]
                record.ttl = first[3]

            record.region = first[4] or None
            record.weight = first[5] or None
            record.identifier = first[6] or None
            record.failover = first[7] or None
            if first[8] == 'True':
                record.alias_evaluate_target_health = True
            elif first[8] == 'False':
                record.alias_evaluate_target_health = False
            else:
                record.alias_evaluate_target_health = None

            yield record
source = hit['_source']
categories = categories + source['categories']

categories = [
    cat for cat in categories
    if re.search('^[0-9]{4}s? ', cat) is None
    and re.search('^[0-9]{2}th-century ', cat) is None
    and re.search(' in [0-9]{4}s?', cat) is None
    and re.search(' films$', cat) is None
    and re.search(' clips$', cat) is None
    and re.search(' novels$', cat) is None
    and re.search('^Film[s]? ', cat) is None
    and re.search('^Screenplays ', cat) is None
]
categories.sort()
categories_freq = [(cat[0], len(list(cat[1]))) for cat in itertools.groupby(categories)]
categories_freq.sort(key=lambda x: x[1], reverse=True)

# Create knowledge domain
knowledge_domain = [x[0] for x in categories_freq[0:20]]

# Search answers inside knowledge domain
# { "term": { "categories": categories[0] }}
es_domain_query = [{"match": {"categories": cat}} for cat in knowledge_domain]

hits = []
scores = []
fuzziness_arr = []
def analyzeObject(filePath): global faceList, edgeList, vertexList global minX, maxX, minY, maxY, minZ, maxZ faceList.clear() edgeList.clear() vertexList.clear() surfaceArea = 0 volume = 0 minX = math.inf maxX = -math.inf minY = math.inf maxY = -math.inf minZ = math.inf maxZ = -math.inf # Get file name and handle incorrect/missing file names try: with open(filePath, 'r') as file: text = file.read().splitlines() myCoral = Coral(filePath) print("Coral file " + myCoral.coralName + " found!") except IOError as e: print(filePath + " not found, please try another file:") return None # Start timer to analyze performance start_time = time.time() print("Building list of all vertices and faces.") # Build lists of all vertices and faces for i in range(0, len(text)): if len(text[i]) > 1: if text[i][0] == 'v' and ' ' == text[i][1]: vertexList.append(getListCoord(text[i], "v ")) elif text[i][0] == 'f': faceList.append(text[i]) myCoral.vertexList = vertexList print("Calculating area and volume.") for i in range(0, len(faceList)): # Break each face into the label numbers of each vertex vertex1 = faceList[i].lstrip('f ').split(' ')[0].split('/')[0] vertex2 = faceList[i].lstrip('f ').split(' ')[1].split('/')[0] vertex3 = faceList[i].lstrip('f ').split(' ')[2].split('/')[0] # Surface area calculated by summing area of each triangular face surfaceArea += triArea(vertex1, vertex2, vertex3) # Volume calculated by the sum of signed volumes of tetrahedrons. Each tetrahedron is formed by the three vertices of a face on the object; the fourth point is the origin volume += findtetraVolume(vertex1, vertex2, vertex3) #We define each edge as a list of two vertices and sort so that duplicates can easily be deleted later edge1 = sorted([vertex1, vertex2]) edge2 = sorted([vertex2, vertex3]) edge3 = sorted([vertex3, vertex1]) # Add each edge to an edgeList edgeList.append(edge1) edgeList.append(edge3) edgeList.append(edge2) #Remove duplicates of edges edgeList.sort() edgeList = list(edgeList for edgeList, _ in itertools.groupby(edgeList)) #Euler's Formula holes = int(-(len(vertexList) - len(edgeList) + len(faceList)) / 2 + 1) # Bounding Box distances length = abs(maxX - minX) width = abs(maxY - minY) height = abs(maxZ - minZ) boxDimensions = [minX, minY, minZ, maxX, maxY, maxZ] # Set coral object attributes if holes == 1: myCoral.numHoles = 1 elif holes == 0: myCoral.numHoles = 0 else: myCoral.numHoles = holes myCoral.numEdges = len(edgeList) myCoral.numVertices = len(vertexList) myCoral.numFaces = len(faceList) myCoral.boxDimensions = boxDimensions myCoral.surfaceArea = surfaceArea myCoral.volume = volume myCoral.analysisTime = time.time() - start_time # Some print statements to help with visualizing if writing to document doesn't work print("\n\nThere are " + str(len(vertexList)) + " vertices.") print("There are " + str(len(edgeList)) + " edges.") print("There are " + str(len(faceList)) + " faces.") if holes == 1: print("There is one hole in the object.") elif holes == 0: print("There are no holes in the object.") else: print("There are " + str(holes) + " holes in the object.") print("\nThe bounding box dimensions are {:,.2f}".format(length) + "mm x " + "{:,.2f}".format(width) + "mm x " + "{:,.2f}".format(height) + "mm.") print("The surface area is {:,.3f}".format(surfaceArea) + " square mm.") print("The volume is {:,.3f}".format(volume) + " cubic mm.") print("\n\n--- Elapsed time: {:,.2f}".format(time.time() - start_time) + " seconds ---") print("Calculating fractal dimension.") # Calculate fractal dimension using 
bucket fractal dimension myFD, myX, myY = findBucketFD(vertexList, myCoral.findBoundBox()) myCoral.myFD = myFD myCoral.myXY = myX, myY myCoral.plotMyFD() myCoral.plotToPlateau() #myCoral.writeXYtoFile() # Using Jessica's fractal dimension fileFD, fileX, fileY = findFromFDFile(myCoral.jessicafilePath) myCoral.fileFD = fileFD myCoral.fileXY = fileX, fileY #myCoral.plotFileFD() return myCoral
def ipartition_by(f, seq):
    for g, items in groupby(seq, f):
        yield items
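# Hypothetical usage of ipartition_by: like groupby itself, it only merges
# *consecutive* items for which f returns the same value, and each yielded group
# is an iterator that should be consumed before advancing.
print([list(part) for part in ipartition_by(lambda x: x % 2 == 0, [2, 4, 1, 3, 6])])
# -> [[2, 4], [1, 3], [6]]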
def stage(workflow_name, stage_name): # these are the column names that appear in the screen colnames = [ "id", "task", "successful", "status", "drm_status", "drm_jobID", "attempts", "submitted_on", "finished_on", "wall_time", ] # this indicates if this column can be used for sorting and searching. The names match SQL column names. names_internal = [ "id", "params", "successful", "_status", False, "drm_jobID", "attempt", "submitted_on", "finished_on", "wall_time", ] in_page = request.args.get("in_page", 40, type=int) page = request.args.get("page", 1, type=int) keyword = request.args.get("keyword", "", type=str) sorting = request.args.get("sorting", None, type=str) order = request.args.get("order", None, type=str) ex = session.query(Workflow).filter_by(name=workflow_name).one() stage = session.query(Stage).filter_by(workflow_id=ex.id, name=stage_name).one() if stage is None: return abort(404) from sqlalchemy import text tasks = session.query(Task).filter_by(stage_id=stage.id) # search keyword if keyword == "": tasks_searched = tasks else: pattern = "%" + keyword.replace("'", "''") + "%" tasks_searched = tasks.filter( or_(*[ text(f"{field} LIKE '{pattern}'") if field else None for field in names_internal ])) # sort tasks_sorted = tasks_searched if sorting is not None: if order == "desc": tasks_sorted = tasks_searched.order_by( desc(getattr(Task, sorting))) elif order == "asc": tasks_sorted = tasks_searched.order_by( asc(getattr(Task, sorting))) tasks_paginated = tasks_sorted[(page - 1) * in_page:page * in_page] try: n = tasks_searched.count() max_page = n // in_page + (1 if n % in_page > 0 else 0) except ZeroDivisionError: # no tasks found after search max_page = 1 # urls for page navigation first_url = (url_for( "cosmos.stage_query", workflow_name=workflow_name, stage_name=stage_name, old_page=1, old_keyword=keyword, old_in_page=in_page, sorting=sorting, order=order, ) if page != 1 else None) prev_url = (url_for( "cosmos.stage_query", workflow_name=workflow_name, stage_name=stage_name, old_page=page - 1, old_keyword=keyword, old_in_page=in_page, sorting=sorting, order=order, ) if page >= 2 else None) next_url = (url_for( "cosmos.stage_query", workflow_name=workflow_name, stage_name=stage_name, old_page=page + 1, old_keyword=keyword, old_in_page=in_page, sorting=sorting, order=order, ) if page < max_page else None) last_url = (url_for( "cosmos.stage_query", workflow_name=workflow_name, stage_name=stage_name, old_page=max_page, old_keyword=keyword, old_in_page=in_page, sorting=sorting, order=order, ) if page != max_page else None) # this will change only the url for the column currently used for sorting order_cycle = {None: "asc", "asc": "desc", "desc": None} ordering_for_urls = { colname: order_cycle[order] if good == sorting else "asc" for colname, good in zip(colnames, names_internal) } ordering_urls = { colname: url_for( f"cosmos.stage", workflow_name=workflow_name, stage_name=stage_name, in_page=in_page, page=page, keyword=keyword, sorting=good, order=ordering_for_urls[colname], ) if good else None for colname, good in zip(colnames, names_internal) } jm = JobManager(get_submit_args=None, logger=None) f = attrgetter("drm") drm_statuses = {} for drm, tasks in it.groupby(sorted(tasks_paginated, key=f), f): drm_statuses.update(jm.get_drm(drm).drm_statuses(list(tasks))) url_query = url_for( "cosmos.stage_query", old_page=page, old_keyword=keyword, sorting=sorting, order=order, workflow_name=workflow_name, stage_name=stage_name, old_in_page=in_page, ) return render_template( 
"cosmos/stage.html", stage=stage, drm_statuses=drm_statuses, in_page=in_page, tasks_on_page=tasks_paginated, max_page=max_page, colnames=colnames, ordering_urls=ordering_urls, page=page, url_query=url_query, first_url=first_url, prev_url=prev_url, next_url=next_url, last_url=last_url, workflow_name=workflow_name, stage_name=stage_name, keyword=keyword, )
# taste  {'0': 117872, '1': 384200, '2': 95771, '3': 61508}
# region {'0': 438760, '1': 220591}
###############################################################################################################
find_distribution = False
all_job = ['timeliness', 'emotion', 'taste', 'region']
if find_distribution:
    content_iter = ExqUtils.load_file_as_iter(p)
    ori_distribution = {'timeliness': {}, 'emotion': {}, 'region': {}, 'taste': {}}
    while True:
        data = list(itertools.islice(content_iter, 10000 * 10))
        if len(data) > 0:
            json_res = [json.loads(i.strip()) for i in data]
            # sample_list = [c['title'] + ". " + c['text'] for c in content]
            for job in all_job:
                job_label_list = np.asarray(sorted([str(c[job]) for c in json_res]))
                for k, g in itertools.groupby(job_label_list):
                    ori_distribution[job].update({k: len(list(g)) + ori_distribution[job].get(k, 0)})
        else:
            break
    for job in all_job:
        print(job, ori_distribution[job])

####################
# Prepare the (classification) training samples
# {'1': 39566, '2': 456327, '3': 64505, '4': 17625, '5': 4698, '6': 2979, '7': 24271, '8': 49380}
####################
if prepare_samples:
    print("Loading the samples")
    content_iter = ExqUtils.load_file_as_iter(p)
    distribution = {}
    print("Clearing the output file")
def RunLengthEncoding(input_string): return [(len(list(j)), i) for i,j in groupby(input_string)]
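# Example run of RunLengthEncoding above; each output pair is (run_length, character).
print(RunLengthEncoding("aaabccd"))  # -> [(3, 'a'), (1, 'b'), (2, 'c'), (1, 'd')]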
def asyn_lpa_communities(G, weight=None): """Returns communities in ``G`` as detected by asynchronous label propagation. The asynchronous label propagation algorithm is described in [1]_. The algorithm is probabilistic and the found communities may vary on different executions. The algorithm proceeds as follows. After initializing each node with a unique label, the algorithm repeatedly sets the label of a node to be the label that appears most frequently among that nodes neighbors. The algorithm halts when each node has the label that appears most frequently among its neighbors. The algorithm is asynchronous because each node is updated without waiting for updates on the remaining nodes. This generalized version of the algorithm in [1]_ accepts edge weights. Parameters ---------- G : Graph weight : string The edge attribute representing the weight of an edge. If ``None``, each edge is assumed to have weight one. In this algorithm, the weight of an edge is used in determining the frequency with which a label appears among the neighbors of a node: a higher weight means the label appears more often. Returns ------- communities : iterable Iterable of communities given as sets of nodes. Notes ------ Edge weight attributes must be numerical. References ---------- .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near linear time algorithm to detect community structures in large-scale networks." Physical Review E 76.3 (2007): 036106. """ labels = {n: i for i, n in enumerate(G)} cont = True while cont: cont = False nodes = list(G) random.shuffle(nodes) # Calculate the label for each node for node in nodes: if len(G[node]) < 1: continue # Get label frequencies. Depending on the order they are processed # in some nodes with be in t and others in t-1, making the # algorithm asynchronous. label_freq = Counter({labels[v]: G.edge[v][node][weight] if weight else 1 for v in G[node]}) # Choose the label with the highest frecuency. If more than 1 label # has the highest frecuency choose one randomly. max_freq = max(label_freq.values()) best_labels = [label for label, freq in label_freq.items() if freq == max_freq] new_label = random.choice(best_labels) labels[node] = new_label # Continue until all nodes have a label that is better than other # neighbour labels (only one label has max_freq for each node). cont = cont or len(best_labels) > 1 return (set(v) for k, v in groupby(sorted(labels, key=labels.get), key=labels.get))
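# Hedged usage sketch for asyn_lpa_communities. The body above uses the
# networkx 1.x G.edge accessor, so this assumes an older networkx release; the
# graph is just an illustrative pair of disconnected 4-cliques, and the exact
# partition can vary between runs because the algorithm is randomized.
import networkx as nx

G = nx.caveman_graph(2, 4)  # two disconnected 4-cliques
communities = list(asyn_lpa_communities(G))
print(communities)  # typically two sets of four nodes each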
import itertools

num = [[10, 20], [40], [30, 56, 25], [10, 20], [33], [40]]
print("Original List", num)
num.sort()
new_num = list(num for num, _ in itertools.groupby(num))
print("New List", new_num)
def get_availability(self): """ Queries the current client for information on what stations are available given the spatial and temporal restrictions. """ # Check if stations needs to be filtered after downloading or if the # restrictions one can impose with the FDSN webservices queries are # enough. This depends on the domain definition. try: self.domain.is_in_domain(0, 0) needs_filtering = True except NotImplementedError: needs_filtering = False arguments = { "network": self.restrictions.network, "station": self.restrictions.station, "location": self.restrictions.location, "channel": self.restrictions.channel, "starttime": self.restrictions.starttime, "endtime": self.restrictions.endtime, # Request at the channel level. "level": "channel" } # Add the domain specific query parameters. arguments.update(self.domain.get_query_parameters()) # Check the capabilities of the service and see what is the most # appropriate way of acquiring availability information. Some services # right now require manual overriding of what they claim to be # capable of. if "matchtimeseries" in self.client.services["station"]: arguments["matchtimeseries"] = True if "format" in self.client.services["station"]: arguments["format"] = "text" self.is_availability_reliable = True else: if "format" in self.client.services["station"]: arguments["format"] = "text" self.is_availability_reliable = False if self.is_availability_reliable: self.logger.info("Client '%s' - Requesting reliable " "availability." % self.client_name) else: self.logger.info( "Client '%s' - Requesting unreliable availability." % self.client_name) try: start = time.time() inv = self.client.get_stations(**arguments) end = time.time() except utils.ERRORS as e: if "no data available" in str(e).lower(): self.logger.info( "Client '%s' - No data available for request." % self.client_name) return self.logger.error( "Client '{0}' - Failed getting availability: %s".format( self.client_name), str(e)) return self.logger.info("Client '%s' - Successfully requested availability " "(%.2f seconds)" % (self.client_name, end - start)) # Get the time intervals from the restrictions. intervals = [TimeInterval(start=_i[0], end=_i[1]) for _i in self.restrictions] for network in inv: for station in network: # Skip the station if it is not in the desired domain. if needs_filtering is True and \ not self.domain.is_in_domain(station.latitude, station.longitude): continue channels = [] for channel in station.channels: # Remove channels that somehow slipped past the temporal # constraints due to weird behaviour from the data center. if (channel.start_date > self.restrictions.endtime) or \ (channel.end_date < self.restrictions.starttime): continue channels.append(Channel( location=channel.location_code, channel=channel.code, intervals=copy.deepcopy(intervals))) # Group by locations and apply the channel priority filter to # each. filtered_channels = [] def get_loc(x): return x.location for location, _channels in itertools.groupby( sorted(channels, key=get_loc), get_loc): filtered_channels.extend(utils.filter_channel_priority( list(_channels), key="channel", priorities=self.restrictions.channel_priorities)) channels = filtered_channels # Filter to remove unwanted locations according to the priority # list. 
channels = utils.filter_channel_priority( channels, key="location", priorities=self.restrictions.location_priorities) if not channels: continue self.stations[(network.code, station.code)] = Station( network=network.code, station=station.code, latitude=station.latitude, longitude=station.longitude, channels=channels) self.logger.info("Client '%s' - Found %i stations (%i channels)." % ( self.client_name, len(self.stations), sum([len(_i.channels) for _i in self.stations.values()])))
def ReconstructType(Field, EA): # # Get at the metadata for *this* instruction alone # Traces = GetHeapTraces(Field, EA) Metadata = tuple(Meta(Trace[Field]) for Trace in Traces) (Size, Offset, Frames) = EnsureHeapMetadataHomogeneity(Metadata) StructId = AskUserForStructOfSize(Size) BaseAddresses = set(M.Heap.Base for M in Metadata) if StructId == ida.BADNODE: return # # Presumably, this instruction was hit multiple times, and may have # operated on several different allocations of the exact same variety. # # Given this data, find *all* instructions which interacted with # the exact same data. Specifically, we want to get at the traces. # for F in Frames: print ", ".join(str(_) for _ in Frames) # print "Analyzing allocations of size %#x with call frame..." % Size # print '\n'.join(' %s' % F for F in Frames) RelevantTraces = tuple(TracesForHeapAllocation(BaseAddresses, Size, Frames)) UniqueOffset = lambda T: T['Address']['Heap']['Offset'] print "Found %#x traces" % len(RelevantTraces) # # Group all of the fields by their offset from the base # of the heap allocations # for TraceOffset, Traces in itertools.groupby(RelevantTraces, UniqueOffset): # N.B. itertools.groupby() group can only be iterated once # so we have to save it off. Traces = tuple(Traces) FieldFromTraces(StructId, TraceOffset, Traces) # # Group the traces by instruction, and set the structure type # on the memory operand if there isn't one already. # for Trace in Traces: # o_displ EA = Trace['IP'] Operands = {0: ida.GetOpType(EA, 0), 1: ida.GetOpType(EA, 1)} # We want to find the 'Address' operand, which will # be of type displacement, memory, or phrase. OpMem = next(k for k, v in Operands.items() if v in (ida.o_displ, ida.o_mem, ida.o_phrase)) OpValue = ida.GetOperandValue(EA, OpMem) OpOff = 0 # o_phrase comes up for some instructions that look like: # mov reg, [reg2] # and OpValue will then be an index for the list returned by GetRegisterList() # instead of '0'. if Operands[OpMem] == ida.o_phrase: OpOff = TraceOffset else: OpOff = TraceOffset - OpValue print "%x = %x - %x" % (OpOff, TraceOffset, OpValue) print "OpStroffEx(%x, %x, %x, %x)" % (EA, OpMem, StructId, OpOff) ida.OpStroffEx(EA, OpMem, StructId, OpOff)
def compress_homopolymer(seq): return ''.join(x[0] for x in groupby(list(seq)))
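# Example run of compress_homopolymer: consecutive repeats of a base collapse to one.
print(compress_homopolymer("AAATTGGGC"))  # -> "ATGC"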
def unique_in_order(iterable): return [k for (k, _) in groupby(iterable)]
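# Example run of unique_in_order: only *consecutive* duplicates are removed.
print(unique_in_order("AAAABBBCCDAABBB"))  # -> ['A', 'B', 'C', 'D', 'A', 'B']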
def run(self): # Set up paths to write to config file install_dir = self.install_dir install_logdir = '/var/log' if self.user or self.home: install_sysconfdir = os.path.join(install_dir, 'etc') elif os.name == 'posix' and install_dir in ('/', '/usr'): install_sysconfdir = '/etc' else: install_sysconfdir = 'scripts\\etc\\mysql' if not self.data_files: return # Go over all entries in data_files and process it if needed for df in self.data_files: # Figure out what the entry contain and collect a list of files. if isinstance(df, str): # This was just a file name, so it will be installed # in the install_dir location. This is a copy of the # behaviour inside distutils intall_data. directory = install_dir filenames = [df] else: directory = df[0] filenames = df[1] # Process all the files for the entry and build a list of # tuples (directory, file) data_files = [] for filename in filenames: # It was a config file template, add install # directories to the config file. if fnmatch.fnmatch(filename, 'data/*.cfg.in'): config = ConfigParser.RawConfigParser({ 'prefix': '', # custom install_dir, 'logdir': install_logdir, 'sysconfdir': install_sysconfdir, }) config.readfp(open(filename)) filename = os.path.splitext(filename)[0] config.write(open(filename, "w")) # change directory 'fabric'to mysql directory = os.path.join(install_sysconfdir, 'mysql') if os.name == 'nt': directory = install_sysconfdir data_files.append((directory, filename)) # Re-construct the data_files entry from what was provided by # merging all tuples with same directory and provide a list of # files as second item, e.g.: # [('foo', 1), ('bar', 2), ('foo', 3), ('foo', 4), ('bar', 5)] # --> [('bar', [2, 5]), ('foo', [1, 3, 4])] data_files.sort() data_files = [ (d, [f[1] for f in fs]) for d, fs in groupby(data_files, key=lambda x: x[0]) ] self.data_files = data_files log.info("package--> self.data_files {0}".format(self.data_files)) log.info("package.py--> self.data_files {0}".format(self.data_files)) _install_data.run(self)
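# A standalone sketch of the merge performed on data_files at the end of run()
# above, using the toy pairs from its comment; groupby needs the list sorted by
# directory first so that equal keys are adjacent.
from itertools import groupby

pairs = [('foo', 1), ('bar', 2), ('foo', 3), ('foo', 4), ('bar', 5)]
pairs.sort()
merged = [(d, [f for _, f in fs]) for d, fs in groupby(pairs, key=lambda x: x[0])]
print(merged)  # -> [('bar', [2, 5]), ('foo', [1, 3, 4])]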
def stree_schedule(clusters): """ Arrange an iterable of Clusters into a ScheduleTree. """ stree = ScheduleTree() prev = Cluster(None) mapper = DefaultOrderedDict(lambda: Bunch(top=None, bottom=None)) def reuse_metadata(c0, c1, d): return (c0.guards.get(d) == c1.guards.get(d) and c0.syncs.get(d) == c1.syncs.get(d)) def attach_metadata(cluster, d, tip): if d in cluster.guards: tip = NodeConditional(cluster.guards[d], tip) if d in cluster.syncs: tip = NodeSync(cluster.syncs[d], tip) return tip for c in clusters: index = 0 # Reuse or add in any Conditionals and Syncs outside of the outermost Iteration if not reuse_metadata(c, prev, None): tip = attach_metadata(c, None, stree) maybe_reusable = [] else: try: tip = mapper[prev.itintervals[index]].top.parent except IndexError: tip = stree maybe_reusable = prev.itintervals for it0, it1 in zip(c.itintervals, maybe_reusable): if it0 != it1: break index += 1 d = it0.dim # The reused sub-trees might acquire new sub-iterators as well as # new properties mapper[it0].top.ispace = IterationSpace.union( mapper[it0].top.ispace, c.ispace.project([d])) mapper[it0].top.properties = normalize_properties( mapper[it0].top.properties, c.properties[it0.dim]) # Different guards or SyncOps cannot further be nested if not reuse_metadata(c, prev, d): tip = mapper[it0].top tip = attach_metadata(c, d, tip) mapper[it0].bottom = tip break else: tip = mapper[it0].bottom # Nested sub-trees, instead, will not be used anymore for it in prev.itintervals[index:]: mapper.pop(it) # Add in Iterations, Conditionals, and Syncs for it in c.itintervals[index:]: d = it.dim tip = NodeIteration(c.ispace.project([d]), tip, c.properties.get(d, ())) mapper[it].top = tip tip = attach_metadata(c, d, tip) mapper[it].bottom = tip # Add in Expressions exprs = [] for conditionals, g in groupby(c.exprs, key=lambda e: e.conditionals): exprs = list(g) # Indirect ConditionalDimensions induce expression-level guards if conditionals: guard = And(*conditionals.values(), evaluate=False) parent = NodeConditional(guard, tip) else: parent = tip NodeExprs(exprs, c.ispace, c.dspace, c.ops, c.traffic, parent) # Prepare for next iteration prev = c return stree
def _serialize_report(user, problem_instances, test_groups): """Generates a dictionary representing a single report. :param request: Django request :type user: :cls:`django.contrib.auth.User` :param user: user to generate the report for :type problem_instances: list of :cls:`oioioi.contests.ProblemInstance` :param problem_instances: problem instances to include in the report :type test_groups: dict(:cls:`oioioi.contests.ProblemInstance` -> list of str) :param test_groups: dictionary mapping problem instances into lists of names of test groups to include """ resultsets = [] total_score = None results = UserResultForProblem.objects.filter( user=user, problem_instance__in=list(problem_instances), submission_report__isnull=False) for r in results: problem_instance = r.problem_instance submission_report = r.submission_report submission = submission_report.submission source_file = submission.programsubmission.source_file groups = list(test_groups[problem_instance]) try: compilation_report = CompilationReport.objects \ .get(submission_report=submission_report) except CompilationReport.DoesNotExist: compilation_report = None try: test_reports = TestReport.objects \ .filter(submission_report__submission=submission) \ .filter(submission_report__status='ACTIVE') \ .filter(submission_report__kind__in=['INITIAL', 'NORMAL']) \ .filter(test_group__in=groups) \ .order_by('test__kind', 'test__order', 'test_name') except TestReport.DoesNotExist: test_reports = [] group_reports = GroupReport.objects \ .filter(submission_report__submission=submission) \ .filter(submission_report__status='ACTIVE') \ .filter(submission_report__kind__in=['INITIAL', 'NORMAL']) \ .filter(group__in=groups) group_reports = dict((g.group, g) for g in group_reports) groups = [] for group_name, tests in itertools.groupby(test_reports, attrgetter('test_group')): groups.append({'tests': list(tests), 'report': group_reports[group_name]}) problem_score = None max_problem_score = None for group in groups: group_score = group['report'].score group_max_score = group['report'].max_score if problem_score is None: problem_score = group_score elif group_score is not None: problem_score += group_score if max_problem_score is None: max_problem_score = group_max_score elif group_max_score is not None: max_problem_score += group_max_score resultsets.append(dict( result=r, score=problem_score, max_score=max_problem_score, compilation_report=compilation_report, groups=groups, code=source_file.read(), codefile=source_file.file.name )) source_file.close() if total_score is None: total_score = problem_score elif problem_score is not None: total_score += problem_score return { 'user': user, 'resultsets': resultsets, 'sum': total_score, }
def main():
    # Ported from Python 2 idioms: open() instead of file(), print() as a
    # function, next() instead of .next() and .items() instead of .iteritems().
    trips_csv = DictReader(open(DATA_ROOT + 'trips.txt'))
    stops_csv = DictReader(open(DATA_ROOT + 'stops.txt'))
    stop_times_csv = DictReader(open(DATA_ROOT + 'stop_times.txt'))
    routes_csv = DictReader(open(DATA_ROOT + 'routes.txt'))

    gexf = GEXF()

    routes = dict()
    for route in routes_csv:
        if route['route_type'] in CONVERT_ROUTE_TYPES:
            routes[route['route_id']] = route
    print('routes', len(routes))

    trips = dict()
    for trip in trips_csv:
        if trip['route_id'] in routes:
            trip['color'] = routes[trip['route_id']]['route_color']
            trips[trip['trip_id']] = trip
    print('trips', len(trips))

    stops = set()
    edges = dict()
    # groupby only merges adjacent rows, so this relies on stop_times.txt
    # being ordered by trip_id (as GTFS feeds normally are).
    for trip_id, stop_time_iter in groupby(
            stop_times_csv, lambda stop_time: stop_time['trip_id']):
        if trip_id in trips:
            trip = trips[trip_id]
            prev_stop = next(stop_time_iter)['stop_id']
            stops.add(prev_stop)
            for stop_time in stop_time_iter:
                stop = stop_time['stop_id']
                edge = (prev_stop, stop)
                edges[edge] = trip['color']
                stops.add(stop)
                prev_stop = stop
    print('stops', len(stops))
    print('edges', len(edges))

    stops_used = set(DISCARD_STATIONS)
    for stop in stops_csv:
        if stop['stop_id'] in stops:
            stop_id = stop['stop_id']
            name = stop['stop_name']
            lat = stop['stop_lat']
            lon = stop['stop_lon']
            if stop_id not in stops_used:
                gexf.add_node(stop_id, name, lon, lat)
                stops_used.add(stop_id)

    edges_used = set()
    for (start_stop_id, end_stop_id), color in edges.items():
        # de-duplicate edges regardless of direction
        edge = min((start_stop_id, end_stop_id), (end_stop_id, start_stop_id))
        if edge not in edges_used:
            gexf.add_edge(start_stop_id, end_stop_id, color)
            edges_used.add(edge)

    gexf.write(open('out.gexf', 'w'))
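# Illustrative sketch only: the GTFS loop above relies on stop_times rows being
# contiguous per trip_id, because itertools.groupby never looks beyond adjacent
# items. The rows below are invented; a real script would get them from
# csv.DictReader.
import itertools

stop_times = [
    {'trip_id': 'T1', 'stop_id': 'A'},
    {'trip_id': 'T1', 'stop_id': 'B'},
    {'trip_id': 'T2', 'stop_id': 'B'},
    {'trip_id': 'T2', 'stop_id': 'C'},
]

edges = set()
for trip_id, rows in itertools.groupby(stop_times, key=lambda r: r['trip_id']):
    prev = next(rows)['stop_id']
    for row in rows:
        edges.add((prev, row['stop_id']))
        prev = row['stop_id']

print(sorted(edges))   # [('A', 'B'), ('B', 'C')]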
def process_translations(content_list, order_by=None):
    """
    Finds translations and returns them.

    Returns a tuple with two lists (index, translations). Index list includes
    items in the default language or items which have no variant in the
    default language. Items with the `translation` metadata set to something
    else than `False` or `false` will be used as translations, unless all the
    items with the same slug have that metadata.

    For each content_list item, sets the 'translations' attribute.

    order_by can be a string naming an attribute, or a sorting function. If
    order_by is defined, content will be ordered by that attribute or sorting
    function. By default, content is ordered by slug.

    Different content types can have default order_by attributes defined
    in settings, e.g. PAGES_ORDER_BY='sort-order', in which case `sort-order`
    should be a defined metadata attribute in each page.
    """
    content_list.sort(key=attrgetter('slug'))
    grouped_by_slugs = groupby(content_list, attrgetter('slug'))
    index = []
    translations = []

    for slug, items in grouped_by_slugs:
        items = list(items)
        # items with `translation` metadata will be used as translations…
        default_lang_items = list(filter(
            lambda i: i.metadata.get('translation', 'false').lower()
            == 'false',
            items))
        # …unless all items with that slug are translations
        if not default_lang_items:
            default_lang_items = items

        # display warnings if several items have the same lang
        for lang, lang_items in groupby(items, attrgetter('lang')):
            lang_items = list(lang_items)
            len_ = len(lang_items)
            if len_ > 1:
                logger.warning('There are %s variants of "%s" with lang %s',
                               len_, slug, lang)
                for x in lang_items:
                    logger.warning('\t%s', x.source_path)

        # find items with default language
        default_lang_items = list(filter(attrgetter('in_default_lang'),
                                         default_lang_items))

        # if there is no article with the default language, take another one
        if not default_lang_items:
            default_lang_items = items[:1]

        if not slug:
            logger.warning(
                'empty slug for %s. '
                'You can fix this by adding a title or a slug to your '
                'content',
                default_lang_items[0].source_path)
        index.extend(default_lang_items)
        translations.extend([x for x in items if x not in default_lang_items])
        for a in items:
            a.translations = [x for x in items if x != a]

    if order_by:
        if callable(order_by):
            try:
                index.sort(key=order_by)
            except Exception:
                logger.error('Error sorting with function %s', order_by)
        elif isinstance(order_by, six.string_types):
            if order_by.startswith('reversed-'):
                order_reversed = True
                order_by = order_by.replace('reversed-', '', 1)
            else:
                order_reversed = False

            if order_by == 'basename':
                index.sort(key=lambda x: os.path.basename(x.source_path or ''),
                           reverse=order_reversed)
            # already sorted by slug, no need to sort again
            elif not (order_by == 'slug' and not order_reversed):
                try:
                    index.sort(key=attrgetter(order_by),
                               reverse=order_reversed)
                except AttributeError:
                    logger.warning('There is no "%s" attribute in the item '
                                   'metadata. Defaulting to slug order.',
                                   order_by)
        else:
            logger.warning('Invalid *_ORDER_BY setting (%s). '
                           'Valid options are strings and functions.',
                           order_by)

    return index, translations
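# Illustrative sketch only: process_translations() sorts by slug before
# grouping because groupby never looks past adjacent items. The tiny Article
# class and sample objects below are invented for the example.
from itertools import groupby
from operator import attrgetter


class Article:
    def __init__(self, slug, lang):
        self.slug, self.lang = slug, lang


articles = [Article('intro', 'en'), Article('faq', 'en'), Article('intro', 'fr')]
articles.sort(key=attrgetter('slug'))

for slug, items in groupby(articles, key=attrgetter('slug')):
    print(slug, [a.lang for a in items])
# faq ['en']
# intro ['en', 'fr']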
# Imports needed by this snippet (it uses alf.io, numpy, pandas and
# itertools.groupby below).
import itertools

import numpy as np
import pandas as pd

import alf.io

alf_path = '/home/mic/Downloads/ZM_1735_2019-08-01_001/mnt/s0/Data/Subjects/ZM_1735/2019-08-01/001/alf'

# can be addressed as spikes['time'] or spikes.time
spikes = alf.io.load_object(alf_path, 'spikes')
clusters = alf.io.load_object(alf_path, 'clusters')
channels = alf.io.load_object(alf_path, 'channels')
trials = alf.io.load_object(alf_path, '_ibl_trials')

# Print number of clusters for each brain region
locDict_bothProbes = clusters['brainAcronyms']['brainAcronyms'].to_dict()
cluster_idx_probe1 = np.unique(spikes['clusters'])
locDict = {}
for i in locDict_bothProbes:
    if i in cluster_idx_probe1:
        locDict[i] = locDict_bothProbes[i]

print([(k, len(list(v)))
       for k, v in itertools.groupby(sorted(locDict.values()))])

# set key parameters
T_BIN = 0.1
TRIALS_TO_PLOT = np.arange(20, 23)  # use the real trial numbers
PROJECTED_DIMENSIONS = 3
BEHAVIORAL_VARIABLE = 'choice'
BRAIN_AREA = 'MB'  # that depends on the dataset

# Reduce neural data to region of interest
if BRAIN_AREA:
    locations = clusters['brainAcronyms']
    spikes = pd.DataFrame.from_dict(spikes)
    loc_idx = locations.loc[(locations['brainAcronyms'] == BRAIN_AREA)].index
    spikes = spikes[np.isin(spikes['clusters'], loc_idx)]
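# Illustrative sketch only: the print() above counts clusters per brain region
# with sorted() + groupby(). The region labels below are invented;
# collections.Counter gives the same counts without the sort.
import itertools
from collections import Counter

regions = ['MB', 'CA1', 'MB', 'TH', 'CA1', 'MB']

by_groupby = [(k, len(list(v)))
              for k, v in itertools.groupby(sorted(regions))]
print(by_groupby)        # [('CA1', 2), ('MB', 3), ('TH', 1)]
print(Counter(regions))  # Counter({'MB': 3, 'CA1': 2, 'TH': 1})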
def main(argv=None): """script main. parses command line options in sys.argv, unless *argv* is given. """ if not argv: argv = sys.argv # setup command line parser parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $", usage=globals()["__doc__"]) parser.add_option("-t", "--template-bam-file", dest="filename_genome_bam", type="string", help="input bam file for header information [%default]") parser.add_option("-s", "--contigs-tsv-file", dest="filename_contigs", type="string", help="filename with contig sizes [%default]") parser.add_option("-o", "--colour", dest="colour_mismatches", action="store_true", help="mismatches will use colour differences (CM tag) [%default]") parser.add_option("-i", "--ignore-mismatches", dest="ignore_mismatches", action="store_true", help="ignore mismatches [%default]") parser.add_option("-c", "--remove-contigs", dest="remove_contigs", type="string", help="','-separated list of contigs to remove [%default]") parser.add_option("-f", "--force-output", dest="force", action="store_true", help="force overwriting of existing files [%default]") parser.add_option("-u", "--unique", dest="unique", action="store_true", help="remove reads not matching uniquely [%default]") parser.set_defaults( filename_genome_bam=None, filename_gtf=None, filename_mismapped=None, remove_contigs=None, force=False, unique=False, colour_mismatches=False, ignore_mismatches=False, ) # add common options (-h/--help, ...) and parse command line (options, args) = E.Start(parser, argv=argv) genomefile, referencenames, referencelengths = None, None, None if options.filename_genome_bam: genomefile = pysam.AlignmentFile(options.filename_genome_bam, "rb") elif options.filename_contigs: contigs = IOTools.ReadMap(IOTools.openFile(options.filename_contigs)) data = list(zip(*list(contigs.items()))) referencenames, referencelengths = data[0], list(map(int, data[1])) else: raise ValueError( "please provide either --template-bam-file or --contigs-tsv-file") infile = pysam.AlignmentFile("-", "rb") outfile = pysam.AlignmentFile("-", "wb", template=genomefile, referencenames=referencenames, referencelengths=referencelengths) if options.colour_mismatches: tag = "CM" else: tag = "NM" nambiguous = 0 ninput = 0 nunmapped = 0 ncigar = 0 nfull = 0 noutput = 0 contig2tid = dict([(y, x) for x, y in enumerate(outfile.references)]) for qname, readgroup in itertools.groupby(infile, lambda x: x.qname): ninput += 1 reads = list(readgroup) if reads[0].is_unmapped: nunmapped += 1 continue # filter for best match best = min([x.opt(tag) for x in reads]) reads = [x for x in reads if x.opt(tag) == best] if len(reads) > 1: nambiguous += 1 continue read = reads[0] # reject complicated matches (indels, etc) # to simplify calculations below. 
if len(read.cigar) > 1: ncigar += 1 continue # set NH flag to latest count t = dict(read.tags) t['NH'] = 1 read.tags = list(t.items()) sname = infile.getrname(read.tid) contig, first_exon_start, middle, last_exon_end, splice, strand = sname.split( "|") first_exon_end, last_exon_start = middle.split("-") first_exon_start, first_exon_end, last_exon_start, last_exon_end = list(map(int, ( first_exon_start, first_exon_end, last_exon_start, last_exon_end))) first_exon_end += 1 total = first_exon_end - first_exon_start + \ last_exon_end - last_exon_start first_exon_length = first_exon_end - first_exon_start match1 = first_exon_length - read.pos intron_length = last_exon_start - first_exon_end match2 = read.qlen - match1 # match lies fully in one exon - ignore if match1 <= 0 or match2 <= 0: nfull += 1 continue # increment pos read.pos = first_exon_start + read.pos read.tid = contig2tid[contig] # 3 = BAM_CREF_SKIP read.cigar = [(0, match1), (3, intron_length), (0, match2)] outfile.write(read) noutput += 1 outfile.close() if genomefile: genomefile.close() c = E.Counter() c.input = ninput c.output = noutput c.full = nfull c.cigar = ncigar c.ambiguous = nambiguous c.unmapped = nunmapped E.info("%s" % str(c)) # write footer and output benchmark information. E.Stop()
# This snippet assumes a pycocotools setup from earlier in the original
# script: `coco` is a loaded COCO instance, `cats` is its list of category
# dicts, and `io` refers to an image-reading module such as skimage.io.
import itertools

nms = [cat['name'] for cat in cats]  # category names
cat_id_to_name = {cat['id']: cat['name'] for cat in cats}  # category id to name mapping
cat_name_to_id = {cat['name']: cat['id'] for cat in cats}  # category name to id mapping
# print('COCO categories: \n{}'.format(' '.join(nms)))

nms = set([cat['supercategory'] for cat in cats])  # supercategory names
cat_to_supercat = {cat['name']: cat['supercategory'] for cat in cats}
cat_id_to_supercat = {cat['id']: cat['supercategory'] for cat in cats}
# print('COCO supercategories: \n{}'.format(' '.join(nms)))
# print(len(nms))

# print supercategory and categories in each supercategory
supercat_to_cats = {}
for key, group in itertools.groupby(
        sorted([(sc, c) for (c, sc) in cat_to_supercat.items()]),
        lambda x: x[0]):
    lst = [thing[1] for thing in group]
    print(key, ":", ", ".join(lst))
    supercat_to_cats[key] = lst

colors = [(30, 144, 255), (255, 140, 0), (34, 139, 34), (255, 0, 0),
          (147, 112, 219), (139, 69, 19), (255, 20, 147), (128, 128, 128),
          (85, 107, 47), (0, 255, 255)]


def get_color(i):
    return colors[i % len(colors)]


# load and see image
img = coco.loadImgs([87058])[0]  # make sure image ID exists in the dataset given to you.
# I = io.imread('%s/%s/%s' % (dataDir, dataType, img['file_name']))  # make sure data dir is correct
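# Illustrative sketch only: the loop above inverts a category -> supercategory
# mapping by sorting (supercategory, category) pairs and grouping on the first
# element. The mapping below is invented; a defaultdict achieves the same
# inversion without sorting.
import itertools
from collections import defaultdict

cat_to_supercat = {'cat': 'animal', 'dog': 'animal', 'car': 'vehicle'}

pairs = sorted((sc, c) for c, sc in cat_to_supercat.items())
via_groupby = {sc: [c for _, c in grp]
               for sc, grp in itertools.groupby(pairs, key=lambda p: p[0])}

via_defaultdict = defaultdict(list)
for c, sc in cat_to_supercat.items():
    via_defaultdict[sc].append(c)

print(via_groupby)            # {'animal': ['cat', 'dog'], 'vehicle': ['car']}
print(dict(via_defaultdict))  # same grouping, values in insertion order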
import itertools
import operator

import numpy as np
import matplotlib.pyplot as plt

# NumPy 'S' (bytes) fields do not compare equal to str keys under Python 3,
# so the mode column is read as unicode ('U100') to keep the colormap lookup
# working.
data = np.loadtxt('../../datasets/gatech_honeybee/sequence1.dat',
                  dtype=np.dtype({'names': ['x', 'y', 'mode'],
                                  'formats': [np.double, np.double, 'U100']}))

# split the trajectory into consecutive runs that share the same 'mode'
segments = [np.array(list(g))
            for k, g in itertools.groupby(data, key=operator.itemgetter(2))]

colormap = {'waggle': '-r', 'turn_left': '-g', 'turn_right': '-b'}

for segment in segments:
    plt.plot(segment['x'], segment['y'], colormap[segment['mode'][0]],
             linewidth=2.0)

plt.show()
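# Illustrative sketch only: the plotting snippet above uses groupby to split a
# trajectory into consecutive runs with the same 'mode' value. The data below
# is invented and kept as plain tuples so the example stays self-contained.
import itertools
import operator

points = [
    (0.0, 0.0, 'waggle'),
    (0.1, 0.2, 'waggle'),
    (0.2, 0.1, 'turn_left'),
    (0.3, 0.0, 'waggle'),
]

segments = [list(g)
            for _, g in itertools.groupby(points, key=operator.itemgetter(2))]
print([len(s) for s in segments])   # [2, 1, 1] -- three consecutive runs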
def assert_guess_values(self, matrix, block, guesses, spin_flip=False): """ Assert that the guesses correspond to the smallest diagonal values. """ # Extract useful quantities mospaces = matrix.mospaces nCa = noa = mospaces.n_orbs_alpha("o1") nva = mospaces.n_orbs_alpha("v1") if mospaces.has_core_occupied_space: nCa = mospaces.n_orbs_alpha("o2") # Make a list of diagonal indices, ordered by the corresponding # diagonal values sidcs = None if block == "ph": diagonal = matrix.diagonal().ph.to_ndarray() # Build list of indices, which would sort the diagonal sidcs = np.dstack( np.unravel_index(np.argsort(diagonal.ravel()), diagonal.shape)) assert sidcs.shape[0] == 1 if spin_flip: sidcs = [ idx for idx in sidcs[0] if idx[0] < nCa and idx[1] >= nva ] else: sidcs = [ idx for idx in sidcs[0] if any((idx[0] >= nCa and idx[1] >= nva, idx[0] < nCa and idx[1] < nva)) # noqa: E221 ] elif block == "pphh": diagonal = matrix.diagonal().pphh.to_ndarray() # Build list of indices, which would sort the diagonal sidcs = np.dstack( np.unravel_index(np.argsort(diagonal.ravel()), diagonal.shape)) assert sidcs.shape[0] == 1 if spin_flip: sidcs = [ idx for idx in sidcs[0] if any(( idx[0] < noa and idx[1] < nCa and idx[2] < nva and idx[3] >= nva, # noqa: E221,E501 idx[0] < noa and idx[1] < nCa and idx[2] >= nva and idx[3] < nva, # noqa: E221,E501 idx[0] < noa and idx[1] >= nCa and idx[2] >= nva and idx[3] >= nva, # noqa: E221,E501 idx[0] >= noa and idx[1] < nCa and idx[2] >= nva and idx[3] >= nva)) # noqa: E221,E501 ] else: sidcs = [ idx for idx in sidcs[0] if any(( idx[0] < noa and idx[1] < nCa and idx[2] < nva and idx[3] < nva, # noqa: E221,E501 idx[0] >= noa and idx[1] >= nCa and idx[2] >= nva and idx[3] >= nva, # noqa: E221,E501 idx[0] < noa and idx[1] >= nCa and idx[2] < nva and idx[3] >= nva, # noqa: E221,E501 idx[0] >= noa and idx[1] < nCa and idx[2] >= nva and idx[3] < nva, # noqa: E221,E501 idx[0] < noa and idx[1] >= nCa and idx[2] >= nva and idx[3] < nva, # noqa: E221,E501 idx[0] >= noa and idx[1] < nCa and idx[2] < nva and idx[3] >= nva)) # noqa: E221,E501 ] sidcs = [idx for idx in sidcs if idx[2] != idx[3]] if not matrix.is_core_valence_separated: sidcs = [idx for idx in sidcs if idx[0] != idx[1]] # Group the indices by corresponding diagonal value def grouping(x): return np.round(diagonal[tuple(x)], decimals=12) gidcs = [[tuple(gitem) for gitem in group] for key, group in itertools.groupby(sidcs, grouping)] igroup = 0 # The current diagonal value group we are in for (i, guess) in enumerate(guesses): # Extract indices of non-zero elements nonzeros = np.dstack(np.where(guess[block].to_ndarray() != 0)) assert nonzeros.shape[0] == 1 nonzeros = [tuple(nzitem) for nzitem in nonzeros[0]] if i > 0 and igroup + 1 < len(gidcs): if nonzeros[0] in gidcs[igroup + 1]: igroup += 1 for nz in nonzeros: assert nz in gidcs[igroup]
import itertools


def groupby(iterable, keyfunc):
    """Like itertools.groupby, but sorts the iterable on the same key first
    so that non-adjacent items with equal keys still end up in one group."""
    return itertools.groupby(sorted(iterable, key=keyfunc), keyfunc)
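# Illustrative usage of the sorted-groupby wrapper defined above; the records
# are invented sample data and the call assumes that wrapper is in scope.
records = [{'kind': 'b'}, {'kind': 'a'}, {'kind': 'b'}]
for kind, items in groupby(records, keyfunc=lambda r: r['kind']):
    print(kind, len(list(items)))
# a 1
# b 2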
import itertools as it


def reducer(lines):
    # Assumes the incoming lines are already sorted by key (as after a
    # MapReduce shuffle), so groupby sees each key as one contiguous run.
    for key, values in it.groupby(lines, lambda t: t.rstrip()):
        print(key)
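# Illustrative sketch only: with sorted input, a reducer like the one above
# collapses each run of identical lines into a single key (a uniq pass).
# The sample lines are invented.
import itertools as it

lines = ['apple\n', 'apple\n', 'banana\n', 'banana\n', 'cherry\n']
unique_keys = [key for key, _ in it.groupby(lines, lambda t: t.rstrip())]
print(unique_keys)   # ['apple', 'banana', 'cherry']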
def discover_catalog(mysql_conn, config): '''Returns a Catalog describing the structure of the database.''' filter_dbs_config = config.get('filter_dbs') if filter_dbs_config: filter_dbs_clause = ",".join( ["'{}'".format(db) for db in filter_dbs_config.split(",")]) table_schema_clause = "WHERE table_schema IN ({})".format( filter_dbs_clause) else: table_schema_clause = """ WHERE table_schema NOT IN ( 'information_schema', 'performance_schema', 'mysql', 'sys' )""" with connect_with_backoff(mysql_conn) as open_conn: with open_conn.cursor() as cur: cur.execute(""" SELECT table_schema, table_name, table_type, table_rows FROM information_schema.tables {} """.format(table_schema_clause)) table_info = {} for (db, table, table_type, rows) in cur.fetchall(): if db not in table_info: table_info[db] = {} table_info[db][table] = { 'row_count': rows, 'is_view': table_type == 'VIEW' } cur.execute(""" SELECT table_schema, table_name, column_name, data_type, character_maximum_length, numeric_precision, numeric_scale, column_type, column_key FROM information_schema.columns {} ORDER BY table_schema, table_name """.format(table_schema_clause)) columns = [] rec = cur.fetchone() while rec is not None: columns.append(Column(*rec)) rec = cur.fetchone() entries = [] for (k, cols) in itertools.groupby( columns, lambda c: (c.table_schema, c.table_name)): cols = list(cols) (table_schema, table_name) = k schema = Schema(type='object', properties={ c.column_name: schema_for_column(c) for c in cols }) md = create_column_metadata(cols) md_map = metadata.to_map(md) md_map = metadata.write(md_map, (), 'database-name', table_schema) is_view = table_info[table_schema][table_name]['is_view'] if table_schema in table_info and table_name in table_info[ table_schema]: row_count = table_info[table_schema][table_name].get( 'row_count') if row_count is not None: md_map = metadata.write(md_map, (), 'row-count', row_count) md_map = metadata.write(md_map, (), 'is-view', is_view) column_is_key_prop = lambda c, s: ( c.column_key == 'PRI' and s.properties[ c.column_name].inclusion != 'unsupported') key_properties = [ c.column_name for c in cols if column_is_key_prop(c, schema) ] if not is_view: md_map = metadata.write(md_map, (), 'table-key-properties', key_properties) entry = CatalogEntry( table=table_name, stream=table_name, metadata=metadata.to_list(md_map), tap_stream_id=common.generate_tap_stream_id( table_schema, table_name), schema=schema) entries.append(entry) return Catalog(entries)
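# Illustrative sketch only: discover_catalog() leans on the SQL
# "ORDER BY table_schema, table_name" to make rows for the same table
# adjacent, then groups them on a composite key. The Column namedtuple and
# rows below are invented stand-ins for the cursor results.
import itertools
from collections import namedtuple

Column = namedtuple('Column', ['table_schema', 'table_name', 'column_name'])

rows = [
    Column('app', 'orders', 'id'),
    Column('app', 'orders', 'total'),
    Column('app', 'users', 'id'),
]  # already ordered by (table_schema, table_name)

for (schema, table), cols in itertools.groupby(
        rows, key=lambda c: (c.table_schema, c.table_name)):
    print(f"{schema}.{table}:", [c.column_name for c in cols])
# app.orders: ['id', 'total']
# app.users: ['id']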
def action_create_invoice(self): """Create the invoice associated to the PO. """ precision = self.env['decimal.precision'].precision_get('Product Unit of Measure') # 1) Prepare invoice vals and clean-up the section lines invoice_vals_list = [] for order in self: if order.invoice_status != 'to invoice': continue pending_section = None # Invoice values. invoice_vals = order._prepare_invoice() # Invoice line values (keep only necessary sections). for line in order.order_line: if line.display_type == 'line_section': pending_section = line continue if not float_is_zero(line.qty_to_invoice, precision_digits=precision): if pending_section: invoice_vals['invoice_line_ids'].append((0, 0, pending_section._prepare_account_move_line())) pending_section = None invoice_vals['invoice_line_ids'].append((0, 0, line._prepare_account_move_line())) invoice_vals_list.append(invoice_vals) if not invoice_vals_list: raise UserError(_('There is no invoiceable line. If a product has a control policy based on received quantity, please make sure that a quantity has been received.')) # 2) group by (company_id, partner_id, currency_id) for batch creation new_invoice_vals_list = [] for grouping_keys, invoices in groupby(invoice_vals_list, key=lambda x: (x.get('company_id'), x.get('partner_id'), x.get('currency_id'))): origins = set() payment_refs = set() refs = set() ref_invoice_vals = None for invoice_vals in invoices: if not ref_invoice_vals: ref_invoice_vals = invoice_vals else: ref_invoice_vals['invoice_line_ids'] += invoice_vals['invoice_line_ids'] origins.add(invoice_vals['invoice_origin']) payment_refs.add(invoice_vals['invoice_payment_ref']) refs.add(invoice_vals['ref']) ref_invoice_vals.update({ 'ref': ', '.join(refs)[:2000], 'invoice_origin': ', '.join(origins), 'invoice_payment_ref': len(payment_refs) == 1 and payment_refs.pop() or False, }) new_invoice_vals_list.append(ref_invoice_vals) invoice_vals_list = new_invoice_vals_list # 3) Create invoices. moves = self.env['account.move'] AccountMove = self.env['account.move'].with_context(default_move_type='in_invoice') for vals in invoice_vals_list: moves |= AccountMove.with_company(vals['company_id']).create(vals) # 4) Some moves might actually be refunds: convert them if the total amount is negative # We do this after the moves have been created since we need taxes, etc. to know if the total # is actually negative or not moves.filtered(lambda m: m.currency_id.round(m.amount_total) < 0).action_switch_invoice_into_refund_credit_note() return self.action_view_invoice(moves)
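# Illustrative sketch only: the batching step in action_create_invoice() above
# groups prepared invoice dicts on a composite key and merges their line
# lists. itertools.groupby only merges adjacent entries, so this sketch sorts
# on the same key first; the sample dicts below are invented.
from itertools import groupby


def batch_key(vals):
    return (vals['partner_id'], vals['currency_id'])


vals_list = [
    {'partner_id': 1, 'currency_id': 2, 'invoice_line_ids': [(0, 0, {'name': 'a'})]},
    {'partner_id': 2, 'currency_id': 2, 'invoice_line_ids': [(0, 0, {'name': 'b'})]},
    {'partner_id': 1, 'currency_id': 2, 'invoice_line_ids': [(0, 0, {'name': 'c'})]},
]

merged = []
for _, group in groupby(sorted(vals_list, key=batch_key), key=batch_key):
    group = list(group)
    base = group[0]
    for other in group[1:]:
        base['invoice_line_ids'] += other['invoice_line_ids']
    merged.append(base)

print([len(v['invoice_line_ids']) for v in merged])   # [2, 1]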
def get_raw(entries, raw_data=False, med_and_avg=False, max_and_min=False):
    entries = sorted(list(entries), key=lambda x: x['case_name'])
    grouped = groupby(entries, key=lambda x: x['case_name'])
    result_list = []
    for case_name, group in grouped:
        json_data = list(group)
        results = [result['result'] for result in json_data]
        unit = [result['unit'] for result in json_data][0]
        if med_and_avg:
            try:
                num_results = filter(is_number, results)
                results = [float(result) for result in num_results]
                results.sort()
                median = results[int(len(results) / 2)] if len(results) % 2 else None
                avg = sum(results) / len(results) if median is None else None
                result = median if median else avg
            except (ZeroDivisionError, TypeError, ValueError):
                result = 'N/A'
                avg = 'N/A'
                median = 'N/A'
        else:
            median = None
            avg = None
            result = None  # no aggregate requested; avoid an unbound name below
        try:
            if max_and_min:
                max_val = max(results)
                min_val = min(results)
            else:
                max_val = None
                min_val = None
        except ValueError:
            max_val = None
            min_val = None

        vic_dict = {
            'full boot': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'boot_analyze_2.0': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'full_boot': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'Acrn_boot_time': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'instance_90': {
                'usage_chart': ('aic_instance', 'json', 'usage trend')
            },
            'game_90': {
                'usage_chart_by_instances': ('aic_game', 'json', 'usage trend')
            },
            'instance_ins': {
                'usage_chart_v2': ('aic_instance', 'json', 'usage trend')
            }
        }
        if raw_data:
            vic = vic_dict.get(case_name)
            raw = json_data
        else:
            vic = None
            raw = None
        rg = ResultGroup(case_name=case_name, unit=unit, maximum=max_val,
                         minimum=min_val, median=median, average=avg, raw=raw,
                         result=result, fluc=None, vic=vic)
        result_list.append(rg)
    return result_list
def __init__(self, parameters): self.parameters = {k: list(g) for k, g in itertools.groupby(parameters, key=lambda p: p['in'])}
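# Illustrative sketch only: a dict comprehension over groupby, as in __init__
# above, keeps only the *last* run for a repeated key when the input is not
# sorted (or otherwise grouped) on that key. The parameter dicts are invented.
import itertools

params = [{'in': 'query', 'name': 'a'},
          {'in': 'header', 'name': 'b'},
          {'in': 'query', 'name': 'c'}]

unsorted = {k: [p['name'] for p in g]
            for k, g in itertools.groupby(params, key=lambda p: p['in'])}
presorted = {k: [p['name'] for p in g]
             for k, g in itertools.groupby(sorted(params, key=lambda p: p['in']),
                                           key=lambda p: p['in'])}
print(unsorted)    # {'query': ['c'], 'header': ['b']} -- first 'query' run lost
print(presorted)   # {'header': ['b'], 'query': ['a', 'c']}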
def autodiscover(path=None, plugin_prefix='intake_', do_package_scan=True): r"""Discover intake drivers. In order of decreasing precedence: - Respect the 'drivers' section of the intake configuration file. - Find 'intake.drivers' entrypoints provided by any Python packages in the environment. - Search all packages in the environment for names that begin with ``intake\_``. Import them and scan them for subclasses of ``intake.source.base.Plugin``. This was previously the *only* mechanism for auto-discoverying intake drivers, and it is maintained for backward compatibility. In a future release, intake will issue a warning if any packages are located by the method that do not also have entrypoints. Parameters ---------- path : str or None Default is ``sys.path``. plugin_prefix : str DEPRECATED. Default is 'intake\_'. do_package_scan : boolean Default is True. In the future, the default will be changed to False, and the option may eventually be removed entirely. Returns ------- drivers : dict Name mapped to driver class. """ # Discover drivers via package scan. if do_package_scan: package_scan_results = _package_scan(path, plugin_prefix) if package_scan_results: warnings.warn( "The option `do_package_scan` may be removed in a future release.", PendingDeprecationWarning) else: package_scan_results = {} # Discover drivers via entrypoints. group = entrypoints.get_group_named('intake.drivers', path=path) group_all = entrypoints.get_group_all('intake.drivers', path=path) if len(group_all) != len(group): # There are some name collisions. Let's go digging for them. for name, matches in itertools.groupby(group_all, lambda ep: ep.name): matches = list(matches) if len(matches) != 1: winner = group[name] logger.debug( "There are %d 'intake.driver' entrypoints for the name " "%r. They are %r. The match %r has won the race.", len(matches), name, matches, winner) for name, entrypoint in group.items(): logger.debug("Discovered entrypoint '%s = %s.%s'", name, entrypoint.module_name, entrypoint.object_name) if name in package_scan_results: cls = package_scan_results[name] del package_scan_results[name] logger.debug("Entrypoint shadowed package_scan result '%s = %s.%s'", name, cls.__module__, cls.__name__) # Discover drivers via config. drivers_conf = conf.get('drivers', {}) logger.debug("Using configuration file at %s", cfile()) for name, dotted_object_name in drivers_conf.items(): if not dotted_object_name: logger.debug('Name %s is banned in config file', name) if name in group: entrypoint = group[name] del group[name] logger.debug("Disabled entrypoint '%s = %s.%s'", entrypoint.name, entrypoint.module_name, entrypoint.object_name) if name in package_scan_results: cls = package_scan_results[name] del package_scan_results[name] logger.debug("Disabled package_scan result '%s = %s.%s'", name, cls.__module__, cls.__name__) continue module_name, object_name = dotted_object_name.rsplit('.', 1) entrypoint = entrypoints.EntryPoint(name, module_name, object_name) logger.debug("Discovered config-specified '%s = %s.%s'", entrypoint.name, entrypoint.module_name, entrypoint.object_name) if name in group: shadowed = group[name] logger.debug("Config shadowed entrypoint '%s = %s.%s'", shadowed.name, shadowed.module_name, shadowed.object_name) if name in package_scan_results: cls = package_scan_results[name] del package_scan_results[name] logger.debug("Config shadowed package scan result '%s = %s.%s'", name, cls.__module__, cls.__name__) group[name] = entrypoint # Discovery is complete. 
if package_scan_results: warnings.warn( f"The drivers {list(package_scan_results)} do not specify entry_" f"points and were only discovered via a package scan. This may " f"break in a future release of intake. The packages should be " f"updated.", FutureWarning) # Load entrypoints. Any that were shadowed or banned have already been # removed above. drivers = {} for entrypoint in group.values(): try: drivers[entrypoint.name] = _load_entrypoint(entrypoint) except ConfigurationError: logger.exception( "Error while loading entrypoint %s", entrypoint.name) continue logger.debug("Loaded entrypoint '%s = %s.%s'", entrypoint.name, entrypoint.module_name, entrypoint.object_name) # Now include any package scan results. Any that were shadowed or # banned have already been removed above. for name, cls in package_scan_results.items(): drivers[name] = cls logger.debug("Loaded package scan result '%s = %s.%s'", name, cls.__module__, cls.__name__) return drivers
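# Illustrative sketch only: autodiscover() uses groupby on entrypoint names to
# spot collisions, i.e. names that appear more than once. The records below
# are invented namedtuples rather than real entrypoints; note the list is
# sorted by name first so that equal names end up adjacent.
import itertools
from collections import namedtuple

EP = namedtuple('EP', ['name', 'module_name'])

group_all = sorted([EP('csv', 'pkg_a'), EP('csv', 'pkg_b'), EP('json', 'pkg_a')],
                   key=lambda ep: ep.name)

for name, matches in itertools.groupby(group_all, lambda ep: ep.name):
    matches = list(matches)
    if len(matches) != 1:
        print(f"{name!r} is provided by {len(matches)} packages:",
              [ep.module_name for ep in matches])
# 'csv' is provided by 2 packages: ['pkg_a', 'pkg_b']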
def report( url: str, ) -> typing.Union[context.Context, github.GithubInstallationClient, None]: path = url.replace("https://github.com/", "") pull_number: typing.Optional[str] repo: typing.Optional[str] try: owner, repo, _, pull_number = path.split("/") except ValueError: pull_number = None try: owner, repo = path.split("/") except ValueError: owner = path repo = None try: client = github.get_client(owner) except exceptions.MergifyNotInstalled: print(f"* Mergify is not installed on account {owner}") return None # Do a dumb request just to authenticate client.get("/") if client.auth.installation is None: print("No installation detected") return None print("* INSTALLATION ID: %s" % client.auth.installation["id"]) cached_sub, db_sub = utils.async_run( subscription.Subscription.get_subscription(client.auth.owner_id), subscription.Subscription._retrieve_subscription_from_db( client.auth.owner_id), ) if repo is None: slug = None else: slug = owner + "/" + repo print("* SUBSCRIBED (cache/db): %s / %s" % (cached_sub.active, db_sub.active)) print("* Features (cache):") for f in cached_sub.features: print(f" - {f.value}") report_sub(client.auth.installation["id"], cached_sub, "ENGINE-CACHE", slug) report_sub(client.auth.installation["id"], db_sub, "DASHBOARD", slug) utils.async_run(report_worker_status(client.auth.owner)) if repo is not None: repo_info = client.item(f"/repos/{owner}/{repo}") print( f"* REPOSITORY IS {'PRIVATE' if repo_info['private'] else 'PUBLIC'}" ) print("* CONFIGURATION:") mergify_config = None try: filename, mergify_config_content = rules.get_mergify_config_content( client, repo) except rules.NoRules: # pragma: no cover print(".mergify.yml is missing") else: print(f"Config filename: {filename}") print(mergify_config_content.decode()) try: mergify_config = rules.UserConfigurationSchema( mergify_config_content) except rules.InvalidRules as e: # pragma: no cover print("configuration is invalid %s" % str(e)) else: mergify_config["pull_request_rules"].rules.extend( engine.DEFAULT_PULL_REQUEST_RULES.rules) if pull_number is None: for branch in client.items(f"/repos/{owner}/{repo}/branches"): q = queue.Queue( utils.get_redis_for_cache(), repo_info["owner"]["id"], repo_info["owner"]["login"], repo_info["id"], repo_info["name"], branch["name"], ) pulls = q.get_pulls() if not pulls: continue print(f"* QUEUES {branch['name']}:") for priority, grouped_pulls in itertools.groupby( pulls, key=lambda v: q.get_config(v)["priority"]): try: fancy_priority = merge.PriorityAliases(priority).name except ValueError: fancy_priority = priority formatted_pulls = ", ".join( (f"#{p}" for p in grouped_pulls)) print(f"** {formatted_pulls} (priority: {fancy_priority})") else: pull_raw = client.item( f"/repos/{owner}/{repo}/pulls/{pull_number}") ctxt = context.Context( client, pull_raw, cached_sub, [{ "event_type": "mergify-debugger", "data": {} }], ) # FIXME queues could also be printed if no pull number given q = queue.Queue.from_context(ctxt) print("* QUEUES: %s" % ", ".join([f"#{p}" for p in q.get_pulls()])) print("* PULL REQUEST:") pr_data = dict(ctxt.pull_request.items()) pprint.pprint(pr_data, width=160) print("is_behind: %s" % ctxt.is_behind) print("mergeable_state: %s" % ctxt.pull["mergeable_state"]) print("* MERGIFY LAST CHECKS:") for c in ctxt.pull_engine_check_runs: print("[%s]: %s | %s" % (c["name"], c["conclusion"], c["output"].get("title"))) print("> " + "\n> ".join(c["output"].get("summary").split("\n"))) if mergify_config is not None: print("* MERGIFY LIVE MATCHES:") match = mergify_config[ 
"pull_request_rules"].get_pull_request_rule(ctxt) summary_title, summary = actions_runner.gen_summary( ctxt, match) print("> %s" % summary_title) print(summary) return ctxt return client