def names(self):
    """Build the names text from the class labels and the indicators.

    The distinct values of ``self.code_to_cls`` are sorted, converted with
    ``str`` and joined with ``', '``.  Each entry of ``self.indicators``
    contributes one ``'<indicator>:\\tTrue, False.'`` line.  Both pieces are
    substituted positionally into ``NAMES_TPL``.

    NOTE(review): the output looks like a C4.5-style ``.names`` description;
    confirm against the definition of ``NAMES_TPL``.

    Returns:
        The formatted string produced by ``NAMES_TPL.format``.
    """
    # Deduplicate before sorting so every class label is listed once.
    unique_classes = sorted(set(d_itervalues(self.code_to_cls)))
    class_line = ', '.join(imap(str, unique_classes))

    attribute_lines = [
        '{0}:\tTrue, False.'.format(str(indicator))
        for indicator in self.indicators
    ]
    return NAMES_TPL.format(class_line, '\n'.join(attribute_lines))
def _iter_rows(self):
    """Yield one row per sample that every indicator has a value for.

    Each value of ``self.ids_to_samples`` is unpacked as a
    ``(site, tstamp, code)`` triple.  A row starts with the site and the
    class looked up in ``self.code_to_cls`` for the sample's code, followed
    by ``indicator.data[site]`` for each indicator in ``self.indicators``
    order.  If any indicator lookup raises ``KeyError`` the whole sample is
    skipped.

    Yields:
        list: ``[site, class, feature_1, ..., feature_n]``.

    Raises:
        KeyError: if a sample's code is missing from ``self.code_to_cls``
            (that lookup is not guarded).
    """
    for sample in d_itervalues(self.ids_to_samples):
        # The timestamp is part of the sample but not used in the row.
        site, _tstamp, code = sample
        row = [site, self.code_to_cls[code]]

        complete = True
        for indicator in self.indicators:
            try:
                feature = indicator.data[site]
            except KeyError:
                # One missing feature disqualifies the entire sample.
                complete = False
                break
            row.append(feature)

        if complete:
            yield row
def run(self):
    """Emit ``permalink<TAB>site`` for sites that appear exactly once.

    Steps, as performed by the code:

    1. Build an ``AssociationTree`` over ``split_domain`` and grow it with
       every record parsed from ``self.args.site_count``, keyed by
       ``domain(record.site)``.
    2. Map the records parsed from ``self.args.companies`` through the tree
       using ``domain(company.hp)`` as the lookup key.
    3. Count how often each site occurs across all mapped site lists.
    4. For every (company, sites) pair, write a tab-separated line via
       ``self.out`` for each site whose total count is 1.

    NOTE(review): presumably this keeps only sites unambiguously tied to a
    single company; confirm against ``AssociationTree.map`` semantics.
    """
    tree = AssociationTree(split_domain)
    for site_count in imap(SITE_COUNT_PARSER, self.args.site_count):
        tree.grow(site_count, domain(site_count.site))

    parsed_companies = imap(COMPANY_PARSER, self.args.companies)
    sites_by_company = tree.map(
        parsed_companies, lambda company: domain(company.hp)
    )

    # Occurrence count of each site over every company's site list.
    occurrences = count_by_key(
        site
        for site_list in d_itervalues(sites_by_company)
        for site in site_list
    )

    for company, site_list in d_iteritems(sites_by_company):
        for site in site_list:
            if occurrences[site] != 1:
                continue
            self.out('\t'.join((company.permalink, site.site)))
def relation_stats(iterable):
    """Summarise how often keys repeat in ``iterable``.

    The items are tallied with ``count_by_key`` and four numbers are
    derived from the resulting counts.

    Returns:
        tuple: ``(total, distinct, repeated_total, repeated_distinct)``
        where

        * ``total`` is the sum of all counts,
        * ``distinct`` is the number of keys,
        * ``repeated_total`` is the sum of the counts greater than 1,
        * ``repeated_distinct`` is the number of keys with a count
          greater than 1.
    """
    counter = count_by_key(iterable)
    counts = list(d_itervalues(counter))
    # Counts belonging to keys that occur more than once.
    repeated = [value for value in counts if value > 1]
    return (sum(counts), len(counter), sum(repeated), len(repeated))