Пример #1
0
 def names(self):
     """Return NAMES_TPL filled in with the class list and attribute lines.

     The first template field is the sorted, de-duplicated values of
     ``self.code_to_cls`` joined with ``', '``.  The second field holds one
     ``<indicator>:<TAB>True, False.`` line per entry in ``self.indicators``,
     joined with newlines.
     """
     # Collapse duplicate class labels and fix their order before rendering.
     unique_classes = sorted(set(d_itervalues(self.code_to_cls)))
     class_field = ', '.join(imap(str, unique_classes))

     attribute_lines = ['{0}:\tTrue, False.'.format(str(indicator))
                        for indicator in self.indicators]
     attribute_field = '\n'.join(attribute_lines)

     return NAMES_TPL.format(class_field, attribute_field)
Пример #2
0
 def _iter_rows(self):
     """Yield one row per sample whose site is present in every indicator.

     A row is ``[site, class, feature_1, ..., feature_n]``: the class comes
     from ``self.code_to_cls[code]`` and each feature from
     ``indicator.data[site]``, in the order of ``self.indicators``.  A sample
     is skipped entirely as soon as one indicator raises ``KeyError`` for
     its site.
     """
     for sample in d_itervalues(self.ids_to_samples):
         # The middle element (a timestamp) is not used when building rows.
         site, _tstamp, code = sample
         record = [site, self.code_to_cls[code]]
         complete = True
         for indicator in self.indicators:
             try:
                 value = indicator.data[site]
             except KeyError:
                 # No data for this site: drop the whole sample.
                 complete = False
                 break
             record.append(value)
         if complete:
             yield record
Пример #3
0
    def run(self):
        """Emit ``permalink<TAB>site`` for company/site pairs with a count of 1.

        Steps:
        1. Build an ``AssociationTree`` and grow it with every parsed entry
           of ``self.args.site_count``, keyed by ``domain(entry.site)``.
        2. Map the parsed ``self.args.companies`` onto the tree using
           ``domain(company.hp)`` as the key.
        3. Tally every site across all mapped lists with ``count_by_key``
           (presumably an occurrence count per site -- confirm against its
           definition) and output only pairs whose site has a tally of 1.
        """
        assoc_tree = AssociationTree(split_domain)
        for site_count in imap(SITE_COUNT_PARSER, self.args.site_count):
            assoc_tree.grow(site_count, domain(site_count.site))

        def company_key(company):
            return domain(company.hp)

        parsed_companies = imap(COMPANY_PARSER, self.args.companies)
        company_sites = assoc_tree.map(parsed_companies, company_key)

        # Flatten all per-company site lists into one stream for tallying.
        occurrences = count_by_key(
            site
            for site_list in d_itervalues(company_sites)
            for site in site_list)

        for company, site_list in d_iteritems(company_sites):
            for site in site_list:
                if occurrences[site] != 1:
                    continue
                self.out('\t'.join((company.permalink, site.site)))
Пример #4
0
def relation_stats(iterable):
    """Summarise the per-key counts of *iterable* as a 4-tuple.

    ``count_by_key(iterable)`` is treated as a mapping whose values are
    counts (presumably key -> number of occurrences; confirm against its
    definition).  The returned tuple is:

    0. the sum of all counts,
    1. the number of entries in the mapping,
    2. the sum of the counts that are greater than 1,
    3. the number of counts that are greater than 1.
    """
    counter = count_by_key(iterable)
    counts = list(d_itervalues(counter))
    repeated = [count for count in counts if count > 1]

    total = sum(counts)
    distinct = len(counter)
    repeated_total = sum(repeated)
    repeated_keys = len(repeated)
    return (total, distinct, repeated_total, repeated_keys)