示例#1
0
    def run(self):
        """Print ``permalink<TAB>site`` for every site mapped to one company.

        An ``AssociationTree`` is grown from the parsed site counts, keyed
        by the domain of each site.  The parsed companies are then mapped
        onto that tree via the domain of their ``hp`` attribute.  Only
        sites that ``count_by_key`` reports exactly once over all of the
        mapped site lists are written out.
        """
        association = AssociationTree(split_domain)
        for site_count in imap(SITE_COUNT_PARSER, self.args.site_count):
            association.grow(site_count, domain(site_count.site))

        parsed_companies = imap(COMPANY_PARSER, self.args.companies)
        sites_by_company = association.map(
            parsed_companies, lambda company: domain(company.hp)
        )

        # How often each site shows up across all companies' site lists.
        occurrences = count_by_key(
            site
            for site_list in d_itervalues(sites_by_company)
            for site in site_list
        )

        for company, site_list in d_iteritems(sites_by_company):
            for site in site_list:
                if occurrences[site] != 1:
                    # Site is claimed more than once: skip it.
                    continue
                self.out('\t'.join([company.permalink, site.site]))
示例#2
0
 def produce_features_weka(self):
     """Dump the feature rows to an ARFF file placed next to the config.

     The output path is the config path (``self.args.config``) with its
     extension replaced by ``.arff``.  The attribute names are ``site``
     and ``code`` followed by the string form of every indicator in
     ``self.indicators``; the rows come from ``self._iter_rows()``.
     """
     root, _ = os.path.splitext(self.args.config)
     arff_f = root + '.arff'
     # Name the relation after the config file itself rather than its
     # full path; the original computed this value but then passed
     # ``root`` to ``arff.dump`` by mistake.
     relation = os.path.basename(root)
     names = ['site', 'code']
     names.extend(imap(str, self.indicators))
     arff.dump(arff_f, self._iter_rows(), relation=relation, names=names)
示例#3
0
 def names(self):
     """Return ``NAMES_TPL`` filled with class labels and attributes.

     The first template slot receives the distinct values of
     ``self.code_to_cls``, sorted and joined by ``', '``.  The second
     slot receives one ``<indicator>:\\tTrue, False.`` line per entry
     of ``self.indicators``, joined by newlines.
     """
     distinct_classes = sorted(set(d_itervalues(self.code_to_cls)))
     class_line = ', '.join(imap(str, distinct_classes))

     attribute_lines = []
     for indicator in self.indicators:
         attribute_lines.append(
             '{0}:\tTrue, False.'.format(str(indicator))
         )

     return NAMES_TPL.format(class_line, '\n'.join(attribute_lines))
示例#4
0
文件: plots.py 项目: fuzzy-id/midas
def iter_sites_w_company(directory_or_file):
    """Yield one tuple per parsed sites-with-company record.

    The contents obtained from ``iter_files_content(directory_or_file)``
    are parsed with ``SITES_W_COMPANY_PARSER``.  For every record a
    ``pandas.Series`` is built from the ``rank`` attributes of
    ``swc.ranking``, indexed by a ``pandas.DatetimeIndex`` of the
    matching ``tstamp`` attributes.

    Yields:
        ``(site, ranks_series, company, code, timestamp)`` where
        ``timestamp`` is ``pandas.Timestamp(swc.tstamp)``.
    """
    contents = iter_files_content(directory_or_file)
    for swc in imap(SITES_W_COMPANY_PARSER, contents):
        # Build real lists: the builtin ``map`` is a one-shot lazy
        # iterator on Python 3, and the pandas constructors should not
        # have to deal with that.
        ranks = [entry.rank for entry in swc.ranking]
        index = pandas.DatetimeIndex(
            [entry.tstamp for entry in swc.ranking]
        )
        ts = pandas.Series(ranks, index=index)
        tstamp = pandas.Timestamp(swc.tstamp)
        yield (swc.site, ts, swc.company, swc.code, tstamp)
示例#5
0
 def __call__(self, parser, namespace, value, option_string=None):
     """Store a ``fileinput`` stream for ``value`` on ``namespace``.

     When ``value`` is a directory, the stream covers the regular files
     found directly inside it (``os.listdir`` is not recursive);
     otherwise ``value`` itself is the only file.  The stream is set
     under the attribute name ``self.dest``.
     """
     if not os.path.isdir(value):
         files = [value]
     else:
         make_abs = functools.partial(os.path.join, value)
         candidates = imap(make_abs, os.listdir(value))
         # Keep regular files only; sub-directories and the like are
         # dropped.
         files = [path for path in candidates if os.path.isfile(path)]
     setattr(namespace, self.dest, fileinput.input(files))
示例#6
0
 def ids_to_samples(self):
     """Return a dict mapping site ids to ``(site, tstamp, code)``.

     The sample location is ``self.args.samples`` when that is truthy,
     otherwise ``self.config['samples']``.  It may be a single file or
     a directory, in which case every regular file directly inside it
     is read.  Each file is read as tab-delimited rows of
     ``site, tstamp, code``; the timestamp goes through
     ``parse_tstamp`` and the site is translated through
     ``self.sites_to_ids``.  A later row for the same site id
     overwrites an earlier one.
     """
     location = self.args.samples or self.config['samples']

     if os.path.isfile(location):
         sample_files = [location]
     else:
         make_abs = functools.partial(os.path.join, location)
         sample_files = [
             path
             for path in imap(make_abs, os.listdir(location))
             if os.path.isfile(path)
         ]

     samples = {}
     for sample_file in sample_files:
         rows = csv_file_reader(sample_file, delimiter='\t')
         for site, raw_tstamp, code in rows:
             parsed_tstamp = parse_tstamp(raw_tstamp)
             samples[self.sites_to_ids[site]] = (site, parsed_tstamp, code)
     return samples
示例#7
0
    def generate_missing_indicators(self):
        """Produce every indicator whose ``produced`` flag is false.

        The pending indicators are listed via ``self.out`` and put on a
        queue.  After the user confirms, ``IndicatorUpdater`` threads
        are started: at most ``self.num_threads`` of them, and never
        more than there are queued indicators.  All threads are joined
        before the queue itself is.

        Raises:
            SystemExit: if the user declines to proceed.
            Exception: if any worker reports a truthy ``failed``.
        """
        pending = Queue()
        self.out('Generating the following indicators:')
        for indicator in self.indicators:
            if indicator.produced:
                continue
            pending.put(indicator)
            self.out(str(indicator))

        if not self.query_user_permission('Proceed?'):
            raise SystemExit('Canceled due to user interaction')

        worker_count = min(self.num_threads, pending.qsize())
        workers = []
        for _ in range(worker_count):
            caller = StreamAlexaIndicatorsCaller(self.cmd_path)
            worker = IndicatorUpdater(self.ids_to_samples, pending, caller)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        if any(worker.failed for worker in workers):
            raise Exception('At least one thread died!')
        pending.join()
示例#8
0
文件: plots.py 项目: fuzzy-id/midas
def make_fr_per_date_plot(companies, plot_file=None):
    """Plot a per-month histogram of funding rounds, grouped by code.

    Records are parsed from ``iter_files_content(companies)`` with
    ``FLATTENED_PARSER``.  Only records whose ``tstamp`` is on or after
    2011-03-01 are kept.  One histogram series is drawn per distinct
    ``code``, with bin edges on the first day of every month that has
    data plus the first day of the month after the last one.

    Args:
        companies: passed straight to ``iter_files_content``.
        plot_file: when truthy, the figure is also saved to this target
            via ``fig.savefig``.

    Returns:
        The created matplotlib figure.

    Raises:
        IndexError: if no record passes the date filter, because there
            is then no last month to derive the right border from.
    """
    contents = iter_files_content(companies)
    dates_by_code = collections.defaultdict(list)
    min_date = datetime.date(2011, 3, 1)
    months = set()
    for c in imap(FLATTENED_PARSER, contents):
        if c.tstamp >= min_date:
            dates_by_code[c.code].append(matplotlib.dates.date2num(c.tstamp))
            months.add(datetime.date(c.tstamp.year, c.tstamp.month, 1))

    months = sorted(months)

    # Close the last bin: 31 days past the first of a month always
    # lands in the following month, which is then snapped to its first
    # day.
    right_border = months[-1] + datetime.timedelta(31)
    right_border = datetime.date(right_border.year, right_border.month, 1)
    months.append(right_border)

    # Materialize datasets and labels as lists in one matching order;
    # on Python 3 ``dict.values()`` and ``map(...)`` are a view and a
    # lazy iterator rather than sequences.
    codes = list(dates_by_code)
    datasets = [dates_by_code[code] for code in codes]
    labels = [code.title() for code in codes]

    fig = plt.figure(figsize=(4*1.4, 3*1.4))
    ax = fig.add_subplot(111)
    ax.hist(datasets, label=labels,
            bins=matplotlib.dates.date2num(months))
    ax.set_xlim(matplotlib.dates.date2num(months[0]),
                matplotlib.dates.date2num(months[-1]))
    ax.legend()
    ax.xaxis.set_major_locator(
        matplotlib.dates.MonthLocator(bymonthday=15, interval=2)
        )
    ax.xaxis.set_major_formatter(
        matplotlib.ticker.FuncFormatter(
            # ``num`` rather than ``d``, to avoid shadowing outer names.
            lambda num, _: matplotlib.dates.num2date(num).strftime('%B %Y')
            )
        )
    fig.autofmt_xdate()
    ax.set_ylabel('Number of Funding Rounds')
    ax.grid(True, axis='y')
    if plot_file:
        fig.savefig(plot_file)
    return fig