def main():
    """Parse command-line options, open the databases and export the stats CSV.

    Declares the ``console`` and ``mongo_host`` options, parses the command
    line, configures logging through ``basicConfig`` and then hands the stats
    and company databases to ``_create_csv``.

    Returns:
        int: always ``-1``.
        NOTE(review): this looks like a leftover debugging short-circuit
        rather than a meaningful status code -- confirm with callers before
        changing it.
    """
    define("console", default=False, type=bool)
    define("mongo_host", default='localhost')

    parse_command_line()
    basicConfig(options=options)

    # NOTE(review): country_db is not used below; only a disabled per-country
    # counting pass (country_db.increment per office country code) referred
    # to it. The construction is kept in case it has side effects such as
    # opening a connection -- TODO confirm and remove.
    country_db = CountryDB(options.mongo_host)
    company_db = CompanyDB(options.mongo_host)
    stats_db = StatsDB(options.mongo_host)

    # _create_csv reads the documents that _create_stats writes. To rebuild
    # stats_db from company_db first, call
    # _create_stats(stats_db, company_db) here.
    _create_csv(stats_db, company_db)

    # The category/founding-date summary used to sit after this return and
    # could never run; it now lives in _print_founding_summary(stats_db).
    return -1


def _print_founding_summary(stats_db):
    """Print category and founding-date counts for every stats document.

    Prints three lines: the total number of documents counted by category,
    the per-year founding counts, and the sorted ``((year, month), count)``
    pairs. Documents without a ``data.founded_at`` value are counted in the
    category total but left out of the date counts.
    """
    categories = defaultdict(int)
    year_month = defaultdict(int)
    year = defaultdict(int)

    for stats in adv_enumerate(stats_db.find()):
        categories[get_dotted(stats, 'data.category_code')] += 1
        founded_at = get_dotted(stats, 'data.founded_at')
        if not founded_at:
            continue
        year_month[(founded_at.year, founded_at.month)] += 1
        year[founded_at.year] += 1

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3).
    print(sum(categories.values()))
    print(year)
    print(sorted(year_month.items()))
def _create_csv(stats_db, company_db):
    """Write one CSV row per stats document to standard output.

    Columns: name, num_countries, funding, funding_capped, days_to_funding,
    num_rounds, employees, year, category, country.

    * ``name`` is the document ``_id`` with spaces replaced by underscores.
    * ``funding`` is ``total_money_raised`` divided by 10**6;
      ``funding_capped`` is the same value limited to 100.
    * ``days_to_funding`` is the number of days between ``founded_at`` and
      the first funding round's ``funded_at``; it stays 0 when either date is
      missing or the founding year is 1995 or earlier.
    * ``country`` is the first entry of ``countries`` when it is in
      ``COUNTRIES``, otherwise ``'other'``.

    Fields are written with the ``csv`` module so values containing commas or
    quotes are quoted instead of corrupting the column layout. Missing
    ``countries`` / ``funding_rounds`` are treated as empty and a missing
    ``founded_at`` yields ``None`` for the year (rendered as ``'None'``, like
    a missing ``employees`` value).

    Args:
        stats_db: source of the stats documents; ``find({}, None)`` is
            iterated and each document must have ``_id`` and ``data``.
        company_db: unused; kept so the signature matches ``_create_stats``.

    Raises:
        TypeError, ValueError: if ``total_money_raised`` is missing or cannot
            be converted to ``float``.
    """
    import csv
    import logging
    import sys

    log = logging.getLogger(__name__)
    # lineterminator='\n' keeps the newline that ``print`` used to emit
    # (csv.writer defaults to '\r\n').
    writer = csv.writer(sys.stdout, lineterminator='\n')
    writer.writerow([
        'name', 'num_countries', 'funding', 'funding_capped',
        'days_to_funding', 'num_rounds', 'employees', 'year', 'category',
        'country',
    ])

    for doc in adv_enumerate(stats_db.find({}, None), frequency=1000):
        doc_id = doc.get('_id')
        name = doc_id.replace(' ', '_')
        data = doc['data']

        countries = data.get('countries') or []
        country = countries[0] if countries else None
        if country not in COUNTRIES:
            country = 'other'

        funding_rounds = data.get('funding_rounds') or []
        founded_at = data.get('founded_at')

        days_to_funding = 0
        if founded_at and funding_rounds:
            first_funded_at = funding_rounds[0]['funded_at']
            if first_funded_at and founded_at.year > 1995:
                days_to_funding = (first_funded_at - founded_at).days

        funding = float(data.get('total_money_raised')) / 10**6
        funding_capped = min(funding, 100)

        row = [
            name,
            len(countries),
            funding,
            funding_capped,
            days_to_funding,
            len(funding_rounds),
            data.get('number_of_employees'),
            founded_at.year if founded_at else None,
            data.get('category_code'),
            country,
        ]

        try:
            # str() every value up front so numbers and None are rendered
            # exactly as ','.join(map(str, ...)) rendered them.
            writer.writerow([str(value) for value in row])
        except Exception as exc:
            # Deliberately best-effort: a row that cannot be rendered (for
            # example a non-ASCII name under Python 2) is skipped, but it is
            # reported instead of vanishing silently.
            log.warning('skipping CSV row for %r: %s', doc_id, exc)
def _create_stats(stats_db, company_db):
    """Derive a stats document for every company and store it in stats_db.

    For each company returned by ``company_db.find`` the document holds the
    country code of every office under ``'countries'`` plus one entry per
    function in ``STATS``; each of those functions is called with the company
    and returns a ``(stat_name, value)`` pair. The document is saved under
    the company's ``name``.
    """
    cursor = company_db.find({}, {}, count=None)
    for record in adv_enumerate(cursor, frequency=1000):
        key = record.get('name')
        document = {
            'countries': [
                office['country_code'] for office in record['offices']
            ],
        }
        # Applied after 'countries' so a stat with the same name overrides it.
        for compute in STATS:
            label, value = compute(record)
            document[label] = value
        stats_db.save(key, document)