示例#1
0
    def analyze(self, extracts: Iterable[Tuple[str, Any]]) -> None:
        """Fold company extracts into per-zip-code, per-period totals.

        For each ``(company_id, company)`` pair the zip code, city and state
        are read with ``composites.get_property``. The first company seen for
        a zip code seeds that zip's ``'invariant'`` entry in
        ``self.city_data``; later companies sharing the zip code leave it
        untouched. Then, for every period reported by
        ``composites.get_periods``, the zip/period counters are bumped: one
        more company, plus that company's employee and revenue observations.

        Args:
            extracts: Iterable of ``(company_id, company)`` pairs. The id is
                not used by this method; only the company composite is read.
        """
        for _company_id, company in extracts:
            zip_code = composites.get_property(company, self.source_zip_var)
            city = composites.get_property(company, self.source_city_var)
            state = composites.get_property(company, self.source_state_var)

            # Transient per-zip composite; it is meant to be turned into its
            # final form later, in emit().
            zip_record = self.city_data.setdefault(
                zip_code,
                {'invariant': {"zip": zip_code, "city": city, "state": state}},
            )

            for period in composites.get_periods(company):
                totals = zip_record.setdefault(
                    period,
                    {"n_companies": 0, "tot_employees": 0, "tot_revenue": 0.0},
                )

                # Count first, then add each observation in turn, so a failure
                # part-way through leaves the same partial state as before.
                totals["n_companies"] += 1

                headcount = composites.get_observation(
                    company, period, self.n_employee_var)
                totals["tot_employees"] += headcount

                revenue = composites.get_observation(
                    company, period, self.revenue_var)
                totals["tot_revenue"] += revenue
示例#2
0
 def __call__(self, composite: Dict):
     """Compute the mean of a per-period observation and store it.

     The observation named by ``self.annual_prod_var`` is read for every
     period returned by ``composites.get_periods``. The values are averaged
     with ``numpy.average`` and the result is handed to
     ``composites.put_property`` under ``self.mean_prod_var``.

     Args:
         composite: The composite to read observations from and to pass to
             ``composites.put_property``.
     """
     def read_value(period_key):
         # One observation lookup for a single period.
         return composites.get_observation(composite, period_key,
                                           self.annual_prod_var)

     # Materialise the periods before any observation is read.
     period_keys = tuple(composites.get_periods(composite))
     yearly_values = list(map(read_value, period_keys))

     average_value = numpy.average(yearly_values)
     composites.put_property(composite, self.mean_prod_var, average_value)
示例#3
0
 def __call__(self, composite: Dict):
     """Fit a linear regression over the periods and store slope and p-value.

     The composite's periods are converted to ``int`` and sorted. For each
     one, the observation named by ``self.annual_weight_var`` is read using
     ``str(year)`` as the period key. ``scipy.stats.linregress`` is then run
     with the years as x and the observations as y. The fitted slope is
     passed to ``composites.put_property`` under ``self.weight_slope_var``
     and the p-value under ``self.weight_pval_var``.

     Args:
         composite: The composite to read observations from and to pass to
             ``composites.put_property``.
     """
     ordered_years = sorted(map(int, composites.get_periods(composite)))

     observed = []
     for year in ordered_years:
         # Periods are looked up by the string form of the integer year.
         observed.append(
             composites.get_observation(composite, str(year),
                                        self.annual_weight_var))

     x_values = np.asarray(ordered_years)
     y_values = np.asarray(observed)
     fit = scipy.stats.linregress(x_values, y_values)

     composites.put_property(composite, self.weight_slope_var, fit.slope)
     composites.put_property(composite, self.weight_pval_var, fit.pvalue)
示例#4
0
 def get_rows(self, composite_id, composite):
     """Generate row data for one composite.

     When ``self.invariant`` is truthy, a single item is produced: a
     one-element list holding a dict that maps every column name in
     ``self.column_vars`` to the value returned by ``get_property`` for the
     matching variable, with ``'composite_id'`` set last.

     Otherwise one dict is produced for each key of ``composite`` whose
     ``isdigit()`` is true. Each dict holds ``'composite_id'``, ``'period'``
     and, per column, ``get_observation(composite, period, var, True)``.

     NOTE(review): the invariant branch yields a list wrapping the dict,
     whereas the per-period branch yields bare dicts. The asymmetry is kept
     exactly as found; confirm with callers whether it is intended.

     Args:
         composite_id: Identifier copied into every produced row.
         composite: Mapping whose digit-only keys are treated as periods.

     Yields:
         ``[dict]`` once in invariant mode, else one ``dict`` per period.
     """
     if not self.invariant:
         for period, _ in composite.items():
             # Non-numeric keys are not periods; skip them.
             if not period.isdigit():
                 continue
             row = {'composite_id': composite_id, 'period': period}
             for name, var in self.column_vars.items():
                 row[name] = get_observation(composite, period, var, True)
             yield row
         return

     data = {}
     for name, var in self.column_vars.items():
         data[name] = get_property(composite, var)
     # Assigned after the columns, so it wins over a same-named column.
     data['composite_id'] = composite_id
     yield [data]