Exemplo n.º 1
0
class AssignProductivityRank(Scan):
    """Rank composites by mean productivity, largest value first.

    ``extract`` reads the value stored under ``mean_prod_var``, ``analyze``
    turns all extracted values into 1-based ranks, and ``alter`` writes each
    composite's rank to ``prod_rank_var``.
    """
    # Read in extract() through Composite.get_immutable.
    mean_prod_var: VariableId = VariableValidator(data_type=Decimal,
                                                  temporal=-1)
    # Written in alter() through Composite.put_immutable.
    prod_rank_var: VariableId = VariableValidator(data_type=Integer,
                                                  temporal=-1)

    def __post_init__(self):
        """Create the empty composite-id -> rank mapping filled by analyze()."""
        self.ranked: Dict[str, int] = {}

    def extract(self, composite: Composite) -> float:
        """Return the mean productivity stored on ``composite``."""
        return composite.get_immutable(self.mean_prod_var)

    def analyze(self, extracts: Iterable[Tuple[str, Any]]) -> None:
        """Assign a rank to every composite id found in ``extracts``.

        :param extracts: ``(composite_id, mean_productivity)`` pairs.

        Rank 1 goes to the largest mean. Ids with equal means keep the order
        in which they were first seen, because the sort is stable.
        """
        means: Dict[str, float] = dict(extracts)

        # Sorting the negated values in ascending order yields a descending
        # ranking of the original values.
        negated = {
            composite_id: -1 * mean_prod
            for composite_id, mean_prod in means.items()
        }
        ordering = sorted(negated, key=negated.get)

        for position, composite_id in enumerate(ordering, start=1):
            self.ranked[composite_id] = position

    def alter(self, composite_id: str, composite: Composite) -> None:
        """Store the rank computed for ``composite_id`` on ``composite``.

        Raises ``KeyError`` if ``composite_id`` was not ranked by analyze().
        """
        position = self.ranked[composite_id]
        composite.put_immutable(self.prod_rank_var, position)
Exemplo n.º 2
0
class AssignAverageBMIRank(Scan):
    """Rank composites by mean BMI, largest value first.

    The value stored under ``mean_bmi_var`` is collected from every composite,
    converted into a 1-based rank, and written back to ``bmi_rank_var``.
    """
    # Read in extract() through Composite.get_immutable.
    mean_bmi_var: str = VariableValidator(data_type=Decimal, temporal=-1)
    # Written in alter() through Composite.put_immutable.
    bmi_rank_var: str = VariableValidator(data_type=Integer, temporal=-1)

    def __post_init__(self):
        """Start with no ranks; analyze() populates the mapping."""
        self.ranked: Dict[str, int] = {}

    def extract(self, composite: Composite) -> float:
        """Return the mean BMI stored on ``composite``."""
        return composite.get_immutable(self.mean_bmi_var)

    def analyze(self, extracts: Iterable[Tuple[str, Any]]) -> None:
        """Derive ranks from ``(composite_id, mean_bmi)`` pairs.

        The largest mean BMI receives rank 1. Ties keep first-seen order
        because the sort is stable.
        """
        # Collapse to one value per composite id (the last one seen wins).
        bmi_by_id: Dict[str, float] = {}
        for composite_id, mean_bmi in extracts:
            bmi_by_id[composite_id] = mean_bmi

        # Negating the value makes an ascending sort produce descending order.
        descending = sorted(bmi_by_id.items(),
                            key=lambda entry: -1 * entry[1])

        for position, (composite_id, _) in enumerate(descending, start=1):
            self.ranked[composite_id] = position

    def alter(self, composite_id: str, composite: Composite) -> None:
        """Store the rank computed for ``composite_id`` on ``composite``.

        Raises ``KeyError`` if ``composite_id`` was not ranked by analyze().
        """
        position = self.ranked[composite_id]
        composite.put_immutable(self.bmi_rank_var, position)
Exemplo n.º 3
0
class CalculateWeightGain(Change):
    """Determine the total weight gain over the observation period.

    The gain is the observation of ``weight_var`` at the latest period minus
    the observation at the earliest period, rounded to two decimal places and
    stored as an immutable value under ``weight_gain_var``.

    "Earliest" and "latest" are the ``min`` and ``max`` of the period labels.
    The labels are joined as strings for logging, so that comparison is
    lexicographic; it matches chronological order only when the labels sort
    that way (e.g. equal-width year strings).
    """
    # Temporal variable holding the weight observed in each period.
    weight_var: str = VariableValidator(data_type=Decimal)
    # Variable that receives the computed difference.
    weight_gain_var: str = VariableValidator(data_type=Decimal)

    def __call__(self, composite: Composite):
        """Compute the weight change for ``composite`` and store it.

        Raises ``ValueError`` if the composite has no periods, because
        ``min``/``max`` are then applied to an empty list.
        """
        # Values are passed as logging arguments rather than pre-formatted
        # with ``%`` so that the messages are only built when DEBUG is on.
        logging.debug("Beginning CalculateWeightGain")
        periods = list(composite.periods)
        logging.debug("Observed the following periods: %s",
                      ", ".join(periods))
        earliest = min(periods)
        latest = max(periods)

        earliest_weight = composite.get_observation(self.weight_var, earliest)
        logging.debug("Earliest weight: %0.2f", earliest_weight)

        latest_weight = composite.get_observation(self.weight_var, latest)
        logging.debug("Latest weight: %0.2f", latest_weight)

        # The result is negative when weight was lost, so "weight change"
        # would be the more accurate name.
        weight_gain = round(latest_weight - earliest_weight, 2)
        logging.debug("Weight gain: %0.2f", weight_gain)

        composite.put_immutable(self.weight_gain_var, weight_gain)
        logging.debug("Finished CalculateWeightGain.")
Exemplo n.º 4
0
class AssignMeanBMI(Change):
    """Average a composite's BMI observations into a single immutable value."""
    # Source: every observation of this variable is read.
    annual_bmi_var: VariableId = VariableValidator(data_type=Decimal, temporal=1)
    # Target: receives the average of the observations.
    mean_bmi_var: VariableId = VariableValidator(data_type=Decimal, temporal=-1)

    def __call__(self, composite: Composite):
        """Store the unweighted average of all ``annual_bmi_var`` observations."""
        observations = composite.get_all_observations(self.annual_bmi_var)
        # Each observation is a (period, value) pair; only the value matters.
        values = [value for _period, value in observations]
        average = numpy.average(values)
        composite.put_immutable(self.mean_bmi_var, average)
Exemplo n.º 5
0
class CalculateMeanProductivity(Change):
    """Average a composite's per-period productivity into one immutable value."""
    # Source: observed once for every period of the composite.
    annual_prod_var: str = VariableValidator(data_type=Decimal, temporal=1)
    # Target: receives the average across periods.
    mean_prod_var: str = VariableValidator(data_type=Decimal, temporal=-1)

    def __call__(self, composite: Composite):
        """Store the unweighted average of ``annual_prod_var`` over all periods."""
        # The periods are materialised before any observation is read.
        observed = [
            composite.get_observation(self.annual_prod_var, label)
            for label in list(composite.periods)
        ]
        average = numpy.average(observed)
        composite.put_immutable(self.mean_prod_var, average)
Exemplo n.º 6
0
class AssignAnnualBMI(Change):
    """Compute a BMI observation for every period that has a weight observation."""
    # Source: weight observed per period.
    annual_weight_var: VariableId = VariableValidator(data_type=Decimal, temporal=1)
    # Source: a single height value for the composite.
    height_var: VariableId = VariableValidator(data_type=[Decimal, Integer], temporal=-1)
    # Target: BMI written for each period.
    annual_bmi_var: VariableId = VariableValidator(data_type=Decimal, temporal=1)

    def __call__(self, composite: Composite):
        """Write ``weight / height ** 2 * 703`` for each observed weight."""
        height = composite.get_immutable(self.height_var)
        height_squared = height ** 2
        observations = composite.get_all_observations(self.annual_weight_var)
        for period, weight in observations:
            # NOTE(review): 703 is presumably the pounds/inches BMI conversion
            # factor -- confirm the units of the source data.
            value = weight / height_squared * 703
            composite.put_observation(self.annual_bmi_var, period, value)
Exemplo n.º 7
0
class ColorNameToRGB(Change):
    """Translate the color name in color_name_var to its RGB value and store it in rgb_var."""
    # Source: name used as the key into the "color_names" lookup.
    color_name_var: str = VariableValidator(data_type=Text)
    # Target: receives the value found in the lookup.
    rgb_var: str = VariableValidator(data_type=Text)

    def __call__(self, composite: Composite):
        """Look up the composite's color name and store the matching RGB value.

        Raises ``KeyError`` if the name is missing from the "color_names"
        lookup.
        """
        # A production version would also need to handle nulls and decide how
        # to treat temporal variables; this example does neither.
        name = composite.get_immutable(self.color_name_var)
        palette = self.lookups["color_names"]
        composite.put_immutable(self.rgb_var, palette[name])
Exemplo n.º 8
0
class AssignAverageBMIRank(Scan):
    """Rank composites by mean BMI, both overall and within their gender.

    Each composite receives two 1-based ranks, where rank 1 is the largest
    mean BMI: one among all composites and one among composites sharing its
    ``male_flag`` value.
    """
    # Source: truthy for male composites, falsy otherwise.
    male_flag: VariableId = VariableValidator(data_type=Binary, temporal=-1)
    # Source: the value the ranking is based on.
    mean_bmi_var: VariableId = VariableValidator(data_type=Decimal,
                                                 temporal=-1)
    # Target: rank among composites of the same gender.
    bmi_rank_gender_var: VariableId = VariableValidator(data_type=Integer,
                                                        temporal=-1)
    # Target: rank among all composites.
    bmi_rank_overall_var: VariableId = VariableValidator(data_type=Integer,
                                                         temporal=-1)

    def __post_init__(self):
        """Create the empty group -> (composite-id -> rank) mapping."""
        self.ranked: Dict[str, Dict[str, int]] = {}

    def extract(self, composite: Composite) -> Tuple[bool, float]:
        """Return ``(is_male, mean_bmi)`` for ``composite``."""
        mean_bmi = composite.get_immutable(self.mean_bmi_var)
        is_male = composite.get_immutable(self.male_flag)
        return is_male, mean_bmi

    def analyze(self, extracts: Iterable[Tuple[str, Any]]) -> None:
        """Compute the "male", "female" and "overall" rankings.

        :param extracts: ``(composite_id, (is_male, mean_bmi))`` pairs.

        Within each group, ties keep first-seen order because the sort is
        stable.
        """
        groups = ("male", "female", "overall")

        # Reset both the working data and any previously stored ranks.
        scores: Dict[str, Dict[str, float]] = {}
        for group in groups:
            scores[group] = {}
            self.ranked[group] = {}

        # Every composite is recorded under its own gender and under
        # "overall".
        for composite_id, (is_male, mean_bmi) in extracts:
            own_group = "male" if is_male else "female"
            scores[own_group][composite_id] = mean_bmi
            scores["overall"][composite_id] = mean_bmi

        for group in groups:
            # Sorting the negated values ascending ranks the originals
            # descending.
            negated = {
                composite_id: -1 * mean_bmi
                for composite_id, mean_bmi in scores[group].items()
            }
            ordering = sorted(negated, key=negated.get)
            for position, composite_id in enumerate(ordering, start=1):
                self.ranked[group][composite_id] = position

    def _get_gender(self, composite_id: str) -> str:
        """Return "male" if the id was ranked as male, otherwise "female"."""
        return "male" if composite_id in self.ranked["male"] else "female"

    def alter(self, composite_id: str, composite: Composite) -> None:
        """Write the overall rank, then the within-gender rank, to ``composite``."""
        by_group = self.ranked

        composite.put_immutable(self.bmi_rank_overall_var,
                                by_group["overall"][composite_id])

        own_group = self._get_gender(composite_id)
        composite.put_immutable(self.bmi_rank_gender_var,
                                by_group[own_group][composite_id])
Exemplo n.º 9
0
class AssignCityState(Change):
    """Fill in a composite's city and state from its zip code via the "zipcodes" lookup."""
    # Source: key into the "zipcodes" lookup.
    zip_var: VariableId = VariableValidator(data_type=Text, temporal=-1)
    # Target: receives the lookup record's "City" entry.
    city_var: VariableId = VariableValidator(data_type=Text, temporal=-1)
    # Target: receives the lookup record's "State" entry.
    state_var: VariableId = VariableValidator(data_type=Text, temporal=-1)

    def __call__(self, composite: Composite):
        """Copy "City" and "State" from the zip code's lookup record.

        Raises ``KeyError`` if the zip code, or either field, is missing from
        the lookup.
        """
        zip_code: str = composite.get_immutable(self.zip_var)
        record = self.lookups["zipcodes"][zip_code]

        # City is written before State is read, so a record lacking "State"
        # still leaves the city stored.
        targets = ((self.city_var, "City"), (self.state_var, "State"))
        for target_var, field in targets:
            composite.put_immutable(target_var, record[field])
Exemplo n.º 10
0
class AssignRegressionStats(Change):
    """Fit a linear regression of weight on year and store its slope and p-value."""
    # Source: weight observed per period.
    annual_weight_var: str = VariableValidator(data_type=Decimal, temporal=1)
    # Target: slope of the fitted line.
    weight_slope_var: str = VariableValidator(data_type=Decimal, temporal=-1)
    # Target: p-value reported by the regression.
    weight_pval_var: str = VariableValidator(data_type=Decimal, temporal=-1)

    def __call__(self, composite: Composite):
        """Regress the composite's weights on its periods and store the results.

        Period labels are converted with ``int`` and converted back with
        ``str`` to read each observation, so they must be integer strings
        that survive that round trip.
        """
        years = [int(label) for label in composite.periods]
        years.sort()

        weights = []
        for year in years:
            weights.append(
                composite.get_observation(self.annual_weight_var, str(year)))

        # Only the slope and the p-value are kept from the five results.
        slope, _intercept, _r_value, p_value, _std_err = scipy.stats.linregress(
            np.asarray(years), np.asarray(weights)
        )

        composite.put_immutable(self.weight_slope_var, slope)
        composite.put_immutable(self.weight_pval_var, p_value)
Exemplo n.º 11
0
class GeneratePersonDescription(Change):
    """Compose a one-sentence description of a person and store it in sentence_var."""
    # Sources read to build the sentence.
    color_name_var: str = VariableValidator(data_type=Text)
    rgb_var: str = VariableValidator(data_type=Text)
    person_name_var: str = VariableValidator(data_type=Text)
    gender_var: str = VariableValidator(data_type=Text)
    weight_gain_var: str = VariableValidator(data_type=Decimal)
    # Target: receives the finished sentence.
    sentence_var: str = VariableValidator(data_type=Text)

    def get_pronoun(self, composite: Composite):
        """Return "he" or "she" according to the composite's gender value.

        Raises ``KeyError`` for any gender other than "male" or "female".
        """
        gender: str = composite.get_immutable(self.gender_var)
        return {"male": "he", "female": "she"}[gender]

    def __call__(self, composite: Composite):
        """Build the description sentence and store it on ``composite``."""
        # The four values are read in the order listed, before the pronoun.
        source_vars = (self.person_name_var, self.color_name_var,
                       self.rgb_var, self.weight_gain_var)
        name, color, rgb, weight_gain = (
            composite.get_immutable(var_id) for var_id in source_vars
        )

        pronoun = self.get_pronoun(composite)

        sentence = (
            "%s's favorite color is %s (%s). Over the observation period, %s gained %0.1f lbs."
            % (name, color, rgb, pronoun, weight_gain)
        )
        composite.put_immutable(self.sentence_var, sentence)
Exemplo n.º 12
0
class EconomicOverview(Aggregate):
    """Aggregate company composites into one composite per zip code.

    ``analyze`` accumulates, per zip code and period, the number of companies,
    the total number of employees and the total revenue. ``emit`` converts
    those totals into per-period company counts, mean employees per company
    and revenue per employee.
    """
    # Input schema variables
    n_employee_var: str = VariableValidator(data_type=Integer, temporal=1)
    revenue_var: str = VariableValidator(data_type=Decimal, temporal=1)
    source_zip_var: str = VariableValidator(data_type=Text, temporal=-1)
    source_city_var: str = VariableValidator(data_type=Text, temporal=-1)
    source_state_var: str = VariableValidator(data_type=Text, temporal=-1)

    # Output schema variables
    n_company_var: str = VariableValidator(data_type=Integer, temporal=1)
    mean_employee_var: str = VariableValidator(data_type=Decimal, temporal=1)
    annual_prod_var: str = VariableValidator(data_type=Decimal, temporal=1)
    target_zip_var: str = VariableValidator(data_type=Text, temporal=-1)
    target_city_var: str = VariableValidator(data_type=Text, temporal=-1)
    target_state_var: str = VariableValidator(data_type=Text, temporal=-1)

    def __post_init__(self):
        """Create the empty zip-code -> accumulated-totals mapping."""
        # Filled by analyze() and consumed by emit().
        self.city_data: Dict[str, Dict] = {}

    def extract(self, composite: Dict) -> Optional[Any]:
        """Return the composite unchanged.

        The source document is very small and the analysis uses every
        variable in it, so nothing is filtered out here. In real-world cases,
        only a few variables are likely to be used.
        """
        return composite

    def analyze(self, extracts: Iterable[Tuple[str, Composite]]) -> None:
        """Accumulate per-period totals for each zip code.

        :param extracts: ``(company_id, company_composite)`` pairs.
        """
        for _company_id, company in extracts:
            zip_code = company.get_immutable(self.source_zip_var)
            city_name = company.get_immutable(self.source_city_var)
            state_name = company.get_immutable(self.source_state_var)

            # The first company seen for a zip code creates its transient
            # record, which emit() later turns into the final composite.
            # Later companies leave the stored city and state untouched.
            record = self.city_data.setdefault(zip_code, {
                'immutable': {
                    "zip": zip_code,
                    "city": city_name,
                    "state": state_name
                }
            })

            for period in company.periods:
                totals = record.setdefault(period, {
                    "n_companies": 0,
                    "tot_employees": 0,
                    "tot_revenue": 0.0
                })
                totals["n_companies"] += 1
                totals["tot_employees"] += company.get_observation(
                    self.n_employee_var, period)
                totals["tot_revenue"] += company.get_observation(
                    self.revenue_var, period)

    def emit(self) -> Iterator[Tuple[str, Composite]]:
        """Yield ``(zip_code, composite)`` for every zip code seen in analyze().

        A period whose employee total is zero makes the revenue-per-employee
        division fail.
        """
        for zip_code, record in self.city_data.items():
            city: Composite = Composite(self.target_schema, {})

            # Every key other than "immutable" is a period label.
            period_labels = sorted(
                key for key in record if key != "immutable")
            for period in period_labels:
                totals = record[period]
                n_companies = totals["n_companies"]
                tot_employees = totals["tot_employees"]
                tot_revenue = totals["tot_revenue"]

                mean_employees = tot_employees / n_companies
                productivity = tot_revenue / tot_employees

                # NOTE(review): put_observation receives (period, variable,
                # value) here, whereas get_observation in analyze() receives
                # (variable, period) -- confirm against the Composite API.
                city.put_observation(period, self.n_company_var, n_companies)
                city.put_observation(period, self.annual_prod_var,
                                     productivity)
                city.put_observation(period, self.mean_employee_var,
                                     mean_employees)

            fixed = record["immutable"]
            city.put_immutable(self.target_zip_var, fixed["zip"])
            city.put_immutable(self.target_city_var, fixed["city"])
            city.put_immutable(self.target_state_var, fixed["state"])

            yield zip_code, city
Exemplo n.º 13
0
class RetainOnlyFemales(Filter):
    """Filter that passes only composites whose male flag is falsy."""
    # Source: truthy for male composites.
    male_flag: VariableId = VariableValidator(data_type=Binary)

    def passes(self, composite: Composite) -> bool:
        """Return ``True`` when the composite's male flag is falsy."""
        return not composite.get_immutable(self.male_flag)