def main_process_data(
    paths,
    gsheets_api,
    google_spreadsheet_vax_id: str,
    skip_complete: list = None,
    skip_monotonic: dict = {},
    skip_anomaly: dict = {},
):
    print("-- Processing data... --")
    # Guard against the default: `skip_complete` is used with `in` below.
    if skip_complete is None:
        skip_complete = []

    # Get data from sheets
    logger.info("Getting data from Google Spreadsheet...")
    gsheet = VaccinationGSheet(gsheets_api, google_spreadsheet_vax_id)
    df_manual_list = gsheet.df_list()

    # Get automated-country data
    logger.info("Getting data from output...")
    automated = gsheet.automated_countries
    filepaths_auto = [paths.tmp_vax_out(country) for country in automated]
    df_auto_list = [read_csv(filepath) for filepath in filepaths_auto]

    # Concatenate
    vax = df_manual_list + df_auto_list

    # Check that no location is present in both manual and automated data
    manual_locations = set([df.location[0] for df in df_manual_list])
    auto_locations = os.listdir(os.path.join(paths.tmp_vax_out_dir, "main_data"))
    auto_locations = set([loc.replace(".csv", "") for loc in auto_locations])
    common_locations = auto_locations.intersection(manual_locations)
    if len(common_locations) > 0:
        raise DataError(
            f"The following locations have data in both output/main_data and GSheet: {common_locations}"
        )
    # vax = [v for v in vax if v.location.iloc[0] == "Pakistan"]  # DEBUG

    # Process locations
    def _process_location(df):
        monotonic_check_skip = skip_monotonic.get(df.loc[0, "location"], [])
        anomaly_check_skip = skip_anomaly.get(df.loc[0, "location"], [])
        return process_location(df, monotonic_check_skip, anomaly_check_skip)

    logger.info("Processing and exporting data...")
    vax_valid = []
    for df in vax:
        if "location" not in df:
            raise ValueError(f"Column `location` missing. df: {df.tail(5)}")
        country = df.loc[0, "location"]
        if country.lower() not in skip_complete:
            df = _process_location(df)
            vax_valid.append(df)
            # Export
            df.to_csv(paths.pub_vax_loc(country), index=False)
            logger.info(f"{country}: SUCCESS ✅")
        else:
            logger.info(f"{country}: SKIPPED 🚧")

    df = pd.concat(vax_valid).sort_values(by=["location", "date"])
    df.to_csv(paths.tmp_vax_all, index=False)
    gsheet.metadata.to_csv(paths.tmp_met_all, index=False)
    logger.info("Exported ✅")
    print_eoe()
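# Illustrative call sketch (not part of the original module). `paths` and
# `gsheets_api` are project-specific objects and the metric names are
# hypothetical; the sketch only shows the expected shape of the skip_*
# arguments: `skip_complete` is a list of lowercased location names, while
# `skip_monotonic` / `skip_anomaly` map a location to the metrics whose
# checks should be skipped.
#
# main_process_data(
#     paths,
#     gsheets_api,
#     google_spreadsheet_vax_id="<spreadsheet-id>",
#     skip_complete=["some location"],                            # skipped entirely
#     skip_monotonic={"Some Country": ["total_vaccinations"]},    # hypothetical metric
#     skip_anomaly={"Some Country": ["people_vaccinated"]},       # hypothetical metric
# )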
def check_symmetry_hcc(self):
    """Checks the top-level symmetry of the pairwise harmony matrix."""
    M = self.Hcc.reshape((self.nF * self.nR, -1))
    if not is_symmetric(M):
        raise DataError("The Hcc matrix should be symmetric at the top level!")
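# `is_symmetric` is not defined in the snippet above. A minimal sketch, assuming
# it simply compares a 2-D array with its transpose up to floating-point tolerance:
import numpy as np

def is_symmetric(M, atol=1e-8):
    """Return True if M is a square 2-D array that is (numerically) equal to its transpose."""
    M = np.asarray(M)
    return M.ndim == 2 and M.shape[0] == M.shape[1] and np.allclose(M, M.T, atol=atol)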
def __init__(self, df: DataFrame) -> None:
    if df.shape[0] > 0:
        self.df = df
    else:
        raise DataError('DataFrame must contain at least one row')
    self.appointment_list = []
    logger.debug(': about: Quote Service started')
    quote_service = QuoteService()
    # The quote method is name-mangled (double underscore) on QuoteService, so it
    # must be accessed via its mangled name from outside that class.
    self.quote_service_quote = quote_service._QuoteService__get_random_quote()
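# Illustrative usage of the constructor above; the enclosing class name is not
# shown in the snippet, so `AppointmentService` is a hypothetical stand-in.
#
# import pandas as pd
# service = AppointmentService(pd.DataFrame({"patient": ["A"], "time": ["09:00"]}))  # ok
# AppointmentService(pd.DataFrame())  # raises DataError: empty frame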
def _apply(self, func, **kwargs):
    """
    Rolling statistical measure using supplied function. Designed to be
    used with passed-in Cython array-based functions.

    Parameters
    ----------
    func : str/callable to apply

    Returns
    -------
    y : same type as input argument
    """
    blocks, obj = self._create_blocks()
    block_list = list(blocks)

    results = []
    exclude = []
    for i, b in enumerate(blocks):
        try:
            values = self._prep_values(b.values)
        except (TypeError, NotImplementedError) as err:
            if isinstance(obj, ABCDataFrame):
                exclude.extend(b.columns)
                del block_list[i]
                continue
            else:
                raise DataError("No numeric types to aggregate") from err

        if values.size == 0:
            results.append(values.copy())
            continue

        # if we have a string function name, wrap it
        if isinstance(func, str):
            cfunc = getattr(window_aggregations, func, None)
            if cfunc is None:
                raise ValueError(
                    f"we do not support this function in window_aggregations.{func}"
                )

            def func(arg):
                return cfunc(
                    arg,
                    self.com,
                    int(self.adjust),
                    int(self.ignore_na),
                    int(self.min_periods),
                )

        results.append(np.apply_along_axis(func, self.axis, values))

    return self._wrap_results(results, block_list, obj, exclude)
def _apply(self, func):
    """
    Rolling statistical measure using supplied function. Designed to be
    used with passed-in Cython array-based functions.

    Parameters
    ----------
    func : str/callable to apply

    Returns
    -------
    y : same type as input argument
    """
    blocks, obj = self._create_blocks(self._selected_obj)
    block_list = list(blocks)

    results = []
    exclude = []
    for i, b in enumerate(blocks):
        try:
            values = self._prep_values(b.values)
        except (TypeError, NotImplementedError) as err:
            if isinstance(obj, ABCDataFrame):
                exclude.extend(b.columns)
                del block_list[i]
                continue
            else:
                raise DataError("No numeric types to aggregate") from err

        if values.size == 0:
            results.append(values.copy())
            continue

        results.append(np.apply_along_axis(func, self.axis, values))

    return self._wrap_results(results, block_list, obj, exclude)
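# The DataError branch above is what a caller hits when a rolling/ewm window is
# asked to aggregate a frame with no numeric columns. A minimal sketch against
# the older pandas versions that used this block-based _apply:
import pandas as pd

df = pd.DataFrame({"label": ["a", "b", "c"]})  # object dtype only, nothing numeric
try:
    df.rolling(window=2).mean()
except Exception as err:  # DataError("No numeric types to aggregate") in those versions
    print(type(err).__name__, err)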
            # ability_list = [None]*30
            # for i, ability in enumerate(fighter['Abilities']):
            #     if ability:
            #         ability_list[i] = ability
            for ability in ability_list:
                ability_data.append(ability)
            ability_lens.append(len(ability_list))
            # ability_data.append(ability_list)

        # print(skill_match_data)
        # print(ability_match_data)
        # input()
        if len(skill_match_data) != 113:
            raise DataError(
                f"len(skill_match_data) not 113, instead {len(skill_match_data)}"
            )
        skill_data.append(skill_match_data)
        winner_data.append(match['winner'])

    skill_data = np.array(skill_data)
    print(skill_data.shape)

    print('start building ability array')
    ability_data = pd.DataFrame(ability_data)
    # print(ability_data[0])

    col_len = 14
    col_transformer = ColumnTransformer([
        ('Passthrough', 'passthrough',
         [0] + [(n * col_len) + x for n in range(8) for x in (1, 3, 4)]),
        ('Sign', OrdinalEncoder(),
         [(n * col_len) + 2 for n in range(8)]),