def aggregate_preprocess_results(codes, dict_edits, dict_newcomers, dict_reverts):
    """Build one combined aggregate table covering every entry of ``codes``.

    For each code the edits are obtained from ``process_edits``, three of
    their columns are renamed, and the values are summed per
    ``(date, covid, user_kind)`` group. The grouped frame is then passed
    through ``process_newcomers`` and ``process_reverts``, missing values are
    replaced by 0, a ``code`` column is added and the ``index`` column is
    dropped.

    A failure while processing one code is printed and logged; the remaining
    codes are still processed.

    Args:
        codes: Iterable of codes to process (presumably wiki/language
            codes -- confirm against the caller).
        dict_edits: Passed unchanged to ``process_edits``.
        dict_newcomers: Passed unchanged to ``process_newcomers``.
        dict_reverts: Passed unchanged to ``process_reverts``.

    Returns:
        A ``pandas.DataFrame`` concatenating the per-code aggregates, or an
        empty ``DataFrame`` when no code produced a result.
    """
    per_code_frames = []
    for code in codes:
        start = time.time()
        try:
            df_gb = process_edits(dict_edits, code)
            # Rename in place so the grouped output carries the final
            # column names.
            df_gb.rename(
                {
                    "title": "index",
                    'event_user_id': 'count',
                    'revision_text_bytes_diff': 'rev_len_sum',
                },
                inplace=True,
                axis=1,
            )
            final = df_gb.groupby(["date", "covid", "user_kind"]).sum().reset_index()
            final = process_newcomers(dict_newcomers, code, final)
            final = process_reverts(dict_reverts, code, final)
            final = final.fillna(0)
            final["code"] = code
            # Keep every column except the renamed title column.
            per_code_frames.append(final.loc[:, final.columns != 'index'])
        except Exception as e:
            # Best-effort: one bad code must not abort the whole run.
            traceback.print_exc()
            Logger.instance('pipeline').info(f'Error for {code}: {str(e)}')
        Logger.instance('pipeline').info(f'Processing {code} took {time.time() - start}')

    if not per_code_frames:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame instead when there were no codes or every code failed.
        return pd.DataFrame()
    return pd.concat(per_code_frames)
import json import namedtupled import os from helpers.logger import Logger logger = Logger.instance() CONFIGURATION_FILENAME = 'conf.json' def filename_to_named_tuple(filename): with open(filename) as data_file: c_ = json.load(data_file) # pprint(c_) return namedtupled.map(c_) def load_constants(): c_ = None try: c_ = filename_to_named_tuple(CONFIGURATION_FILENAME) except FileNotFoundError as e: try: c_ = filename_to_named_tuple( os.path.join('..', CONFIGURATION_FILENAME)) except FileNotFoundError as e: try: c_ = filename_to_named_tuple( os.path.join('..', '..', CONFIGURATION_FILENAME)) except: