def main(user, google_cloud_credentials_file_path, pipeline_configuration_file_path, raw_data_dir):
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path,
        pipeline_configuration.phone_number_uuid_table.firebase_credentials_file_url
    ))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials,
        "avf-phone-uuid-"
    )
    log.info("Initialised the Firestore UUID table")

    log.info(f"Fetching data from {len(pipeline_configuration.raw_data_sources)} sources...")
    for i, raw_data_source in enumerate(pipeline_configuration.raw_data_sources):
        log.info(f"Fetching from source {i + 1}/{len(pipeline_configuration.raw_data_sources)}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path, raw_data_dir,
                                 phone_number_uuid_table, raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path, raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, RecoveryCSVSource):
            fetch_from_recovery_csv(user, google_cloud_credentials_file_path, raw_data_dir,
                                    phone_number_uuid_table, raw_data_source)
        else:
            assert False, f"Unknown raw_data_source type {type(raw_data_source)}"
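
# A minimal sketch of a command-line entry point for main() above, mirroring the argparse
# pattern used by the other scripts in this repo. This entry point is an assumption, not the
# project's confirmed one, and it assumes `argparse` is imported at the top of this file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetches all the raw data for this project")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("raw_data_dir", metavar="raw-data-dir",
                        help="Path to a directory to save the fetched raw data to")

    args = parser.parse_args()
    main(args.user, args.google_cloud_credentials_file_path, args.pipeline_configuration_file_path,
         args.raw_data_dir)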
import argparse

import geopandas
import matplotlib.pyplot as plt
import plotly.express as px
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
                    "`generate_outputs.py`, and optionally uploads the outputs to Drive.")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument(
import argparse

import geopandas
import matplotlib.pyplot as plt
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
                    "`generate_outputs.py`, and optionally uploads the outputs to Drive.")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument(
import argparse
import csv
import json

from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generates lists of phone numbers to advertise to using project "
                    "traced data and KK exclusion lists")
    parser.add_argument("--exclusion-list-file-path", nargs="?",
                        help="List of phone numbers to exclude from the ad group")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path",
import argparse
import json
import os

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Uploads output files")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id", metavar="run-id",
                        help="Identifier of this pipeline run")
    parser.add_argument("production_csv_input_path", metavar="production-csv-input-path",
import argparse
import json
import os

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src import AutoCodeShowMessages, AutoCodeSurveys, CombineRawDatasets, \
    ProductionFile, TranslateRapidProKeys, AnalysisFile, ApplyManualCodes
from src.lib import PipelineConfiguration

Logger.set_project_name("LQ")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs the post-fetch phase of the ReDSS pipeline",
        # Support \n and long lines
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
import json
import argparse

from dateutil.parser import isoparse
from temba_client.v2 import Message

from core_data_modules.cleaners import PhoneCleaner
from core_data_modules.logging import Logger

log = Logger(__name__)
log.set_project_name("ComputeWindowOfDowntime")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compute maximum window of time with 0 messages")
    parser.add_argument("raw_messages_file_path", metavar="input-file",
                        help="File to read the raw messages data downloaded as JSON")
    parser.add_argument("window_of_downtimes_output_file_path", metavar="output-file",
                        help="File to write the computed windows of downtime to, as JSON")
    parser.add_argument("target_operator", metavar="operator",
                        help="Operator to analyze for downtime")
    parser.add_argument("target_message_direction", metavar="direction-of-message", choices=('in', 'out'),
                        help="Direction of messages to limit the search for downtime to")
    parser.add_argument(
import argparse

import geopandas
import matplotlib.pyplot as plt
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import AnalysisUtils
from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs automated analysis over the outputs produced by "
                                                 "`generate_outputs.py`, and optionally uploads the outputs to Drive.")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("messages_json_input_path", metavar="messages-json-input-path",
                        help="Path to a JSONL file to read the TracedData of the messages data from")
    parser.add_argument("individuals_json_input_path", metavar="individuals-json-input-path",
import argparse

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

Logger.set_project_name("OCHA")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Uploads output files")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id", metavar="run-id",
                        help="Identifier of this pipeline run")
    parser.add_argument("memory_profile_file_path", metavar="memory-profile-file-path",
                        help="Path to the memory profile log file to upload")
    parser.add_argument("data_archive_file_path", metavar="data-archive-file-path",
                        help="Path to the data archive file to upload")

    args = parser.parse_args()
    user = args.user
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    pipeline_configuration_file_path = args.pipeline_configuration_file_path
# Validation tool for firebase collections
# Relies on an undocumented .collections() API call and a hard coded list of top level collections

import time
import json
import sys
import os.path

import firebase_util
import validate_nook_model as model
import validate_nook_model_custom as custom

from core_data_modules.logging import Logger

log = Logger(__name__)

firebase_client = None


def validate_documents(collection_root, validationMethod):
    log.info(f"validate_documents {collection_root}")
    time_start = time.perf_counter_ns()
    doc_count = 0
    for doc in firebase_client.collection(collection_root).stream():
        log.info(f"validating '{doc.id}'")
        data = doc.to_dict()
        try:
            validationMethod("doc", doc.id, data)
        except model.ValidationError as e:
            print()
            print("Validation failed:")
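            # A sketch of a validation callback compatible with the validationMethod signature
            # used above; the field names are hypothetical (real checks live in validate_nook_model):
            #
            #   def validate_conversation(level, doc_id, data):
            #       if not isinstance(data.get("tags", []), list):
            #           raise model.ValidationError(f"{level} {doc_id}: 'tags' should be a list")
            #
            #   validate_documents("nook_conversations", validate_conversation)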
import argparse
import csv
import json

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration
from src.lib.code_schemes import CodeSchemes

Logger.set_project_name("UNDP-RCO")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generates lists of phone numbers of UNDP-RCO respondents who "
                    "reported living in Baidoa or Bossaso")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument(
import argparse
import csv

from core_data_modules.logging import Logger
from core_data_modules.util import TimeUtils
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils
from temba_client.v2 import Message

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Downloads all inbound messages from Rapid Pro and exports "
                                                 "the phone numbers we heard from")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("rapid_pro_domain", help="URL of the Rapid Pro server to download data from")
    parser.add_argument("rapid_pro_token_file_url", metavar="rapid-pro-token-file-url",
                        help="GS URL of a text file containing the authorisation token for the Rapid Pro server")
    parser.add_argument("output_file_path", metavar="output-file-path",
                        help="Output CSV file to write the phone numbers to")

    args = parser.parse_args()
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    rapid_pro_domain = args.rapid_pro_domain
    rapid_pro_token_file_url = args.rapid_pro_token_file_url
    output_file_path = args.output_file_path
import argparse
import json

from core_data_modules.logging import Logger
from core_data_modules.util import TimeUtils
from src import FirestoreWrapper
from storage.google_cloud import google_cloud_utils
from rapid_pro_tools.rapid_pro_client import RapidProClient

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Downloads the definitions for all the flows being used by this "
                    "project, and uploads them to a bucket.")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("firestore_credentials_url", metavar="firestore-credentials-url",
                        help="GS URL to the credentials file to use to access the Firestore instance containing "
                             "the operations statistics")
import argparse

import plotly.express as px
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from configuration.code_schemes import CodeSchemes
from src import AnalysisUtils
from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs automated analysis over the outputs produced by "
                                                 "`generate_outputs.py`, and optionally uploads the outputs to Drive.")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
import argparse
import csv
import sys

from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import PhoneNumberUuidTable

Logger.set_project_name("OCHA")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Exports a list of phone numbers for the consenting participants "
                                                 "to REACH")
    parser.add_argument("traced_data_path", metavar="traced-data-path",
                        help="Path to the REACH traced data file to extract phone numbers from")
    parser.add_argument("phone_number_uuid_table_path", metavar="phone-number-uuid-table-path",
                        help="JSON file containing the phone number <-> UUID lookup table for the messages/surveys "
                             "datasets")
    parser.add_argument("output_path", metavar="output-path",
                        help="CSV file to write the REACH contacts to")

    args = parser.parse_args()
    traced_data_path = args.traced_data_path
    phone_number_uuid_table_path = args.phone_number_uuid_table_path
    output_path = args.output_path

    sys.setrecursionlimit(15000)
import argparse
import json
import os
from glob import glob

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Uploads analysis output files to Google Drive")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_run_mode", help="Whether to generate analysis files or not",
                        choices=["all-stages", "auto-code-only"])
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file-path",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("run_id",
import argparse
import glob
import json
from collections import OrderedDict

import altair
from core_data_modules.cleaners import Codes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src.lib import PipelineConfiguration
from src.lib.pipeline_configuration import CodingModes

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generates graphs for analysis")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("messages_json_input_path", metavar="messages-json-input-path",
                        help="Path to a JSONL file to read the TracedData of the messages data from")
import argparse
import hashlib
import json
import sys
import time

from core_data_modules.logging import Logger
from datetime import datetime, timezone, timedelta

import demogs_helper as demogs

log = Logger(__name__)

CONVERSATIONS_COLLECTION_KEY = 'nook_conversations'
CONVERSATION_SHARDS_COLLECTION_KEY = 'nook_conversation_shards'
CONVERSATION_TAGS_COLLECTION_KEY = 'conversationTags'
DAILY_TAG_METRICS_COLLECTION_KEY = 'daily_tag_metrics'
TOTAL_COUNTS_METRICS_COLLECTION_KEY = 'total_counts_metrics'
NEEDS_REPLY_METRICS_COLLECTION_KEY = 'needs_reply_metrics'

NEEDS_REPLY_TAG = "Needs Reply"
ESCALATE_TAG = "escalate"

KK_PROJECT = None

coda_tags = {}
tag_id_to_name = {}


def tag_ids(tags):
from core_data_modules.analysis import AnalysisConfiguration, engagement_counts, theme_distributions, \
    repeat_participations, sample_messages, traffic_analysis, analysis_utils
from core_data_modules.analysis.mapping import participation_maps, somalia_mapper
from core_data_modules.cleaners import Codes
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from dateutil.parser import isoparse

from configuration.code_schemes import CodeSchemes
from src.lib.configuration_objects import CodingModes
from src.lib.pipeline_configuration import PipelineConfiguration

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs
CONSENT_WITHDRAWN_KEY = "consent_withdrawn"
SENT_ON_KEY = "sent_on"


def coding_plans_to_analysis_configurations(coding_plans):
    analysis_configurations = []
    for plan in coding_plans:
        for cc in plan.coding_configurations:
            if not cc.include_in_theme_distribution:
                continue

            analysis_configurations.append(
                AnalysisConfiguration(cc.analysis_file_key, plan.raw_field, cc.coded_field, cc.code_scheme)
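                # Illustration (hypothetical plan objects) of the flattening performed by
                # coding_plans_to_analysis_configurations: each coding configuration that opts in
                # to the theme distribution contributes one AnalysisConfiguration. E.g. a plan with
                # raw_field="gender_raw" and one configuration with analysis_file_key="gender" and
                # coded_field="gender_coded" yields
                # AnalysisConfiguration("gender", "gender_raw", "gender_coded", <its code scheme>).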
import argparse
import json
import os

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from src import LoadData, TranslateRapidProKeys, AutoCode, ProductionFile, \
    ApplyManualCodes, AnalysisFile, WSCorrection
from src.lib import PipelineConfiguration, MessageFilters
from configurations.code_schemes import CodeSchemes

Logger.set_project_name("WUSC-KEEP-II")
log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs the post-fetch phase of the pipeline")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
# Migration tool for firebase collections
# Relies on an undocumented .collections() API call and a hard coded list of top level collections

import time
import json
import sys
import os.path

from firebase_admin import firestore

import migrate_nook_model
import firebase_util

from core_data_modules.logging import Logger

log = Logger(__name__)

firebase_client = None


def read_document_ids(collection_root):
    log.info(f"read_document_ids {collection_root}")
    cache_file_path = f"{cache_dir}/{collection_root}_doc_ids.json"
    doc_ids = []

    if reset_flag and os.path.exists(cache_file_path):
        os.remove(cache_file_path)

    if os.path.isfile(cache_file_path):
        log.info(f"reloading cached ids from {cache_file_path}")
        with open(cache_file_path, "r") as f:
            doc_ids = json.load(f)
    else:
        time_start = time.perf_counter_ns()
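        # Illustration of the caching behaviour above (values hypothetical): with
        # cache_dir="cache" and collection_root="nook_conversations", document ids are reloaded
        # from cache/nook_conversations_doc_ids.json when that file exists, unless reset_flag
        # forces them to be re-read from Firestore.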
import pytz
from core_data_modules.cleaners import Codes, PhoneCleaner
from core_data_modules.cleaners.cleaning_utils import CleaningUtils
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata, TracedData
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils, TimeUtils, SHAUtils
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils
from temba_client.v2 import Contact, Run

from src.lib import PipelineConfiguration, CodeSchemes
from src.lib.pipeline_configuration import RapidProSource, GCloudBucketSource, ShaqadoonCSVSource

Logger.set_project_name("OCHA")
log = Logger(__name__)


def label_somalia_operator(user, traced_runs, phone_number_uuid_table):
    # Set the operator codes for each message.
    uuids = {td["avf_phone_id"] for td in traced_runs}
    uuid_to_phone_lut = phone_number_uuid_table.uuid_to_data_batch(uuids)
    for td in traced_runs:
        # Returns the country code 252 and the next two digits
        operator_raw = uuid_to_phone_lut[td["avf_phone_id"]][:5]
        operator_code = PhoneCleaner.clean_operator(operator_raw)
        if operator_code == Codes.NOT_CODED:
            operator_label = CleaningUtils.make_label_from_cleaner_code(
                CodeSchemes.SOMALIA_OPERATOR,
                CodeSchemes.SOMALIA_OPERATOR.get_code_with_control_code(Codes.NOT_CODED),
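                # Example of the slicing above (hypothetical number): a looked-up phone number
                # "252611234567" gives operator_raw "25261", which PhoneCleaner.clean_operator
                # maps to an operator code, or Codes.NOT_CODED if the prefix is not recognised.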
import argparse
import json

from core_data_modules.logging import Logger
from dateutil.parser import isoparse
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from rapid_pro_tools.rapid_pro_client import RapidProClient
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)

# TODO: Read these from pipeline configuration rather than hard-coding
rapid_pro_domain = "textit.in"
rapid_pro_token_url = "gs://avf-credentials/covid19-2-text-it-token.txt"
demog_flow_name = "undp_kenya_s01_demog"
demogs_attempted_variable = "undp_kenya_s01_demogs_attempted"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Triggers demogs to people who haven't yet received them")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
import argparse
import json

from core_data_modules.data_models import Message, Label, Origin
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata
from core_data_modules.util import TimeUtils

Logger.set_project_name("CodeMerge")
log = Logger(__name__)

parser = argparse.ArgumentParser(description="Performs a code merge on a local dataset. "
                                             "To use with Coda, use get.py, code_merge.py, then set.py")
parser.add_argument("messages_input_file_path", metavar="messages-input-file-path",
                    help="Path to the file to read the Coda messages to be code-merged from")
parser.add_argument("code_ids_to_merge", metavar="code-ids-to-merge", nargs="+",
                    help="Ids of the codes to merge")
parser.add_argument("merged_code_id", metavar="merged-code-id",
                    help="Id of the code to merge the source codes to")
parser.add_argument("messages_output_file_path", metavar="messages-output-file-path",
                    help="Path to the Coda messages file to write the messages to after performing the code merge")
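
# Example invocation (file names and code ids are hypothetical):
#   python code_merge.py messages.json code-a1b2 code-c3d4 code-merged messages_merged.json
# This merges the codes 'code-a1b2' and 'code-c3d4' into 'code-merged' across the messages in
# messages.json, then writes the result to messages_merged.json.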
import argparse

from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils

from src import LoadData, TranslateSourceKeys, AutoCode, ProductionFile, \
    ApplyManualCodes, AnalysisFile, WSCorrection
from src.lib import PipelineConfiguration, MessageFilters

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Runs the post-fetch phase of the pipeline")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("pipeline_run_mode", help="Whether to generate analysis files or not",
                        choices=["all-stages", "auto-code-only"])
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("raw_data_dir", metavar="raw-data-dir",
                        help="Path to a directory containing the raw data files exported by fetch_raw_data.py")
    parser.add_argument(
import argparse

from core_data_modules.cleaners.codes import KenyaCodes
from core_data_modules.cleaners.location_tools import KenyaLocations
from core_data_modules.data_models.code_scheme import CodeTypes
from core_data_modules.logging import Logger
from core_data_modules.traced_data.io import TracedDataJsonIO
from core_data_modules.util import IOUtils
from storage.google_cloud import google_cloud_utils
from storage.google_drive import drive_client_wrapper

from configuration.code_schemes import CodeSchemes
from src import AnalysisUtils
from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes
from src.mapping_utils import MappingUtils

log = Logger(__name__)

IMG_SCALE_FACTOR = 10  # Increase this to increase the resolution of the outputted PNGs

CONSENT_WITHDRAWN_KEY = "consent_withdrawn"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs automated analysis over the outputs produced by "
                    "`generate_outputs.py`, and optionally uploads the outputs to Drive.")
    parser.add_argument("user", help="User launching this program")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
import argparse

from core_data_modules.logging import Logger
from core_data_modules.cleaners import PhoneCleaner
from dateutil.parser import isoparse
from temba_client.v2 import Message


def date_time_range(start, end, delta):
    current = start
    intervals = []
    while current < end:
        intervals.append(current)
        current += delta
    return intervals


log = Logger(__name__)
log.set_project_name("ComputeMessagesBetweenTwoFirebaseTimePeriods")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Compute message difference between two firebase time periods "
                    "(the time period for firebase is a constant number)")
    parser.add_argument("raw_messages_input_file_path", metavar="raw-messages-input-file-path",
                        help="File to read the serialized Rapid Pro message data from")
    parser.add_argument("messages_difference_per_two_firebase_time_period_output_file_path",
                        metavar="message-difference-output-file-path",
                        help="File to write the message difference between two firebase time periods to, as JSON")
    parser.add_argument("target_operator", metavar="target-operator",
                        help="Operator to compute message difference between two firebase time periods for")
    parser.add_argument("target_message_direction", metavar="target-message-direction", choices=('in', 'out'),
                        help="Direction of messages to limit the search to")
    parser.add_argument("start_date", metavar="start-date", type=lambda s: isoparse(s),
                        help="The start date as an ISO 8601 string, from which the number of messages will be computed")
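    # Quick illustration of date_time_range above (includes start, excludes end):
    #   date_time_range(datetime(2020, 1, 1, 0), datetime(2020, 1, 1, 3), timedelta(hours=1))
    #   -> [datetime(2020, 1, 1, 0, 0), datetime(2020, 1, 1, 1, 0), datetime(2020, 1, 1, 2, 0)]
    # (assumes `from datetime import datetime, timedelta`)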
import argparse
import csv
import json

from core_data_modules.cleaners import PhoneCleaner
from core_data_modules.logging import Logger
from id_infrastructure.firestore_uuid_table import FirestoreUuidTable
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="De-identifies a CSV by converting the phone numbers in "
                                                 "the specified column to avf phone ids")
    parser.add_argument("csv_input_path", metavar="csv-input-path",
                        help="Path to a CSV file to de-identify a column of")
    parser.add_argument("google_cloud_credentials_file_path", metavar="google-cloud-credentials-file-path",
                        help="Path to a Google Cloud service account credentials file to use to access the "
                             "credentials bucket")
    parser.add_argument("pipeline_configuration_file_path", metavar="pipeline-configuration-file",
                        help="Path to the pipeline configuration json file")
    parser.add_argument("column_to_de_identify", metavar="column-to-de-identify",
                        help="Name of the column containing phone numbers to be de-identified")
    parser.add_argument("de_identified_csv_output_path", metavar="de-identified-csv-output-path",
                        help="Path to write the de-identified CSV to")

    args = parser.parse_args()
import time

from core_data_modules.cleaners import Codes
from core_data_modules.cleaners.cleaning_utils import CleaningUtils
from core_data_modules.logging import Logger
from core_data_modules.traced_data import Metadata
from core_data_modules.traced_data.io import TracedDataCodaV2IO

from src.lib import PipelineConfiguration
from src.lib.configuration_objects import CodingModes

log = Logger(__name__)


class _WSUpdate(object):
    def __init__(self, message, timestamp, source_field, source_td):
        self.message = message
        self.timestamp = timestamp
        self.source_field = source_field
        self.source_td = source_td


class WSCorrection(object):
    @staticmethod
    def move_wrong_scheme_messages(user, data, coda_input_dir):
        log.info("Importing manually coded Coda files to '_WS' fields...")
        for plan in PipelineConfiguration.RQA_CODING_PLANS + PipelineConfiguration.SURVEY_CODING_PLANS:
            if plan.coda_filename is None:
                continue

            TracedDataCodaV2IO.compute_message_ids(user, data, plan.raw_field,
import argparse
import os
import re

from core_data_modules.logging import Logger
from storage.google_cloud import google_cloud_utils

from src.lib import PipelineConfiguration

log = Logger(__name__)


def get_file_paths(dir_path):
    # Search for .gzip (data archive) and .profile (memory profile) files only, because
    # os.listdir(dir_path) returns all files in the directory
    files_list = [file for file in os.listdir(dir_path) if file.endswith((".gzip", ".profile"))]
    file_paths = [os.path.join(dir_path, basename) for basename in files_list]

    return file_paths


def get_uploaded_file_dates(uploaded_files_list, date_pattern):
    dates_match = [re.search(date_pattern, file) for file in uploaded_files_list]
    uploaded_file_dates = []
    for date_match in dates_match:
        if date_match is None:
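            # Illustration of get_file_paths above (hypothetical directory contents): for a
            # directory containing data.gzip, run.profile and notes.txt,
            #   get_file_paths("/tmp/run") -> ["/tmp/run/data.gzip", "/tmp/run/run.profile"]
            # notes.txt is filtered out because it is neither a data archive nor a memory profile.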