def get(self, variable_id):
    parser = reqparse.RequestParser()
    parser.add_argument('filter', action='append')
    parser.add_argument('location', required=True,
                        help="Please, provide a location parameter.")
    args = parser.parse_args()
    filters = args['filter']
    parent_id = int(args['location'])
    all_locations_dict = get_locations(db.session)
    children_location_ids = get_children(parent_id, all_locations_dict)
    results_by_location = []
    for location_id in children_location_ids:
        result = []
        location = Location.get_location_by_id(location_id)
        sql_alchemy_filters = self.get_sql_alchemy_filters(filters)
        if location.deviceid:
            device_ids = location.deviceid.split(',')
            for device_id in device_ids:
                result.append(self._get_variable_count_for_deivce_id(
                    device_id, variable_id, sql_alchemy_filters))
        results_by_location.append({
            "clinicId": location_id,
            "deviceSubmissions": result
        })
    return jsonify({
        "parentLocationId": parent_id,
        "clinicCount": len(children_location_ids),
        "clinicSubmissions": results_by_location
    })
def get(self, variable_id, location=1, start_date=None, end_date=None,
        include_all_clinics=False):
    start_date, end_date = fix_dates(start_date, end_date)
    location = int(location)
    allowed_location = 1
    if g:
        allowed_location = g.allowed_location
    if not is_allowed_location(location, allowed_location):
        return {}
    vi = str(variable_id)
    results = db.session.query(
        func.sum(Data.variables[vi].astext.cast(Float)).label('value'),
        Data.geolocation,
        Data.clinic
    ).filter(
        Data.variables.has_key(variable_id),
        Data.date >= start_date,
        Data.date < end_date,
        or_(loc == location for loc in (Data.country, Data.region,
                                        Data.district, Data.clinic))
    ).group_by("clinic", "geolocation")
    locations = get_locations(db.session)
    ret = {}
    for r in results.all():
        if r[1] is not None:
            geo = to_shape(r[1])
            if r[2]:
                # Leaflet uses LatLng
                ret[str(r[2])] = {
                    "value": r[0],
                    "geolocation": [geo.y, geo.x],
                    "clinic": locations[r[2]].name
                }
            else:
                if not include_all_clinics:
                    cords = [geo.y, geo.x]  # Leaflet uses LatLng
                    ret[str(cords)] = {
                        "value": r[0],
                        "geolocation": cords,
                        "clinic": "Outbreak Investigation"
                    }
    if include_all_clinics:
        results = db.session.query(model.Locations)
        for row in results.all():
            if is_allowed_location(row.id, location):
                if row.case_report and row.point_location is not None and str(
                        row.id) not in ret.keys():
                    geo = to_shape(row.point_location)
                    ret[str(row.id)] = {
                        "value": 0,
                        "geolocation": [geo.y, geo.x],
                        "clinic": row.name
                    }
    return ret
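# Illustrative aside (not part of the endpoint above): the [geo.y, geo.x]
# ordering is deliberate. to_shape returns a shapely geometry whose x is
# longitude and y is latitude, while Leaflet expects [lat, lng]. A minimal
# sketch with made-up coordinates, assuming only shapely is available:
from shapely.geometry import Point

clinic_point = Point(35.93, 31.95)  # hypothetical clinic, x=lon, y=lat
leaflet_latlng = [clinic_point.y, clinic_point.x]  # latitude first
assert leaflet_latlng == [31.95, 35.93]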
def get(self, location_id, clinic_type=None, require_case_report="yes"):
    locations = get_locations(db.session)
    other_conditions = {}
    for arg in request.args:
        other_conditions[arg] = request.args.get(arg)
    points = []
    if not is_allowed_location(location_id, g.allowed_location):
        return FeatureCollection(points)
    for l in locations:
        if ((locations[l].case_report or require_case_report == "no")
                and is_child(location_id, l, locations)
                and locations[l].point_location is not None
                and (not clinic_type
                     or locations[l].clinic_type == clinic_type)):
            other_cond = True
            for cond in other_conditions:
                if locations[l].other.get(cond, None) != other_conditions[cond]:
                    other_cond = False
                    break
            if not other_cond:
                continue
            geo = to_shape(locations[l].point_location)
            # Note that this is the specified order for geojson
            p = Point((float(geo.x), float(geo.y)))
            points.append(
                Feature(geometry=p,
                        properties={
                            "name": locations[l].name,
                            "other": locations[l].other
                        }))
    return FeatureCollection(points)
def get(self, category, location=1, start_date=None, end_date=None,
        include_all_clinics=False):
    start_date, end_date = fix_dates(start_date, end_date)
    location = int(location)
    allowed_location = 1
    if g:
        allowed_location = g.allowed_location
    if not is_allowed_location(location, allowed_location):
        return {}
    results = db.session.query(
        Data.categories[category],
        Data.geolocation,
        Data.clinic,
        Data.date
    ).distinct(Data.clinic).filter(
        Data.categories.has_key(category),
        Data.date >= start_date,
        Data.date < end_date,
        or_(loc == location for loc in (Data.country, Data.region,
                                        Data.district, Data.clinic))
    ).order_by(Data.clinic).order_by(Data.date.desc())
    locations = get_locations(db.session)
    ret = {}
    for r in results.all():
        if r[1] is not None:
            geo = to_shape(r[1])
            if r[2]:
                # Leaflet uses LatLng
                ret[str(r[2])] = {
                    "value": r[0],
                    "geolocation": [geo.y, geo.x],
                    "clinic": locations[r[2]].name
                }
            else:
                if not include_all_clinics:
                    cords = [geo.y, geo.x]  # Leaflet uses LatLng
                    ret[str(cords)] = {
                        "value": r[0],
                        "geolocation": cords,
                        "clinic": "Outbreak Investigation"
                    }
    if include_all_clinics:
        results = db.session.query(model.Locations)
        for row in results.all():
            if is_allowed_location(row.id, location):
                if row.case_report and row.point_location is not None and str(
                        row.id) not in ret.keys():
                    geo = to_shape(row.point_location)
                    ret[str(row.id)] = {
                        "value": 0,
                        "geolocation": [geo.y, geo.x],
                        "clinic": row.name
                    }
    return ret
def get(self, location_id, clinic_type=None):
    locs = get_locations(db.session)
    children = get_children(location_id, locs)
    if clinic_type:
        res = db.session.query(func.count(model.Locations.id)).filter(
            model.Locations.id.in_(children),
            model.Locations.case_report == 1,
            model.Locations.clinic_type == clinic_type).first()
    else:
        res = db.session.query(func.count(model.Locations.id)).filter(
            model.Locations.id.in_(children),
            model.Locations.case_report == 1).first()
    return {"total": res[0]}
def is_allowed_location(location, allowed_location):
    """
    Returns True if location falls within allowed_location.

    Args:
        location: location id
        allowed_location: allowed location id

    Returns:
        is_allowed(bool): Is location allowed.
    """
    if allowed_location == 1:
        return True
    global allowed_locations_locs
    if allowed_locations_locs is None:
        allowed_locations_locs = get_locations(db.session)
    if is_child(allowed_location, int(location), allowed_locations_locs):
        return True
    return False
def get(self, variable_id, identifier_id, level, weekly=True, location_id=1):
    variable_id = str(variable_id)
    identifier_id = str(identifier_id)
    if weekly == "0":
        weekly = False
    year = datetime.today().year
    start_date = datetime(year, 1, 1)
    end_date = datetime(year + 1, 1, 1)
    result = latest_query(db, variable_id, identifier_id, start_date,
                          end_date, location_id, weeks=True)
    ret = {}
    locs = get_locations(db.session)
    if result:
        for r in result[level]:
            ret[locs[r].name] = {"total": result[level][r]["total"],
                                 "weeks": result[level][r]["weeks"],
                                 "id": r}
    return ret
def get_locations_by_level(level, only_loc):
    """
    Returns all the locations with the given level. If only_loc is given
    we only include children of only_loc. If we ask for the clinic level
    we also require that the clinic sends case reports.

    Args:
        level: clinic, district or region
        only_loc: location to restrict which locations are included

    Returns:
        names: {id: name}
    """
    locations = abacus_util.get_locations(db.session)
    names = {}
    for l in locations.values():
        if (l.level == level
                and (not only_loc
                     or abacus_util.is_child(only_loc, l.id, locations))
                and (level != "clinic" or l.case_report)):
            names[l.id] = l.name
    return names
def get(self):
    # First get clinics and total population
    locs = get_locations(db.session)
    refugee_clinics = get_children(1, locs, clinic_type="Refugee")
    tot_pop = 0
    clinic_map = []
    for clinic in refugee_clinics:
        result = get_latest_category("population", clinic,
                                     datetime(2015, 1, 1), datetime.now())
        clinic_pop = 0
        if result:
            clinic_pop = sum(
                [sum(result[x].values()) for x in result.keys()])
        tot_pop += clinic_pop
        geo = to_shape(locs[clinic].point_location)
        clinic_map.append({"value": clinic_pop,
                           "geolocation": [geo.y, geo.x],
                           "clinic": locs[clinic].name,
                           "location_id": clinic})
    return clinic_map
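# Illustrative aside: the population total above assumes get_latest_category
# returns a nested dict shaped like {group: {key: count}}; the inner sum
# totals each group and the outer sum adds the groups. With made-up numbers:
result = {
    "age_1": {"male": 120, "female": 140},
    "age_2": {"male": 80, "female": 95},
}
clinic_pop = sum(sum(result[x].values()) for x in result)
assert clinic_pop == 435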
def get(self, variable_id):
    ir = IncidenceRate()
    incidence_rates = ir.get(variable_id, "clinic")
    locations = get_locations(db.session)
    ret = {}
    for clinic in incidence_rates.keys():
        if incidence_rates[clinic]:
            if locations[clinic].point_location is not None:
                geo = to_shape(locations[clinic].point_location)
                ret[clinic] = {
                    "value": incidence_rates[clinic],
                    "geolocation": [geo.y, geo.x],  # Leaflet uses LatLng
                    "clinic": locations[clinic].name
                }
    return ret
def export_data(uuid, allowed_location, use_loc_ids=False,
                param_config_yaml=yaml.dump(config)):
    """
    Exports the data table from the db and inserts the finished file
    into the database.

    Args:
        uuid: uuid for the download
        use_loc_ids: if True we use location ids instead of names
    """
    db, session = get_db_engine()
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type="data",
        success=0,
        status=0
    )
    session.add(status)
    session.commit()
    results = session.query(
        func.distinct(func.jsonb_object_keys(Data.variables)))
    variables = []
    for row in results:
        variables.append(row[0])
    locs = get_locations(session)
    fieldnames = ["id", "zone", "country", "region", "district", "clinic",
                  "zone_id", "country_id", "region_id", "district_id",
                  "clinic_id", "clinic_type", "geolocation", "date",
                  "uuid"] + list(variables)
    dict_rows = []
    filename = base_folder + "/exported_data/" + uuid + "/data"
    os.mkdir(base_folder + "/exported_data/" + uuid)
    output = open(filename + ".csv", "w")
    writer = csv.DictWriter(output, fieldnames, extrasaction="ignore")
    writer.writeheader()
    results = session.query(Data).yield_per(500)
    i = 0
    for row in results:
        dict_row = dict(
            (col, getattr(row, col)) for col in row.__table__.columns.keys()
        )
        for l in ["country", "zone", "region", "district", "clinic"]:
            if dict_row[l]:
                dict_row[l + "_id"] = dict_row[l]
                dict_row[l] = locs[dict_row[l]].name
        dict_row.update(dict_row.pop("variables"))
        dict_rows.append(dict_row)
        if i % 1000 == 0:
            writer.writerows(dict_rows)
            dict_rows = []
        i += 1
    writer.writerows(dict_rows)
    status.status = 1
    status.success = 1
    session.commit()
    return True
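# Illustrative aside: extrasaction="ignore" is what lets each dict_row carry
# Data columns and keys that are not listed in fieldnames without DictWriter
# raising a ValueError. A minimal standalone sketch:
import csv
import io

fieldnames = ["id", "clinic", "date"]
buffer = io.StringIO()
writer = csv.DictWriter(buffer, fieldnames, extrasaction="ignore")
writer.writeheader()
# The extra "tot_1" key is silently dropped rather than raising an error.
writer.writerow({"id": 1, "clinic": "Clinic A", "date": "2018-01-01",
                 "tot_1": 5})
print(buffer.getvalue())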
def get(self, location, start_date=None, end_date=None):
    start_date, end_date = fix_dates(start_date, end_date)
    self.locs = get_locations(db.session)
    clinics = get_children(parent=location, locations=self.locs,
                           require_case_report=True)

    kit_contents = db.session.query(CalculationParameters.parameters) \
        .filter(CalculationParameters.name == 'medicine_kits') \
        .one()[0]

    barcode_category = 'barcode_prescription'
    conditions = [Data.categories.has_key(barcode_category),
                  Data.clinic.in_(clinics)]

    # Get first and last prescription for a clinic and medicine
    # without time constraints
    first_last_prescr_query = db.session.query(
        Data.clinic,
        Data.categories[barcode_category].astext,
        func.count(Data.id),
        func.min(Data.date),
        func.max(Data.date))
    first_last_prescr_query = first_last_prescr_query.filter(*conditions)
    first_last_prescr_query = first_last_prescr_query.group_by(
        Data.clinic, Data.categories[barcode_category].astext)

    # Get first and last prescription for a clinic without time constraints
    clinic_info = db.session.query(Data.clinic, func.count(Data.id),
                                   func.min(Data.date), func.max(Data.date))
    clinic_info = clinic_info.filter(*conditions).group_by(Data.clinic)

    # Get number of prescriptions within time constraints
    date_conditions = [Data.date >= start_date, Data.date < end_date]
    prescription_in_date_range_query = db.session.query(
        Data.clinic,
        Data.categories[barcode_category].astext,
        func.count(Data.id))
    prescription_in_date_range_query = \
        prescription_in_date_range_query.filter(*conditions)
    prescription_in_date_range_query = \
        prescription_in_date_range_query.filter(*date_conditions)
    prescription_in_date_range_query = \
        prescription_in_date_range_query.group_by(
            Data.clinic, Data.categories[barcode_category].astext)

    prescriptions = {
        'clinic_table': [],
        'medicine_table': [],
        'clinic_table_title': 'Prescribing clinics',
        'clinic_data': {}
    }

    # Restructure the DB return sets into a JSON
    for prescription in first_last_prescr_query.all():
        location_id = prescription[0]
        location_id_str = str(location_id)
        medicine_key = prescription[1]
        prescription_count = prescription[2]
        prescription_min_date = prescription[3]
        prescription_max_date = prescription[4]

        # If the medicine type is not configured to be reported, skip
        medicine_kit_details = kit_contents.get(medicine_key)
        if not medicine_kit_details:
            continue

        # Get number of kits in the clinic
        kits_in_clinic = self._get_number_of_kits_in_clinic(location_id)

        # If clinic is not in JSON yet
        prescription_for_clinic = prescriptions['clinic_data'].setdefault(
            location_id_str, {})
        prescription_for_clinic[medicine_key] = {
            "min_date": prescription_min_date.strftime("%Y-%m-%d"),
            "max_date": prescription_max_date.strftime("%Y-%m-%d"),
            "total_prescriptions": prescription_count,
            "inventory": (
                medicine_kit_details["total"] * kits_in_clinic
                if medicine_kit_details["tablets_in_kit"] == ""
                else int(medicine_kit_details["tablets_in_kit"])
                * kits_in_clinic - prescription_count
            ),
            "depletion": (
                prescription_count
                / (float(medicine_kit_details["total"]) * kits_in_clinic)
                if medicine_kit_details["tablets_in_kit"] == ""
                else prescription_count
                / (float(medicine_kit_details["tablets_in_kit"])
                   * kits_in_clinic)
            ),
            "stock": (
                1 - prescription_count
                / (float(medicine_kit_details["total"]) * kits_in_clinic)
                if medicine_kit_details["tablets_in_kit"] == ""
                else 1 - prescription_count
                / (float(medicine_kit_details["tablets_in_kit"])
                   * kits_in_clinic)
            ),
        }

    # Assign the number of prescriptions to the data object
    for prescription in prescription_in_date_range_query.all():
        str_prescription_location = str(prescription[0])
        medicine_key = str(prescription[1])
        prescription_count = prescription[2]
        prescription_for_location = prescriptions['clinic_data'].setdefault(
            str_prescription_location, {})
        medicine = prescription_for_location.setdefault(medicine_key, {})
        medicine['prescriptions'] = prescription_count

    barcode_variables = get_variables(barcode_category)

    # Create clinic table info
    for prescription in clinic_info.all():
        location_id = prescription[0]
        location_id_str = str(location_id)
        prescription_min_date = prescription[2]
        prescription_max_date = prescription[3]
        prescriptions_for_location = prescriptions['clinic_data'].setdefault(
            location_id_str, {})
        highest_depletion = find_highest_depletion(prescriptions_for_location)
        if highest_depletion:
            depletion_round_percent = round(
                highest_depletion['depletion'] * 100, 1)
            prescriptions['clinic_table'].append({
                "clinic_id": location_id_str,
                "clinic_name": self.locs[location_id].name,
                "min_date": prescription_min_date.strftime("%Y-%m-%d"),
                "max_date": prescription_max_date.strftime("%Y-%m-%d"),
                "most_depleted_medicine":
                    barcode_variables[highest_depletion['medicine']],
                "depletion": highest_depletion['depletion'],
                "str_depletion": str(depletion_round_percent) + '%'
            })

    # Create medicine table info
    for clinic in prescriptions['clinic_data']:
        for medicine_key, medicine in \
                prescriptions['clinic_data'][clinic].items():
            kit_details_for_medicine = kit_contents.get(medicine_key, {})
            if kit_details_for_medicine.get('tablets_in_kit', '') != '':
                medicine_round_stock_percentage = round(
                    medicine['stock'] * 100, 1)
                prescriptions['medicine_table'].append({
                    "clinic_id": clinic,
                    "clinic_name": self.locs[int(clinic)].name,
                    "medicine_name": barcode_variables[medicine_key],
                    "min_date": medicine['min_date'],
                    "max_date": medicine['max_date'],
                    "stock": medicine['stock'],
                    "str_stock": str(medicine_round_stock_percentage) + '%',
                    "old_str_stock": (
                        "-"
                        if kit_contents[medicine_key]["tablets_in_kit"] == ""
                        else str(medicine_round_stock_percentage) + '%'
                    ),
                    "total_prescriptions": medicine['total_prescriptions']
                })

    return prescriptions
def get(self, only_case_reports=True):
    # Load filters supplied in GET args
    inc_case_types = json.loads(request.args.get('inc_case_types', '[]'))
    exc_case_types = json.loads(request.args.get('exc_case_types', '[]'))
    key = f"{inc_case_types!r}_{exc_case_types!r}"
    if key in loc_trees:
        return loc_trees[key]

    # Get location data from db and any access restrictions set by auth
    locs = get_locations(db.session)
    loc = g.allowed_location

    # Start drawing the tree
    ret = {loc: {"id": loc, "text": locs[loc].name, "nodes": []}}
    for l in sorted(locs.keys()):
        if l >= loc and is_child(loc, l, locs):
            if not only_case_reports or (locs[l].case_report == 1
                                         or not locs[l].deviceid):
                if is_child(l, loc, locs):
                    ret.setdefault(locs[l].parent_location, {"nodes": []})

                # Factor out the process of adding a location to the tree
                def add_loc():
                    ret.setdefault(l, {"nodes": []})
                    ret[l].update({"id": l, "text": locs[l].name})
                    ret[locs[l].parent_location]["nodes"].append(ret[l])

                # Determine if the location matches incl and excl criteria
                loc_case_types = set()
                if locs[l].case_type:
                    loc_case_types = set(locs[l].case_type)
                inc = bool(set(inc_case_types) & loc_case_types)
                exc = set(exc_case_types) >= loc_case_types

                # Add the location if it is not a clinic
                if not locs[l].level == 'clinic':
                    add_loc()
                # Otherwise add the location if no filters provided at all
                elif not inc_case_types and not exc_case_types:
                    add_loc()
                # Otherwise if both filters are provided, only add loc if
                # inclusion criteria is met but not exclusion criteria
                elif inc_case_types and exc_case_types:
                    if inc and not exc:
                        add_loc()
                # Otherwise add loc if incl criteria specified and met
                elif inc_case_types and inc:
                    add_loc()
                # Otherwise add loc if excl criteria specified and not met
                elif exc_case_types and not exc:
                    add_loc()

    # Recursively clean any branches without clinics in them.
    def clean(tree):
        for child in reversed(tree['nodes']):
            clean(child)
            if not (child['nodes'] or locs[child['id']].level == 'clinic'):
                tree['nodes'].remove(child)

    clean(ret[loc])
    loc_trees[key] = jsonify(ret[loc])
    return jsonify(ret[loc])
def _export_week_level_completeness(uuid, download_name, level,
                                    completeness_config, translator,
                                    param_config, start_date=None,
                                    end_date=None, wide_data_format=False):
    """
    Exports completeness data by location and week (and year).

    Args:
        uuid: uuid for the download process
        download_name: name of the download file
        level: level of location
        completeness_config: specifies the completeness call we want to make
        translator: translator
        param_config: param config
        start_date: the date to start the data set
        end_date: end date for the aggregation
        wide_data_format: if True the data is returned in the wide format,
            else in the long format
    """
    db, session = get_db_engine()
    locs = get_locations(session)
    operation_status = OperationStatus(download_name, uuid)

    if start_date:
        start_date = parse(start_date).replace(tzinfo=None)
    if end_date:
        end_date = parse(end_date).replace(tzinfo=None)

    completeness_calls = construct_completeness_call(
        completeness_config[0], level, start_date, end_date)

    jwt_auth_token = meerkat_libs.authenticate(
        username=param_config.server_auth_username,
        password=param_config.server_auth_password,
        auth_root=param_config.auth_root)
    if not jwt_auth_token:
        raise AttributeError("Not successfully logged in for api access")
    headers = {'content-type': 'application/json',
               'authorization': 'Bearer {}'.format(jwt_auth_token)}

    data = []
    year_label = translator.gettext("Year")
    location_label = translator.gettext(level.title())
    week_label = translator.gettext("Week")
    district_label = translator.gettext("District")
    completeness_config_label = translator.gettext(completeness_config[1])
    for call, year, start_week in completeness_calls:
        api_result = requests.get(param_config.api_root + call,
                                  headers=headers)
        timeline = api_result.json()["timeline"]
        # Extract the maximum number per week from the api call
        max_per_week = int(call.split("/")[4])
        for location in timeline:
            loc_id = int(location)
            for week in range(len(timeline[location]["weeks"])):
                data.append({
                    year_label: year,
                    location_label: locs[loc_id].name,
                    week_label: week + start_week,
                    completeness_config_label:
                        timeline[location]["values"][week]
                        / max_per_week * 100
                })
                if level == "clinic" and loc_id != 1:
                    data[-1][district_label] = locs[
                        locs[loc_id].parent_location].name

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    df = pandas.DataFrame(data)
    if wide_data_format:
        if level == "clinic":
            index_labels = [year_label, district_label, location_label,
                            week_label]
        else:
            index_labels = [year_label, location_label, week_label]
        df = df.set_index(index_labels).unstack()
    df.to_csv(filename + ".csv")
    df.to_excel(filename + ".xlsx")
    operation_status.submit_operation_success()
def get(self, variable, location, exclude_case_type=None, num_weeks=0,
        include_case_type=None, include_clinic_type=None,
        require_case_report=True):
    inc_case_types = set(
        json.loads(request.args.get('inc_case_types', '[]')))
    exc_case_types = set(
        json.loads(request.args.get('exc_case_types', '[]')))
    if not is_allowed_location(location, g.allowed_location):
        return {}
    if require_case_report in [0, "0"]:
        require_case_report = False
    if num_weeks == "0":
        num_weeks = 0
    if exclude_case_type in [0, "0", "None"]:
        exclude_case_type = None
    if include_case_type in [0, "0", "None"]:
        include_case_type = None
    if include_clinic_type in [0, "0", "None"]:
        include_clinic_type = None

    locations = abacus_util.get_locations(db.session)
    location = int(location)
    clinics = get_children(location, locations,
                           require_case_report=require_case_report)
    conditions = [Data.variables.has_key(variable)]
    if num_weeks:
        epi_year, epi_week = abacus_util.epi_week.epi_week_for_date(
            datetime.today())
        start_date = meerkat_abacus.util.epi_week.epi_week_start_date(
            epi_year, int(epi_week) - int(num_weeks))
        end_date = meerkat_abacus.util.epi_week.epi_week_start_date(
            epi_year, epi_week)
        conditions.append(Data.date >= start_date)
        conditions.append(Data.date < end_date)
    exclude_list = []
    if exclude_case_type and "code:" in exclude_case_type:
        query = db.session.query(Data.clinic).filter(
            Data.variables.has_key(exclude_case_type.split(":")[1]))
        exclude_list = [r[0] for r in query.all()]

    query = db.session.query(Data.clinic).filter(*conditions)
    clinics_with_variable = [r[0] for r in query.all()]
    non_reporting_clinics = []
    if include_clinic_type:
        if "," in include_clinic_type:
            include_clinic_type = set(include_clinic_type.split(","))
        else:
            include_clinic_type = set([include_clinic_type])
    if include_case_type:
        if "," in include_case_type:
            include_case_type = set(include_case_type.split(","))
        else:
            include_case_type = set([include_case_type])
        if inc_case_types:
            include_case_type = inc_case_types.union(include_case_type)
    elif inc_case_types:
        include_case_type = inc_case_types
    if exclude_case_type and "code:" not in exclude_case_type:
        if "," in exclude_case_type:
            exclude_case_type = set(exclude_case_type.split(","))
        else:
            exclude_case_type = set([exclude_case_type])
        if exc_case_types:
            exclude_case_type = exc_case_types.union(exclude_case_type)
    elif exc_case_types:
        exclude_case_type = exc_case_types

    for clinic in clinics:
        if include_clinic_type and locations[
                clinic].clinic_type not in include_clinic_type:
            continue
        if clinic not in clinics_with_variable:
            if len(exclude_list) > 0:
                if clinic in exclude_list:
                    continue
            if include_case_type:
                if set(locations[clinic].case_type) & include_case_type:
                    non_reporting_clinics.append(clinic)
            elif exclude_case_type and "code:" not in exclude_case_type:
                if not set(locations[clinic].case_type) & exclude_case_type:
                    non_reporting_clinics.append(clinic)
            else:
                non_reporting_clinics.append(clinic)

    return {"clinics": non_reporting_clinics}
def export_category(uuid, form_name, category, download_name, variables,
                    data_type, allowed_location, start_date=None,
                    end_date=None, language="en",
                    param_config_yaml=yaml.dump(config)):
    """
    We take a variable dictionary of form field name: display_name.

    There are some special commands that can be given in the form field name:

    * icd_name$category will translate an icd code in icd_code to names
      given by the variables in category
    * clinic, region and district will give this location information
    * the $translate keyword can be used to translate row values to other
      ones, i.e. to change gender from male, female to M, F
    * field$month, field$year, field$epi_week: will extract the month, year
      or epi_week from the field
    * alert_links$alert_investigation$field: will get the field in the
      corresponding alert_investigation

    Inserts the resulting csv file in the database.

    Args:
        category: category to match
        variables: variable dictionary
    """
    # Runner loads the config object through a function parameter.
    param_config = yaml.load(param_config_yaml)
    country_config = param_config.country_config
    config_directory = param_config.config_directory

    # Some strings in download data need to be translated
    translation_dir = country_config.get("translation_dir", None)
    t = get_translator(param_config, language)

    db, session = get_db_engine()
    db2, session2 = get_db_engine()
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type=download_name,
        success=0,
        status=0
    )
    session.add(status)
    session.commit()
    res = session.query(AggregationVariables).filter(
        AggregationVariables.category.has_key(category)
    )
    locs = get_locations(session)
    data_keys = []
    cat_variables = {}
    for r in res:
        data_keys.append(r.id)
        cat_variables[r.id] = r
    if len(data_keys) == 0:
        status.status = 1
        session.commit()
    return_keys = []
    translation_dict = {}
    icd_code_to_name = {}
    link_ids = []
    min_translation = {}

    def add_translations_from_file(details):
        # Load the csv file and reader
        file_path = '{}api/{}'.format(config_directory, details['dict_file'])
        csv_file = open(file_path, 'rt')
        reader = csv.reader(csv_file)
        # Establish which column in each row we're translating from and to.
        headers = next(reader)
        from_index = headers.index(details['from'])
        to_index = headers.index(details['to'])
        # Add translations to the translation dictionary.
        trans_dict = {}
        for row in reader:
            trans_dict[row[from_index]] = row[to_index]
        return trans_dict

    # DB conditions
    conditions = [
        or_(Data.variables.has_key(key) for key in data_keys)
    ]
    if data_type:
        conditions.append(Data.type == data_type)
    if start_date:
        conditions.append(Data.date >= parse(start_date))
    if end_date:
        conditions.append(Data.date <= parse(end_date))

    # Set up icd_code_to_name if needed and determine if
    # alert_links are included
    query_links = False
    to_columns_translations = {}
    for v in variables:
        if "every$" in v[0]:
            # Want to include all the fields in the dictionary in v[1]
            # for all the links in the name.
            # First determine the maximum number of links
            link_name = v[0].split("$")[1]
            length_q = session.query(
                func.max(func.jsonb_array_length(
                    Data.links[link_name]))).filter(*conditions)
            length = length_q.first()[0]
            for i in range(length):
                for variable in v[1]:
                    name = link_name + "_" + str(i) + " " + variable[1]
                    return_keys.append(name)
                    translation_dict[name] = ("many_links&" + link_name
                                              + "&" + str(i) + "&"
                                              + variable[0])
            query_links = link_name
        else:
            return_keys.append(v[1])
            translation_dict[v[1]] = v[0]
        if "icd_name$" in v[0]:
            category = v[0].split("$")[-1]
            cat_variables = {}
            res = session.query(AggregationVariables).filter(
                AggregationVariables.category.has_key(category)
            )
            for r in res:
                cat_variables.setdefault(r.id, [])
                cat_variables[r.id].append(r)
            icd_code_to_name[v[0]] = {}
            for i in cat_variables.keys():
                for var in cat_variables[i]:
                    condition = var.condition
                    if ";" in condition:
                        condition = condition.split(";")[0]
                    if "," in condition:
                        # If a variable has many icd codes
                        # we take all of them into account
                        codes = condition.split(",")
                    else:
                        codes = [condition]
                    for c in codes:
                        if c:
                            icd_code_to_name[v[0]][c.strip()] = var.name
        if "$translate" in v[0]:
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # If the json specifies file details, load translation from file.
            if tr_dict.get('dict_file', False):
                min_translation[v[1]] = add_translations_from_file(tr_dict)
            else:
                min_translation[v[1]] = tr_dict
            v[0] = field
            translation_dict[v[1]] = v[0]
        if "$to_columns" in v[0]:
            # Create columns of every possible value
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = {}
            if ";" in trans:
                tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # Get all possible options from the DB
            results = session2.query(
                func.distinct(
                    func.regexp_split_to_table(
                        form_tables(param_config)[form_name].data[field].astext,
                        ' '))).join(
                            Data,
                            Data.uuid == form_tables(
                                param_config)[form_name].uuid).filter(
                                    *conditions).all()
            # If the json specifies file details, load translation from file.
            if tr_dict.get('dict_file', False):
                translations = add_translations_from_file(tr_dict)
            else:
                translations = {}
            return_keys.pop()
            for r in results:
                if r[0]:
                    name = v[1] + " " + translations.get(r[0], r[0])
                    if name not in return_keys:
                        return_keys.append(name)
                    if name in translation_dict:
                        translation_dict[name] = (translation_dict[name]
                                                  + "," + r[0])
                    else:
                        translation_dict[name] = field + "$to_columns$" + r[0]
        if "gen_link$" in v[0]:
            link_ids.append(v[0].split("$")[1])

    if "uuid" not in return_keys:
        return_keys.append("uuid")
        translation_dict["uuid"] = "meta/instanceID"
    link_ids = set(link_ids)
    links_by_type, links_by_name = get_links(
        config_directory + country_config["links_file"])

    # DB query, with yield_per(200) for memory reasons
    columns = [Data, form_tables(param_config)[form_name]]
    link_id_index = {}
    joins = []
    if query_links:
        link_data = shelve.open(base_folder + "/exported_data/" + uuid)
        link_data_query = session.query(Links).filter(
            Links.type == link_name).yield_per(300)
        for row in link_data_query:
            link_data[row.uuid_to] = row.data_to
    for i, l in enumerate(link_ids):
        form = aliased(form_tables(param_config)[links_by_name[l]["to_form"]])
        joins.append((form, Data.links[(l, -1)].astext == form.uuid))
        link_id_index[l] = i + 2
        columns.append(form.data)

    number_query = session2.query(func.count(Data.id)).join(
        form_tables(param_config)[form_name],
        Data.uuid == form_tables(param_config)[form_name].uuid)
    results = session2.query(*columns).join(
        form_tables(param_config)[form_name],
        Data.uuid == form_tables(param_config)[form_name].uuid)
    for join in joins:
        results = results.outerjoin(join[0], join[1])
    total_number = number_query.filter(*conditions).first()[0]
    results = results.filter(*conditions).yield_per(200)
    locs = get_locations(session)
    list_rows = []

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    csv_content = open(filename + ".csv", "w")
    csv_writer = csv.writer(csv_content)
    csv_writer.writerows([return_keys])

    # XlsxWriter with "constant_memory" set to true, flushes mem after each row
    xls_content = open(filename + ".xlsx", "wb")
    xls_book = xlsxwriter.Workbook(xls_content, {'constant_memory': True})
    xls_sheet = xls_book.add_worksheet()
    # xls_sheet = pyexcel.Sheet([keys])

    # Little utility function to write a row to file.
    def write_xls_row(data, row, sheet):
        for cell in range(len(data)):
            xls_sheet.write(row, cell, data[cell])

    write_xls_row(return_keys, 0, xls_sheet)
    i = 0

    def _list_category_variables(category, data_row):
        """
        Lists the variables from the specified category that are assigned
        to the specified row. This can be used to create data columns such
        as 'Age Group' using 'category$ncd_age'.
        """
        # Get the category's variables' data, indexed by ID.
        cat_variables = {}
        variable_list = ""
        db_results = session.query(AggregationVariables).filter(
            AggregationVariables.category.has_key(category)
        )
        for variable in db_results:
            cat_variables[variable.id] = variable
        # Build a string listing the row's variables from specified category.
        for var_id, var in cat_variables.items():
            if var_id in r[0].variables:
                variable_list += var.name + ", "
        # Remove the last comma and space.
        return variable_list[:-2]

    # Prepare each row
    for r in results:
        list_row = [''] * len(return_keys)
        if not is_child(allowed_location, r[0].clinic, locs):
            continue
        dates = {}
        for k in return_keys:
            form_var = translation_dict[k]
            index = return_keys.index(k)
            raw_data = r[1].data
            if "many_links&" in form_var:
                link_name, number, form_var = form_var.split("&")[1:]
                number = int(number)
                if link_name in r[0].links:
                    links = r[0].links[link_name]
                    if len(links) >= number + 1:
                        link_uuid = links[number]
                        raw_data = link_data[link_uuid]
                    else:
                        list_row[index] = None
                        continue
                else:
                    list_row[index] = None
                    continue
            if "icd_name$" in form_var:
                fields = form_var.split("$")
                if len(fields) > 2:
                    field = fields[1]
                else:
                    field = "icd_code"
                if raw_data[field] in icd_code_to_name[form_var]:
                    list_row[index] = icd_code_to_name[form_var][
                        raw_data[field]]
                else:
                    list_row[index] = None
            elif form_var == "clinic":
                list_row[index] = locs[r[0].clinic].name
            elif form_var == "region":
                list_row[index] = locs[r[0].region].name
            elif form_var == "zone":
                list_row[index] = locs[r[0].zone].name
            elif form_var == "district":
                if r[0].district:
                    list_row[index] = locs[r[0].district].name
                else:
                    list_row[index] = None
            elif "$year" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].year
                else:
                    list_row[index] = None
            elif "$month" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].month
                else:
                    list_row[index] = None
            elif "$day" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].day
                else:
                    list_row[index] = None
            elif "$quarter" in form_var:
                field = form_var.split("$")[0]
                if raw_data.get(field):
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    quarter = 1 + (dates[field].month - 1) // 3
                    list_row[index] = quarter
                else:
                    list_row[index] = None
            elif "$epi_week" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = epi_week_for_date(dates[field])[1]
                else:
                    list_row[index] = None
            # A general framework for referencing links in the download data.
            # link$<link id>$<linked form field>
            elif "gen_link$" in form_var:
                link = form_var.split("$")[1]
                link_index = link_id_index[link]
                if r[link_index]:
                    list_row[index] = r[link_index].get(
                        form_var.split("$")[2], None)
                else:
                    list_row[index] = None
            elif "code" == form_var.split("$")[0]:
                # code$cod_1,cod_2,Text_1,Text_2$default_value
                split = form_var.split("$")
                codes = split[1].split(",")
                text = split[2].split(",")
                if len(split) > 3:
                    default_value = split[3]
                else:
                    default_value = None
                final_text = []
                for j in range(len(codes)):
                    if codes[j] in r[0].variables:
                        final_text.append(text[j])
                if len(final_text) > 0:
                    list_row[index] = " ".join(final_text)
                else:
                    list_row[index] = default_value
            elif "category" == form_var.split("$")[0]:
                list_row[index] = _list_category_variables(
                    form_var.split("$")[1], r)
            elif "code_value" == form_var.split("$")[0]:
                code = form_var.split("$")[1]
                if code in r[0].variables:
                    list_row[index] = float(r[0].variables[code])
                else:
                    list_row[index] = None
            elif "value" == form_var.split(":")[0]:
                list_row[index] = form_var.split(":")[1]
            elif "$to_columns$" in form_var:
                int_has_code = 0
                field = form_var.split("$")[0]
                codes = form_var.split("$")[-1].split(",")
                str_elements = raw_data.get(field)
                if type(str_elements) == str:
                    elements = str_elements.split(" ")
                    has_code = any(code in elements for code in codes)
                    int_has_code = int(has_code)
                list_row[index] = int_has_code
            else:
                if form_var.split("$")[0] in raw_data:
                    list_row[index] = raw_data[form_var.split("$")[0]]
                else:
                    list_row[index] = None

            # Standardise date formatting
            if "$date" in form_var:
                field = form_var.split("$")[0]
                if list_row[index]:
                    if field not in dates:
                        dates[field] = parse(list_row[index])
                    list_row[index] = dates[field].strftime("%d/%m/%Y")
                else:
                    list_row[index] = None

            # If the final value is a float, round to 2 dp.
            # This procedure ensures integers are shown as integers.
            # Also accepts string values.
            try:
                a = float(list_row[index])
                b = int(float(list_row[index]))
                if a == b:
                    list_row[index] = b
                else:
                    list_row[index] = round(a, 2)
            except (ValueError, TypeError):
                pass

            # If a translation dictionary is defined in which the key exists...
            if min_translation and k in min_translation and list_row[index]:
                tr_dict = min_translation[k]
                if list_row[index] in tr_dict:
                    list_row[index] = tr_dict[list_row[index]]
                else:
                    parts = [x.strip()
                             for x in str(list_row[index]).split(' ')]
                    for x in range(len(parts)):
                        # Get the translation using the appropriate key.
                        # If that doesn't exist get the wild card key: *
                        # If that doesn't exist just return the value
                        parts[x] = str(
                            tr_dict.get(parts[x], tr_dict.get('*', parts[x])))
                    list_row[index] = ' '.join(list(filter(bool, parts)))

            if translation_dir and language != "en" and list_row[index]:
                list_row[index] = t.gettext(list_row[index])

        list_rows.append(list_row)
        # Can write row immediately to xls file as memory is flushed after.
        write_xls_row(list_row, i + 1, xls_sheet)
        # Append the rows to the csv file in batches.
        if i % 1000 == 0:
            logging.warning("{} rows completed...".format(i))
            csv_writer.writerows(list_rows)
            list_rows = []
            status.status = i / total_number
            session.commit()
        i += 1

    csv_writer.writerows(list_rows)
    csv_content.close()
    xls_book.close()
    xls_content.close()

    status.status = 1
    status.success = 1
    session.commit()
    if query_links:
        link_data.close()
        dir_path = os.path.dirname(os.path.realpath(__file__))
        filename = dir_path + "/exported_data/" + uuid
        logging.warning("Filename: " + filename)
        if os.path.exists(filename + ".dir"):
            os.remove(filename + ".dir")
        if os.path.exists(filename + ".dat"):
            os.remove(filename + ".dat")
    return True
def export_data_table(uuid, download_name, restrict_by, variables, group_by,
                      location_conditions=None, start_date=None,
                      end_date=None, wide_data_format=False,
                      param_config_yaml=yaml.dump(config)):
    """
    Export an aggregated data table restricted by restrict_by.

    Args:
        uuid: uuid for the download process
        variables: the variables we want to aggregate
        group_by: the data to group by (clinic, epi_week)
        start_date: the date to start the data set
        end_date: end date for the aggregation
        wide_data_format: if True the data is returned in the wide format,
            else in the long format
        param_config_yaml: the configuration values
    """
    return_keys = []
    db, session = get_db_engine()
    locs = get_locations(session)
    list_rows = []
    operation_status = OperationStatus(download_name, uuid)
    level = "region"
    columns = []
    groups = []
    location_subs = []
    only_latest_from_clinic_in_week = False
    if "only_latest_from_clinic_in_week:" in restrict_by:
        restrict_by_variable = restrict_by.split(":")[1]
        only_latest_from_clinic_in_week = True
    else:
        restrict_by_variable = restrict_by

    for i, v in enumerate(group_by):
        field = v[0]
        if ":location" in field:
            field_column = field.split(":")[0]
            level = field_column
            location_subs.append(i)
        else:
            field_column = field
        columns.append(getattr(Data, field_column))
        groups.append(getattr(Data, field_column))
        return_keys.append(v[1])

    conditions = [Data.variables.has_key(restrict_by_variable)]
    if start_date:
        start_date = parse(start_date).replace(tzinfo=None)
        conditions.append(Data.date >= start_date)
    if end_date:
        end_date = parse(end_date).replace(tzinfo=None)
        conditions.append(Data.date <= end_date)

    for v in variables:
        if only_latest_from_clinic_in_week:
            columns.append(Data.variables[v[0]].astext.cast(Float))
        else:
            columns.append(func.sum(Data.variables[v[0]].astext.cast(Float)))
        return_keys.append(v[1])

    if only_latest_from_clinic_in_week:
        conditions.append(Data.variables.has_key(restrict_by_variable))
        result = session.query(*columns).distinct(Data.clinic).filter(
            *conditions).order_by(Data.clinic).order_by(Data.date.desc())
    else:
        result = session.query(*columns).filter(*conditions).group_by(*groups)

    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    i = 0
    for row in result:
        row_list = list(row)
        location_condition = True
        for l in location_subs:
            if row_list[l]:
                if location_conditions:
                    tmp = getattr(locs[row_list[l]],
                                  location_conditions[0][0])
                    if location_conditions[0][1] in tmp:
                        location_condition = False
                row_list[l] = locs[row_list[l]].name
        if location_condition:
            row_list = [x if x is not None else 0 for x in row_list]
            list_rows.append(row_list)
            i += 1

    df = pandas.DataFrame(list_rows, columns=return_keys)
    if wide_data_format:
        df = df.set_index(return_keys[:-len(variables)]).unstack().fillna(0)
    df.to_csv(filename + ".csv")
    df.to_excel(filename + ".xlsx")
    operation_status.submit_operation_success()
    return True
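# Illustrative aside: the wide format branch relies on the pandas
# set_index(...).unstack() idiom, where the group-by keys become the row
# index, the last index level is pivoted out into columns and missing
# combinations are filled with 0. A small sketch with made-up column names:
import pandas

long_df = pandas.DataFrame(
    [["Clinic A", 1, 5], ["Clinic A", 2, 7], ["Clinic B", 1, 3]],
    columns=["clinic", "epi_week", "cases"])
wide_df = long_df.set_index(["clinic", "epi_week"]).unstack().fillna(0)
print(wide_df)  # one row per clinic, one column per epi_week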
def get(self, variable, location, number_per_week, weekend=None,
        start_week=1, end_date=None, non_reporting_variable=None,
        sublevel=None):
    inc_case_types = set(
        json.loads(request.args.get('inc_case_types', '[]')))
    exc_case_types = set(
        json.loads(request.args.get('exc_case_types', '[]')))
    if not is_allowed_location(location, g.allowed_location):
        return {}
    if not non_reporting_variable:
        non_reporting_variable = variable
    number_per_week = int(number_per_week)
    locs = abacus_util.get_locations(db.session)
    location = int(location)
    location_type = locs[location].level
    parsed_sublevel = self._get_sublevel(location_type, sublevel)
    conditions = [
        Data.variables.has_key(variable),
        or_(loc == location for loc in (Data.country, Data.zone,
                                        Data.region, Data.district,
                                        Data.clinic)),
    ]
    if exc_case_types and exc_case_types != []:
        conditions.append(~Data.case_type.contains(exc_case_types))
    if inc_case_types and inc_case_types != []:
        conditions.append(Data.case_type.overlap(inc_case_types))
    if "tag" in request.args.keys():
        conditions.append(Data.tags.has_key(request.args["tag"]))

    # Get the data
    data = pd.read_sql(
        db.session.query(Data.region, Data.zone, Data.district, Data.clinic,
                         Data.date,
                         Data.variables[variable].label(variable)).filter(
                             *conditions).statement, db.session.bind)
    if len(data) == 0:
        return jsonify(self.__empty_response)

    # We drop duplicates so each clinic can only have one record per day
    data = data.drop_duplicates(
        subset=["region", "district", "clinic", "date", variable])
    shifted_end_date, timeseries_freq = \
        self._get_shifted_end_date_and_timeseries_frequency(end_date)
    beginning_of_epi_start_week = self._get_epi_week_start(
        shifted_end_date, start_week)

    if parsed_sublevel:
        # We first create an index with sublevel, clinic, dates
        # where dates are the dates after the clinic started reporting
        sublocations = []
        for l in locs.values():
            if (abacus_util.is_child(location, l.id, locs)
                    and l.level == parsed_sublevel):
                sublocations.append(l.id)
        tuples = []
        for name in sublocations:
            for clinic in get_children(name, locs):
                if locs[clinic].case_report:
                    if inc_case_types and not set(
                            locs[clinic].case_type) & inc_case_types:
                        continue
                    if exc_case_types and set(
                            locs[clinic].case_type) >= exc_case_types:
                        continue
                    start_date = locs[clinic].start_date
                    if start_date < beginning_of_epi_start_week:
                        start_date = beginning_of_epi_start_week
                    if shifted_end_date - start_date < timedelta(days=7):
                        start_date = (shifted_end_date
                                      - timedelta(days=6)).date()
                    for date in pd.date_range(start_date, shifted_end_date,
                                              freq=timeseries_freq):
                        tuples.append((name, clinic, date))
        if len(tuples) == 0:
            return jsonify(self.__empty_response)
        new_index = pd.MultiIndex.from_tuples(
            tuples, names=[parsed_sublevel, "clinic", "date"])
        completeness = data.groupby([
            parsed_sublevel, "clinic",
            pd.TimeGrouper(key="date", freq=timeseries_freq, label="left")
        ]).sum().reindex(new_index)[variable].fillna(0).sort_index()

        # Drop clinics with no submissions
        clinic_sums = completeness.groupby(level=1).sum()
        zero_clinics = clinic_sums[clinic_sums == 0].index
        nr = NonReporting()
        non_reporting_clinics = nr.get(non_reporting_variable,
                                       location)["clinics"]
        completeness = completeness.drop(non_reporting_clinics, level=1)
        completeness.reindex()

        # We only want to count a maximum of number_per_week per week
        completeness[completeness > number_per_week] = number_per_week

        location_completeness_per_week = completeness.groupby(level=2).mean()
        sublocations_completeness_per_week = completeness.groupby(
            level=[0, 2]).mean()

        # Find last two weeks
        idx = pd.IndexSlice
        last_two_weeks = location_completeness_per_week.index[-1:]
        last_year = location_completeness_per_week.index[:]

        # Get sublocation completeness for last two weeks as a percentage
        completeness_last_two_weeks = sublocations_completeness_per_week.loc[
            idx[:, last_two_weeks]]
        score = completeness_last_two_weeks.groupby(
            level=0).mean() / number_per_week * 100
        completeness_last_year = sublocations_completeness_per_week.loc[
            idx[:, last_year]]
        yearly_score = completeness_last_year.groupby(
            level=0).mean() / number_per_week * 100

        # Add current location
        score[location] = location_completeness_per_week[
            last_two_weeks].mean() / number_per_week * 100
        yearly_score[location] = location_completeness_per_week.mean(
        ) / number_per_week * 100

        # Sort the timeline data
        timeline = {}
        for sl in sublocations_completeness_per_week.index.get_level_values(
                parsed_sublevel):
            sl_time = sublocations_completeness_per_week.iloc[
                sublocations_completeness_per_week.index.get_level_values(
                    parsed_sublevel) == sl]
            timeline[str(sl)] = {
                "weeks": sl_time.index.get_level_values("date"),
                "values": sl_time
            }
        # Add current location
        timeline[str(location)] = {
            "weeks": location_completeness_per_week.index,
            "values": location_completeness_per_week
        }

        # Calculate completeness score for each clinic
        clinic_completeness_last_two_weeks = completeness.loc[
            idx[:, :, last_two_weeks]]
        clinic_scores = clinic_completeness_last_two_weeks.groupby(
            level=1).mean() / number_per_week * 100
        clinic_completeness_last_year = completeness.loc[idx[:, :, :]]
        clinic_yearly_scores = clinic_completeness_last_year.groupby(
            level=1).mean() / number_per_week * 100
        dates_not_reported = []  # Not needed for this level
    else:
        # Take into account clinic start_date
        if locs[location].start_date > beginning_of_epi_start_week:
            beginning_of_epi_start_week = locs[location].start_date
        not_reported_dates_begining = beginning_of_epi_start_week
        if shifted_end_date - beginning_of_epi_start_week < timedelta(days=7):
            beginning_of_epi_start_week = (shifted_end_date
                                           - timedelta(days=6)).date()
        dates = pd.date_range(beginning_of_epi_start_week, shifted_end_date,
                              freq=timeseries_freq)
        completeness = data.groupby(
            pd.TimeGrouper(
                key="date", freq=timeseries_freq,
                label="left")).sum().fillna(0)[variable].reindex(
                    dates).sort_index().fillna(0)

        # We only want to count a maximum of number_per_week per week
        completeness[completeness > number_per_week] = number_per_week

        timeline = {
            str(location): {
                "weeks": [
                    d.isoformat()
                    for d in completeness.index.to_pydatetime()
                ],
                "values": [float(v) for v in completeness.values]
            }
        }
        last_two_weeks = completeness.index[-1:]
        score = pd.Series()
        score.loc[location] = completeness[
            last_two_weeks].mean() / number_per_week * 100
        yearly_score = pd.Series()
        yearly_score.loc[location] = completeness.mean(
        ) / number_per_week * 100

        # Sort out the dates on which nothing was reported.
        # Can specify on which weekdays we expect a record.
        bdays = self._get_business_days(weekend_days=weekend)
        expected_days = pd.date_range(not_reported_dates_begining,
                                      shifted_end_date, freq=bdays)
        found_dates = data["date"]
        dates_not_reported = expected_days.drop(
            found_dates.values, errors="ignore").to_pydatetime()
        dates_not_reported = [d.isoformat() for d in dates_not_reported]
        clinic_scores = None  # Not needed for this level
        clinic_yearly_scores = None  # Not needed for this level

    return jsonify({
        "score": series_to_json_dict(score),
        "timeline": timeline,
        "clinic_score": series_to_json_dict(clinic_scores),
        "clinic_yearly_score": series_to_json_dict(clinic_yearly_scores),
        "dates_not_reported": dates_not_reported,
        "yearly_score": series_to_json_dict(yearly_score)
    })
def get(self, variable, group_by, start_date=None, end_date=None,
        only_loc=None, use_ids=None, date_variable=None,
        additional_variables=None, group_by_variables=None):
    variable = str(variable)
    if not only_loc:
        if "only_loc" in request.args:
            only_loc = request.args["only_loc"]
        else:
            only_loc = g.allowed_location
    if not is_allowed_location(only_loc, g.allowed_location):
        return {}

    start_date, end_date = fix_dates(start_date, end_date)
    if "use_ids" in request.args.keys() or use_ids:
        use_ids = True
    else:
        use_ids = False

    if date_variable:
        date_conditions = [
            func.to_date(Data.variables[date_variable].astext,
                         "YYYY-MM-DDTHH-MI-SS") >= start_date,
            func.to_date(Data.variables[date_variable].astext,
                         "YYYY-MM-DDTHH-MI-SS") < end_date
        ]
    else:
        date_conditions = [Data.date >= start_date, Data.date < end_date]

    if "location" in variable:
        location_id = variable.split(":")[1]
        conditions = date_conditions + [
            or_(loc == location_id
                for loc in (Data.country, Data.zone, Data.region,
                            Data.district, Data.clinic))
        ]
    else:
        conditions = [Data.variables.has_key(variable)] + date_conditions
    if additional_variables:
        # Add additional variable filters if there are any
        for i in additional_variables:
            conditions.append(Data.variables.has_key(i))
    if only_loc:
        conditions += [
            or_(loc == only_loc
                for loc in (Data.country, Data.zone, Data.region,
                            Data.district, Data.clinic))
        ]

    epi_year_start = meerkat_abacus.util.epi_week.epi_year_start_date(
        start_date)

    # Determine which columns we want to extract from the Data table
    columns_to_extract = [func.count(Data.id).label('value')]
    if date_variable:
        columns_to_extract.append(
            func.floor(
                extract('days',
                        func.to_date(Data.variables[date_variable].astext,
                                     "YYYY-MM-DDTHH-MI-SS")
                        - epi_year_start) / 7 + 1).label("week"))
    else:
        columns_to_extract.append(
            func.floor(
                extract('days', Data.date - epi_year_start) / 7
                + 1).label("week"))

    # We want to add the columns to extract based on the group_by value.
    # In addition we create a names dict that translates ids to names.
    if "locations" in group_by:
        # If we have locations in group_by we also specify the level at
        # which we want to group the locations: clinic, district or region
        if ":" in group_by:
            level = group_by.split(":")[1]
        else:
            level = "clinic"
        locations = abacus_util.get_locations(db.session)
        ids = locations.keys()
        names = get_locations_by_level(level, only_loc)
        columns_to_extract += [getattr(Data, level, None)]
        group_by_query = level
    else:
        if not group_by_variables:
            names = get_variables(group_by)
        else:
            names = group_by_variables
        if len(names) == 0:
            return {}
        ids = names.keys()
        for i in ids:
            columns_to_extract.append(
                Data.variables.has_key(str(i)).label("id" + str(i)))
        group_by_query = ",".join(["id" + str(i) for i in ids])

    if use_ids:
        names = {vid: vid for vid in names.keys()}

    start_epi_week = abacus_util.epi_week.epi_week_for_date(start_date)[1]
    end_epi_week = abacus_util.epi_week.epi_week_for_date(end_date)[1]

    # How we deal with start and end dates in different years
    if start_date.year != end_date.year:
        end_epi_week += 53 * (end_date.year - start_date.year)

    # DB query
    results = db.session.query(*tuple(columns_to_extract)).filter(
        *conditions).group_by("week," + group_by_query)

    # Assemble return dict
    ret = {}
    for n in names.values():
        ret[n] = {
            "total": 0,
            "weeks": {i: 0 for i in range(start_epi_week, end_epi_week + 1)}
        }
    for r in results:
        # r = (number, week, *other_columns_to_extract)
        if "locations" in group_by:
            # r[2] is the location
            if r[2]:
                ret[names[r[2]]]["total"] += r[0]
                ret[names[r[2]]]["weeks"][int(r[1])] = int(r[0])
        else:
            # r[2:] are the ids that the record has
            for i, i_d in enumerate(ids):
                if r[i + 2]:
                    ret[names[i_d]]["total"] += r[0]
                    ret[names[i_d]]["weeks"][int(r[1])] = int(r[0])
    return ret
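# Illustrative aside: the "week" column above is computed in SQL as
# floor(days since the start of the epi year / 7) + 1. The same arithmetic
# in plain Python, assuming a 1 January epi year start (the real start date
# comes from epi_year_start_date):
from datetime import datetime

epi_year_start = datetime(2018, 1, 1)  # assumed epi year start

def week_number(date, year_start=epi_year_start):
    return (date - year_start).days // 7 + 1

assert week_number(datetime(2018, 1, 1)) == 1     # day 0 -> week 1
assert week_number(datetime(2018, 1, 8)) == 2     # day 7 -> week 2
assert week_number(datetime(2018, 12, 31)) == 53  # day 364 -> week 53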