def get(self, location_id, clinic_type=None, require_case_report="yes"):
    """
    Return a GeoJSON FeatureCollection of the clinics under location_id.

    Clinics are filtered by clinic_type (when given), by whether they
    submit case reports (unless require_case_report == "no"), and by any
    extra query-string arguments, which must each match the clinic's
    "other" metadata.  An empty collection is returned when the caller
    is not allowed to see location_id.
    """
    locations = get_locations(db.session)
    # Every query-string argument becomes an extra equality condition
    # against the location's "other" metadata.
    other_conditions = {arg: request.args.get(arg) for arg in request.args}
    points = []
    if not is_allowed_location(location_id, g.allowed_location):
        return FeatureCollection(points)
    for loc_id in locations:
        loc = locations[loc_id]
        # Guard clauses: skip anything that fails one of the filters.
        if not (loc.case_report or require_case_report == "no"):
            continue
        if not is_child(location_id, loc_id, locations):
            continue
        if loc.point_location is None:
            continue
        if clinic_type and loc.clinic_type != clinic_type:
            continue
        if any(loc.other.get(cond, None) != value
               for cond, value in other_conditions.items()):
            continue
        geo = to_shape(loc.point_location)
        # Note that this is the specified order for geojson.
        p = Point((float(geo.x), float(geo.y)))
        points.append(
            Feature(geometry=p,
                    properties={"name": loc.name, "other": loc.other}))
    return FeatureCollection(points)
def get_children(parent, locations, clinic_type=None,
                 require_case_report=True, case_type=None):
    """
    Return all clinics that are children of parent.

    Args:
        parent: parent_id
        locations: all locations in dict
        clinic_type: if given, only clinics of this type are included
        require_case_report: when True, only clinics that submit case
            reports are included
        case_type: if given, only clinics with this case type are included

    Returns:
        list of location ids
    """
    children = []
    for loc_id in locations.keys():
        loc = locations[loc_id]
        # Guard clauses mirror the original compound condition.
        if require_case_report and not loc.case_report:
            continue
        if clinic_type and loc.clinic_type != clinic_type:
            continue
        if case_type is not None and loc.case_type != case_type:
            continue
        if abacus_util.is_child(parent, loc_id, locations):
            children.append(loc_id)
    return children
def is_allowed_location(location, allowed_location):
    """"
    Returns true if the location is allowed_location

    Args:
        location: location id
        allowed_location: allowed_location

    Returns:
        is_allowed(bool): Is location allowed.
    """
    # allowed_location == 1 is the root location: everything is allowed.
    if allowed_location == 1:
        return True
    global allowed_locations_locs
    # Lazily load and cache the full location table on first use.
    # NOTE(review): the cache is never invalidated — locations added
    # after the first call will not be seen; confirm this is acceptable.
    if allowed_locations_locs is None:
        allowed_locations_locs = get_locations(db.session)
    return is_child(allowed_location, int(location), allowed_locations_locs)
def find_level(location, sublevel, locations):
    """
    Returns the instance of level that location is a child of.

    Args:
        location: location
        sublevel: the sublevel we are interested in
        locations: all locations in dict

    Returns:
        location_id(int): id of the matching location, or None
    """
    target = int(location)
    # First location at the requested level that is an ancestor of target.
    return next(
        (loc for loc in locations
         if locations[loc].level == sublevel
         and abacus_util.is_child(loc, target, locations)),
        None)
def get_locations_by_level(level, only_loc):
    """
    Returns all the locations with the given level. If only_loc is
    given we only include children of only_loc. If we ask for the
    clinic level we also require that the clinic sends case reports.

    Args:
        level: clinic, district or region
        only_loc: location to restrict which locations are included

    Returns:
        names: {id: name}
    """
    locations = abacus_util.get_locations(db.session)
    # Single pass over the location table; all three filters applied here.
    return {
        loc.id: loc.name
        for loc in locations.values()
        if loc.level == level
        and (not only_loc
             or abacus_util.is_child(only_loc, loc.id, locations))
        and (level != "clinic" or loc.case_report)
    }
def __save_form_data(xls_csv_writer, query_form_data, operation_status,
                     keys, allowed_location, location_data):
    """
    Stream form-data records into the csv/xls writer, filtered by location.

    Args:
        xls_csv_writer: writer exposing write_xls_row/write_csv_row
        query_form_data: SQLAlchemy query yielding form-data records
        operation_status: object whose update_operation_status(fraction)
            is called periodically with the progress so far
        keys: ordered data keys that become the output columns
        allowed_location: location id the caller may access (1 = all)
        location_data: tuple (locations, locs_by_deviceid, zones,
            regions, districts, devices)
    """
    (locations, locs_by_deviceid, zones,
     regions, districts, devices) = location_data
    results = query_form_data.yield_per(1000)
    results_count = query_form_data.count()
    # Report progress roughly every 5% of rows.  Use an integer step
    # clamped to >= 1: the original float step (results_count / 20)
    # made the `i % step == 0` test fire erratically for counts that
    # are not multiples of 20, and produced a sub-1 divisor for small
    # result sets.
    progress_step = max(1, results_count // 20)
    for i, result in enumerate(results):
        # Defensively skip malformed records rather than aborting.
        if not result:
            logging.error("Skipping result %d which is None", i)
            continue
        if not result.data:
            logging.error("Skipping result %d. Data is None", i)
            continue
        if not isinstance(result.data, dict):
            logging.error("Skipping result %d which data is not of a dictionary type", i)
            continue
        # Initialise empty result for header line
        row = []
        for key in keys:
            try:
                row.append(result.data.get(key, ''))
            except AttributeError:
                logging.exception("Error while parsing row %s with data:\n%s",
                                  result, result.data, exc_info=True)
        # Add the location data if it has been requested and exists.
        if 'deviceid' in result.data:
            clinic_id = locs_by_deviceid.get(result.data["deviceid"], None)
            # Respect the caller's location restriction.
            if not is_child(allowed_location, clinic_id, locations):
                continue
            populate_row_locations(row, keys, clinic_id, location_data)
        else:
            # Rows without a device id are only visible to root (1).
            if allowed_location != 1:
                continue
            set_empty_locations(keys, row)
        xls_csv_writer.write_xls_row(row)
        xls_csv_writer.write_csv_row(row)
        if i % progress_step == 0:
            operation_status.update_operation_status(float(i) / results_count)
def test_is_child(self):
    """Testing is_child"""
    # Hierarchy: Demo -> (Region 1 -> District 1 -> Clinic 1,
    #                     Region 2 -> District 2 -> Clinic 2)
    tree = {
        1: (None, "Demo"),
        2: (1, "Region 1"),
        3: (1, "Region 2"),
        4: (2, "District 1"),
        5: (3, "District 2"),
        6: (4, "Clinic 1"),
        7: (5, "Clinic 2"),
    }
    locations = {}
    for loc_id, (parent, name) in tree.items():
        if parent is None:
            locations[loc_id] = model.Locations(name=name)
        else:
            locations[loc_id] = model.Locations(name=name,
                                                parent_location=parent)
    # Pairs that ARE in an ancestor/descendant relationship.
    for parent, child in [(1, 3), (2, 4), (1, 7), (3, 7), ("3", "7")]:
        self.assertTrue(abacus_util.is_child(parent, child, locations))
    # Pairs that are NOT.
    for parent, child in [(3, 6), (2, 5)]:
        self.assertFalse(abacus_util.is_child(parent, child, locations))
def get(self, variable, location, number_per_week, weekend=None,
        start_week=1, end_date=None, non_reporting_variable=None,
        sublevel=None):
    """
    Build a reporting-completeness report for ``variable`` at ``location``.

    Completeness counts one submission per clinic per day (duplicates
    dropped), capped at ``number_per_week`` per period, and is expressed
    as a percentage of ``number_per_week``.

    Args:
        variable: variable id whose presence counts as a submission
        location: location id the report is for
        number_per_week: expected number of records per week
        weekend: weekend-day spec forwarded to self._get_business_days
        start_week: epi week the report starts from
        end_date: report end date (None lets the helper pick a default)
        non_reporting_variable: variable used to find non-reporting
            clinics; defaults to ``variable``
        sublevel: sub-level to break the report down by (validated by
            self._get_sublevel)

    Returns:
        flask JSON response with keys: score, timeline, clinic_score,
        clinic_yearly_score, dates_not_reported, yearly_score.
    """
    # Optional case-type filters supplied as JSON-encoded lists in the
    # query string.
    inc_case_types = set(
        json.loads(request.args.get('inc_case_types', '[]')))
    exc_case_types = set(
        json.loads(request.args.get('exc_case_types', '[]')))
    # Access control: empty body when the caller may not see location.
    if not is_allowed_location(location, g.allowed_location):
        return {}
    if not non_reporting_variable:
        non_reporting_variable = variable
    number_per_week = int(number_per_week)
    locs = abacus_util.get_locations(db.session)
    location = int(location)
    location_type = locs[location].level
    parsed_sublevel = self._get_sublevel(location_type, sublevel)
    conditions = [
        Data.variables.has_key(variable),
        # Match rows attached to this location at any administrative level.
        or_(loc == location for loc in (Data.country, Data.zone,
                                        Data.region, Data.district,
                                        Data.clinic)),
    ]
    # NOTE(review): inc/exc_case_types are sets here, so "!= []" is
    # always True; the truthiness check alone already guards emptiness.
    if exc_case_types and exc_case_types != []:
        conditions.append(~Data.case_type.contains(exc_case_types))
    if inc_case_types and inc_case_types != []:
        conditions.append(Data.case_type.overlap(inc_case_types))
    if "tag" in request.args.keys():
        conditions.append(Data.tags.has_key(request.args["tag"]))
    # get the data
    data = pd.read_sql(
        db.session.query(Data.region, Data.zone, Data.district,
                         Data.clinic, Data.date,
                         Data.variables[variable].label(variable)).filter(
                             *conditions).statement, db.session.bind)
    if len(data) == 0:
        return jsonify(self.__empty_response)
    # We drop duplicates so each clinic can only have one record per day
    data = data.drop_duplicates(
        subset=["region", "district", "clinic", "date", variable])
    shifted_end_date, timeseries_freq = self._get_shifted_end_date_and_timeseries_frequency(
        end_date)
    beginning_of_epi_start_week = self._get_epi_week_start(
        shifted_end_date, start_week)
    if parsed_sublevel:
        # We first create an index with sublevel, clinic, dates
        # Where dates are the dates after the clinic started reporting
        sublocations = []
        for l in locs.values():
            if abacus_util.is_child(location, l.id,
                                    locs) and l.level == parsed_sublevel:
                sublocations.append(l.id)
        tuples = []
        for name in sublocations:
            for clinic in get_children(name, locs):
                if locs[clinic].case_report:
                    # Apply the case-type include/exclude filters to each
                    # clinic as well as to the data query above.
                    if inc_case_types and not set(
                            locs[clinic].case_type) & inc_case_types:
                        continue
                    if exc_case_types and set(
                            locs[clinic].case_type) >= exc_case_types:
                        continue
                    start_date = locs[clinic].start_date
                    if start_date < beginning_of_epi_start_week:
                        start_date = beginning_of_epi_start_week
                    # Guarantee at least one full week of expected
                    # reporting for recently opened clinics.
                    if shifted_end_date - start_date < timedelta(days=7):
                        start_date = (shifted_end_date -
                                      timedelta(days=6)).date()
                    for date in pd.date_range(start_date, shifted_end_date,
                                              freq=timeseries_freq):
                        tuples.append((name, clinic, date))
        if len(tuples) == 0:
            return jsonify(self.__empty_response)
        new_index = pd.MultiIndex.from_tuples(
            tuples, names=[parsed_sublevel, "clinic", "date"])
        # NOTE(review): pd.TimeGrouper is deprecated in newer pandas
        # (replaced by pd.Grouper) — presumably this file is pinned to
        # an older pandas; verify before upgrading.
        completeness = data.groupby([
            parsed_sublevel, "clinic",
            pd.TimeGrouper(key="date", freq=timeseries_freq, label="left")
        ]).sum().reindex(new_index)[variable].fillna(0).sort_index()
        # Drop clinics with no submissions
        clinic_sums = completeness.groupby(level=1).sum()
        # NOTE(review): zero_clinics is computed but never used; the
        # actual drop below relies on the NonReporting resource instead.
        zero_clinics = clinic_sums[clinic_sums == 0].index
        nr = NonReporting()
        non_reporting_clinics = nr.get(non_reporting_variable,
                                       location)["clinics"]
        completeness = completeness.drop(non_reporting_clinics, level=1)
        # NOTE(review): reindex() with no arguments and no assignment
        # is a no-op here.
        completeness.reindex()
        # We only want to count a maximum of number per week per week
        completeness[completeness > number_per_week] = number_per_week
        location_completeness_per_week = completeness.groupby(
            level=2).mean()
        sublocations_completeness_per_week = completeness.groupby(
            level=[0, 2]).mean()
        # Find last two weeks
        idx = pd.IndexSlice
        # NOTE(review): despite the names, [-1:] selects only the final
        # period and [:] selects the whole reporting range.
        last_two_weeks = location_completeness_per_week.index[-1:]
        last_year = location_completeness_per_week.index[:]
        # Get sublocation completeness for last two weeks as a percentage
        completeness_last_two_weeks = sublocations_completeness_per_week.loc[
            idx[:, last_two_weeks]]
        score = completeness_last_two_weeks.groupby(
            level=0).mean() / number_per_week * 100
        completeness_last_year = sublocations_completeness_per_week.loc[
            idx[:, last_year]]
        yearly_score = completeness_last_year.groupby(
            level=0).mean() / number_per_week * 100
        # Add current location
        score[location] = location_completeness_per_week[
            last_two_weeks].mean() / number_per_week * 100
        yearly_score[location] = location_completeness_per_week.mean(
        ) / number_per_week * 100
        # Sort the timeline data
        timeline = {}
        for sl in sublocations_completeness_per_week.index.get_level_values(
                parsed_sublevel):
            sl_time = sublocations_completeness_per_week.iloc[
                sublocations_completeness_per_week.index.get_level_values(
                    parsed_sublevel) == sl]
            timeline[str(sl)] = {
                "weeks": sl_time.index.get_level_values("date"),
                "values": sl_time
            }
        # Add current location
        timeline[str(location)] = {
            "weeks": location_completeness_per_week.index,
            "values": location_completeness_per_week
        }
        # Calculate completness score for each clinic
        clinic_completeness_last_two_weeks = completeness.loc[
            idx[:, :, last_two_weeks]]
        clinic_scores = clinic_completeness_last_two_weeks.groupby(
            level=1).mean() / number_per_week * 100
        clinic_completeness_last_year = completeness.loc[idx[:, :, :]]
        clinic_yearly_scores = clinic_completeness_last_year.groupby(
            level=1).mean() / number_per_week * 100
        dates_not_reported = []  # Not needed for this level
    else:
        # Take into account clinic start_date
        if locs[location].start_date > beginning_of_epi_start_week:
            beginning_of_epi_start_week = locs[location].start_date
        not_reported_dates_begining = beginning_of_epi_start_week
        if shifted_end_date - beginning_of_epi_start_week < timedelta(
                days=7):
            beginning_of_epi_start_week = (shifted_end_date -
                                           timedelta(days=6)).date()
        dates = pd.date_range(beginning_of_epi_start_week,
                              shifted_end_date, freq=timeseries_freq)
        completeness = data.groupby(
            pd.TimeGrouper(
                key="date", freq=timeseries_freq,
                label="left")).sum().fillna(0)[variable].reindex(
                    dates).sort_index().fillna(0)
        # We only want to count a maximum of number per week per week
        completeness[completeness > number_per_week] = number_per_week
        timeline = {
            str(location): {
                "weeks": [
                    d.isoformat()
                    for d in completeness.index.to_pydatetime()
                ],
                "values": [float(v) for v in completeness.values]
            }
        }
        last_two_weeks = completeness.index[-1:]
        score = pd.Series()
        score.loc[location] = completeness[last_two_weeks].mean(
        ) / number_per_week * 100
        yearly_score = pd.Series()
        yearly_score.loc[location] = completeness.mean(
        ) / number_per_week * 100
        # Sort out the dates on which nothing was reported
        # Can specify on which weekdays we expect a record
        bdays = self._get_business_days(weekend_days=weekend)
        expected_days = pd.date_range(not_reported_dates_begining,
                                      shifted_end_date, freq=bdays)
        found_dates = data["date"]
        dates_not_reported = expected_days.drop(
            found_dates.values, errors="ignore").to_pydatetime()
        dates_not_reported = [d.isoformat() for d in dates_not_reported]
        clinic_scores = None  # Not needed for this level
        clinic_yearly_scores = None  # Not needed for this level
    return jsonify({
        "score": series_to_json_dict(score),
        "timeline": timeline,
        "clinic_score": series_to_json_dict(clinic_scores),
        "clinic_yearly_score": series_to_json_dict(clinic_yearly_scores),
        "dates_not_reported": dates_not_reported,
        "yearly_score": series_to_json_dict(yearly_score)
    })
def get(self, only_case_reports=True):
    """
    Build the location tree the current user may see, rooted at
    g.allowed_location.

    Clinics can be filtered by case type via the 'inc_case_types' /
    'exc_case_types' JSON-list query-string arguments.  Branches that
    end up containing no clinics are pruned.  Responses are cached per
    filter combination in the module-level ``loc_trees`` dict.
    """
    # Load filters supplied in GET args
    inc_case_types = json.loads(request.args.get('inc_case_types', '[]'))
    exc_case_types = json.loads(request.args.get('exc_case_types', '[]'))
    # NOTE(review): the cache key does not include g.allowed_location,
    # so this cache is only safe if all users share the same allowed
    # location (or the cache is per-user) — TODO confirm.
    key = f"{inc_case_types!r}_{exc_case_types!r}"
    if key in loc_trees:
        return loc_trees[key]
    # Get location data from db and any access restrictions set by auth
    locs = get_locations(db.session)
    loc = g.allowed_location
    # Start drawing the tree
    ret = {loc: {"id": loc, "text": locs[loc].name, "nodes": []}}
    for l in sorted(locs.keys()):
        # NOTE(review): "l >= loc" assumes descendant ids are never
        # smaller than their ancestor's id — TODO confirm this invariant
        # holds for the locations table.
        if l >= loc and is_child(loc, l, locs):
            if not only_case_reports or (locs[l].case_report == 1
                                         or not locs[l].deviceid):
                # Presumably only true for the root itself (l == loc):
                # give its parent a placeholder entry so add_loc below
                # can append to it — TODO confirm.
                if is_child(l, loc, locs):
                    ret.setdefault(locs[l].parent_location, {"nodes": []})

                # Factor out the process of adding a location to the tree
                def add_loc():
                    ret.setdefault(l, {"nodes": []})
                    ret[l].update({"id": l, "text": locs[l].name})
                    ret[locs[l].parent_location]["nodes"].append(ret[l])

                # Determine if the location matches incl and excl criteria
                loc_case_types = set()
                if locs[l].case_type:
                    loc_case_types = set(locs[l].case_type)
                inc = bool(set(inc_case_types) & loc_case_types)
                exc = set(exc_case_types) >= loc_case_types
                # Add the location if it is not a clinic
                if not locs[l].level == 'clinic':
                    add_loc()
                # Otherwise add the location if no filters provided at all
                elif not inc_case_types and not exc_case_types:
                    add_loc()
                # Otherwise if both filters are provided, only add loc if
                # ...inclusion criteria is met but not exclusion criteria
                elif inc_case_types and exc_case_types:
                    if inc and not exc:
                        add_loc()
                # Otherwise add loc if incl criteria specified and met
                elif inc_case_types and inc:
                    add_loc()
                # Otherwise add loc if excl criteria specified and not met
                elif exc_case_types and not exc:
                    add_loc()

    # Recursively clean any branches without clinics in them.
    # Iterating in reverse lets us remove the current child without
    # skipping its siblings.
    def clean(tree):
        for child in reversed(tree['nodes']):
            clean(child)
            if not (child['nodes'] or locs[child['id']].level == 'clinic'):
                tree['nodes'].remove(child)

    clean(ret[loc])
    loc_trees[key] = jsonify(ret[loc])
    return jsonify(ret[loc])
def export_category(uuid, form_name, category, download_name, variables,
                    data_type, allowed_location, start_date=None,
                    end_date=None, language="en",
                    param_config_yaml=yaml.dump(config)):
    """
    We take a variable dictionary of form field name: display_name.

    There are some special commands that can be given in the
    form field name:

    * icd_name$category will translate an icd code in icd_code to names
      given by the variables in category
    * clinic,region and district will give this location information
    * the $translate keyword can be used to translate row values to
      other ones. I.e to change gender from male, female to M, F
    * field$month, field$year, field$epi_week: will extract the month,
      year or epi_week from the field
    * alert_links$alert_investigation$field: will get the field in the
      corresponding alert_investigation

    Inserts the resulting csv file in the database

    Args:\n
        category: category to match\n
        variables: variable dictionary\n
    """
    # Runner loads the config object through a function parameter.
    # NOTE(review): yaml.load without an explicit Loader is deprecated
    # and unsafe on untrusted input; the default is yaml.dump(config)
    # so the payload is presumably internal — confirm before exposing
    # this parameter to external callers.
    param_config = yaml.load(param_config_yaml)
    country_config = param_config.country_config
    config_directory = param_config.config_directory
    # Some strings in download data need to be translated
    translation_dir = country_config.get("translation_dir", None)
    t = get_translator(param_config, language)
    # Two independent engine/session pairs: one for status bookkeeping,
    # one for the streaming data queries.
    db, session = get_db_engine()
    db2, session2 = get_db_engine()
    # Progress/status record visible while the export runs.
    status = DownloadDataFiles(
        uuid=uuid,
        generation_time=datetime.now(),
        type=download_name,
        success=0,
        status=0
    )
    session.add(status)
    session.commit()
    res = session.query(AggregationVariables).filter(
        AggregationVariables.category.has_key(category)
    )
    locs = get_locations(session)
    data_keys = []
    cat_variables = {}
    for r in res:
        data_keys.append(r.id)
        cat_variables[r.id] = r
    if len(data_keys) == 0:
        # NOTE(review): status is marked complete but execution falls
        # through and continues with an empty key list — an early
        # return here looks intended; confirm before changing.
        status.status = 1
        session.commit()
    return_keys = []
    translation_dict = {}
    icd_code_to_name = {}
    link_ids = []
    min_translation = {}

    def add_translations_from_file(details):
        # Build a {from-value: to-value} dict from the referenced csv.
        # Load the csv file and reader
        file_path = '{}api/{}'.format(config_directory, details['dict_file'])
        csv_file = open(file_path, 'rt')
        reader = csv.reader(csv_file)
        # Establish which column in each row we're translating from and to.
        headers = next(reader)
        from_index = headers.index(details['from'])
        to_index = headers.index(details['to'])
        # Add translations to the translation dictionary.
        trans_dict = {}
        for row in reader:
            trans_dict[row[from_index]] = row[to_index]
        return trans_dict

    # DB conditions
    conditions = [
        or_(Data.variables.has_key(key) for key in data_keys)
    ]
    if data_type:
        conditions.append(Data.type == data_type)
    if start_date:
        conditions.append(Data.date >= parse(start_date))
    if end_date:
        conditions.append(Data.date <= parse(end_date))
    # Set up icd_code_to_name if needed and determine if
    # alert_links are included
    query_links = False
    # NOTE(review): to_columns_translations is never used below.
    to_columns_translations = {}
    for v in variables:
        if "every$" in v[0]:
            # Want to include all the fields in the dictionary
            # in v[1] for all the links in the name
            # First determine the maximum number of links
            link_name = v[0].split("$")[1]
            length_q = session.query(
                func.max(func.jsonb_array_length(
                    Data.links[link_name]))).filter(*conditions)
            length = length_q.first()[0]
            for i in range(length):
                for variable in v[1]:
                    name = link_name + "_" + str(i) + " " + variable[1]
                    return_keys.append(name)
                    translation_dict[name] = "many_links&" + link_name + "&" + str(i) + "&" + variable[0]
            query_links = link_name
        else:
            return_keys.append(v[1])
            translation_dict[v[1]] = v[0]
        if "icd_name$" in v[0]:
            # NOTE(review): rebinds the function argument `category` and
            # the outer `cat_variables` — apparently intentional reuse.
            category = v[0].split("$")[-1]
            cat_variables = {}
            res = session.query(AggregationVariables).filter(
                AggregationVariables.category.has_key(category)
            )
            for r in res:
                cat_variables.setdefault(r.id, [])
                cat_variables[r.id].append(r)
            icd_code_to_name[v[0]] = {}
            for i in cat_variables.keys():
                for var in cat_variables[i]:
                    condition = var.condition
                    if ";" in condition:
                        condition = condition.split(";")[0]
                    if "," in condition:
                        # If a variable have many icd codes
                        # we take all of them into account
                        codes = condition.split(",")
                    else:
                        codes = [condition]
                    for c in codes:
                        if c:
                            icd_code_to_name[v[0]][c.strip()] = var.name
        if "$translate" in v[0]:
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # If the json specifies file details, load translation from file.
            if tr_dict.get('dict_file', False):
                min_translation[v[1]] = add_translations_from_file(tr_dict)
            else:
                min_translation[v[1]] = tr_dict
            v[0] = field
            translation_dict[v[1]] = v[0]
        if "$to_columns" in v[0]:
            # Create columns of every possible value
            split = v[0].split("$")
            field = "$".join(split[:-1])
            trans = split[-1]
            tr_dict = {}
            if ";" in trans:
                tr_dict = json.loads(trans.split(";")[1].replace("'", '"'))
            # If the json specifies file details, load translation from file.
            # Get all possible options from the DB
            results = session2.query(
                func.distinct(
                    func.regexp_split_to_table(
                        form_tables(param_config)[form_name].data[field].astext,
                        ' '))).join(
                            Data,
                            Data.uuid == form_tables(param_config)[form_name].uuid).filter(
                                *conditions).all()
            if tr_dict.get('dict_file', False):
                translations = add_translations_from_file(tr_dict)
            else:
                translations = {}
            # Replace the single column added earlier with one column
            # per distinct value found in the DB.
            return_keys.pop()
            for r in results:
                if r[0]:
                    name = v[1] + " " + translations.get(r[0], r[0])
                    if name not in return_keys:
                        return_keys.append(name)
                    if name in translation_dict:
                        translation_dict[name] = translation_dict[name] + "," + r[0]
                    else:
                        translation_dict[name] = field + "$to_columns$" + r[0]
        if "gen_link$" in v[0]:
            link_ids.append(v[0].split("$")[1])
    if "uuid" not in return_keys:
        return_keys.append("uuid")
        translation_dict["uuid"] = "meta/instanceID"
    link_ids = set(link_ids)
    links_by_type, links_by_name = get_links(config_directory +
                                             country_config["links_file"])
    # DB query, with yield_per(200) for memory reasons
    columns = [Data, form_tables(param_config)[form_name]]
    link_id_index = {}
    joins = []
    if query_links:
        # Cache the linked-form rows on disk; they are looked up per
        # row while streaming the main query.
        link_data = shelve.open(base_folder + "/exported_data/" + uuid)
        link_data_query = session.query(Links).filter(
            Links.type == link_name).yield_per(300)
        for row in link_data_query:
            link_data[row.uuid_to] = row.data_to
    for i, l in enumerate(link_ids):
        form = aliased(form_tables(param_config)[links_by_name[l]["to_form"]])
        joins.append((form, Data.links[(l, -1)].astext == form.uuid))
        # Offset by 2: result rows are (Data, main form, link forms...).
        link_id_index[l] = i + 2
        columns.append(form.data)
    number_query = session2.query(func.count(Data.id)).join(
        form_tables(param_config)[form_name],
        Data.uuid == form_tables(param_config)[form_name].uuid)
    results = session2.query(*columns).join(
        form_tables(param_config)[form_name],
        Data.uuid == form_tables(param_config)[form_name].uuid)
    for join in joins:
        results = results.outerjoin(join[0], join[1])
    total_number = number_query.filter(*conditions).first()[0]
    results = results.filter(*conditions).yield_per(200)
    locs = get_locations(session)
    list_rows = []
    filename = base_folder + "/exported_data/" + uuid + "/" + download_name
    os.mkdir(base_folder + "/exported_data/" + uuid)
    csv_content = open(filename + ".csv", "w")
    csv_writer = csv.writer(csv_content)
    csv_writer.writerows([return_keys])
    # XlsxWriter with "constant_memory" set to true, flushes mem after each row
    xls_content = open(filename + ".xlsx", "wb")
    xls_book = xlsxwriter.Workbook(xls_content, {'constant_memory': True})
    xls_sheet = xls_book.add_worksheet()
    # xls_sheet = pyexcel.Sheet([keys])

    # Little utility function write a row to file.
    def write_xls_row(data, row, sheet):
        # NOTE(review): writes to the enclosing xls_sheet, not the
        # `sheet` parameter it is passed.
        for cell in range(len(data)):
            xls_sheet.write(row, cell, data[cell])

    write_xls_row(return_keys, 0, xls_sheet)
    i = 0

    def _list_category_variables(category, data_row):
        """
        Lists the variables from the specified category that are assigned to
        the specified row. This can be used to create data columns such as
        'Age Group' using 'category$ncd_age'.
        """
        # Get the category's variables' data, indexed by ID.
        cat_variables = {}
        variable_list = ""
        db_results = session.query(AggregationVariables).filter(
            AggregationVariables.category.has_key(category)
        )
        for variable in db_results:
            cat_variables[variable.id] = variable
        # Build a string listing the row's variables from specified category.
        # NOTE(review): reads `r` from the enclosing loop rather than the
        # `data_row` parameter it is passed — confirm before refactoring.
        for var_id, var in cat_variables.items():
            if var_id in r[0].variables:
                variable_list += var.name + ", "
        # Remove the last comma and space.
        return variable_list[:-2]

    # Prepare each row
    for r in results:
        list_row = [''] * len(return_keys)
        # Skip rows outside the caller's allowed location subtree.
        if not is_child(allowed_location, r[0].clinic, locs):
            continue
        # Per-row cache of parsed date fields.
        dates = {}
        for k in return_keys:
            form_var = translation_dict[k]
            index = return_keys.index(k)
            raw_data = r[1].data
            if "many_links&" in form_var:
                link_name, number, form_var = form_var.split("&")[1:]
                number = int(number)
                if link_name in r[0].links:
                    links = r[0].links[link_name]
                    if len(links) >= number + 1:
                        link_uuid = links[number]
                        raw_data = link_data[link_uuid]
                    else:
                        list_row[index] = None
                        continue
                else:
                    list_row[index] = None
                    continue
            if "icd_name$" in form_var:
                fields = form_var.split("$")
                if len(fields) > 2:
                    field = fields[1]
                else:
                    field = "icd_code"
                if raw_data[field] in icd_code_to_name[form_var]:
                    list_row[index] = icd_code_to_name[form_var][raw_data[
                        field]]
                else:
                    list_row[index] = None
            elif form_var == "clinic":
                list_row[index] = locs[r[0].clinic].name
            elif form_var == "region":
                list_row[index] = locs[r[0].region].name
            elif form_var == "zone":
                list_row[index] = locs[r[0].zone].name
            elif form_var == "district":
                if r[0].district:
                    list_row[index] = locs[r[0].district].name
                else:
                    list_row[index] = None
            elif "$year" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].year
                else:
                    list_row[index] = None
            elif "$month" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].month
                else:
                    list_row[index] = None
            elif "$day" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = dates[field].day
                else:
                    list_row[index] = None
            elif "$quarter" in form_var:
                field = form_var.split("$")[0]
                if raw_data.get(field):
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    quarter = 1 + (dates[field].month - 1)//3
                    list_row[index] = quarter
                else:
                    list_row[index] = None
            elif "$epi_week" in form_var:
                field = form_var.split("$")[0]
                if field in raw_data and raw_data[field]:
                    if field not in dates:
                        dates[field] = parse(raw_data[field])
                    list_row[index] = epi_week_for_date(dates[field])[1]
                else:
                    list_row[index] = None
            # A general framework for referencing links in the
            # download data.
            # link$<link id>$<linked form field>
            elif "gen_link$" in form_var:
                link = form_var.split("$")[1]
                link_index = link_id_index[link]
                if r[link_index]:
                    list_row[index] = r[link_index].get(
                        form_var.split("$")[2], None
                    )
                else:
                    list_row[index] = None
            elif "code" == form_var.split("$")[0]:
                # code$cod_1,cod_2,Text_1,Text_2$default_value
                split = form_var.split("$")
                codes = split[1].split(",")
                text = split[2].split(",")
                if len(split) > 3:
                    default_value = split[3]
                else:
                    default_value = None
                final_text = []
                for j in range(len(codes)):
                    if codes[j] in r[0].variables:
                        final_text.append(text[j])
                if len(final_text) > 0:
                    list_row[index] = " ".join(final_text)
                else:
                    list_row[index] = default_value
            elif "category" == form_var.split("$")[0]:
                list_row[index] = _list_category_variables(
                    form_var.split("$")[1], r
                )
            elif "code_value" == form_var.split("$")[0]:
                code = form_var.split("$")[1]
                if code in r[0].variables:
                    list_row[index] = float(r[0].variables[code])
                else:
                    list_row[index] = None
            elif "value" == form_var.split(":")[0]:
                list_row[index] = form_var.split(":")[1]
            elif "$to_columns$" in form_var:
                # 1 if any of the codes appears in the space-separated
                # field value, else 0.
                int_has_code = 0
                field = form_var.split("$")[0]
                codes = form_var.split("$")[-1].split(",")
                str_elements = raw_data.get(field)
                if type(str_elements) == str:
                    elements = str_elements.split(" ")
                    has_code = any(code in elements for code in codes)
                    int_has_code = int(has_code)
                list_row[index] = int_has_code
            else:
                if form_var.split("$")[0] in raw_data:
                    list_row[index] = raw_data[form_var.split("$")[0]]
                else:
                    list_row[index] = None
            # Standardise date formating
            if "$date" in form_var:
                field = form_var.split("$")[0]
                if list_row[index]:
                    if field not in dates:
                        dates[field] = parse(list_row[index])
                    list_row[index] = dates[field].strftime(
                        "%d/%m/%Y"
                    )
                else:
                    list_row[index] = None
            # If the final value is a float, round to 2 dp.
            # This proceedure ensures integers are shown as integers.
            # Also accepts string values.
            try:
                a = float(list_row[index])
                b = int(float(list_row[index]))
                if a == b:
                    list_row[index] = b
                else:
                    list_row[index] = round(a, 2)
            except (ValueError, TypeError):
                pass
            # If a translation dictionary is defined in which the key exists...
            if min_translation and k in min_translation and list_row[index]:
                tr_dict = min_translation[k]
                if list_row[index] in tr_dict:
                    list_row[index] = tr_dict[list_row[index]]
                else:
                    parts = [x.strip() for x in str(list_row[index]).split(' ')]
                    for x in range(len(parts)):
                        # Get the translation using the appropriate key.
                        # If that doesn't exist get the wild card key: *
                        # If that doesn't exist just return the value
                        parts[x] = str(
                            tr_dict.get(parts[x], tr_dict.get('*', parts[x]))
                        )
                    list_row[index] = ' '.join(list(filter(bool, parts)))
            if translation_dir and language != "en" and list_row[index]:
                list_row[index] = t.gettext(list_row[index])
        list_rows.append(list_row)
        # Can write row immediately to xls file as memory is flushed after.
        write_xls_row(list_row, i + 1, xls_sheet)
        # Append the row to list of rows to be written to csv.
        if i % 1000 == 0:
            logging.warning("{} rows completed...".format(i))
            csv_writer.writerows(list_rows)
            list_rows = []
            status.status = i / total_number
            session.commit()
        i += 1
    csv_writer.writerows(list_rows)
    csv_content.close()
    xls_book.close()
    xls_content.close()
    status.status = 1
    status.success = 1
    session.commit()
    if query_links:
        link_data.close()
    # Remove the on-disk shelve cache files, if any.
    # NOTE(review): this cleanup path is based on the module directory
    # (dir_path) while the shelve above was opened under base_folder —
    # confirm the two resolve to the same location.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    filename = dir_path + "/exported_data/" + uuid
    logging.warning("Filename: " + filename)
    if os.path.exists(filename + ".dir"):
        os.remove(filename + ".dir")
    if os.path.exists(filename + ".dat"):
        os.remove(filename + ".dat")
    return True