def _diff_dicts(old_dict, new_dict):
    """Return a mapping of dotted xpath -> value-diff between two objects.

    Non-dict inputs are replaced by a placeholder mapping so every path from
    the other side diffs against the sentinel "unable to parse".
    """
    if isinstance(old_dict, dict):
        old, old_default = flatten(old_dict, "."), None
    else:
        old, old_default = defaultdict(lambda: "not a dict"), "unable to parse"

    if isinstance(new_dict, dict):
        new, new_default = flatten(new_dict, "."), None
    else:
        new, new_default = defaultdict(lambda: "not a dict"), "unable to parse"

    result = {}
    # Union of paths from both sides, so additions and removals both show up.
    for xpath in set(old) | set(new):
        diff = _diff_vals(old.get(xpath, old_default), new.get(xpath, new_default))
        if diff:
            result[xpath] = diff
    return result
def gen_invoices_function(sales_data, sales_type, invoice_term, gen_json, file_name):
    """Flatten the invoices of one sales section into a list of flat dicts.

    :param sales_data: mapping of sales-type name -> list of supplier records
    :param sales_type: section to process; "b2cs" records are flattened
        directly, all other types are split supplier/invoice/item first
    :param invoice_term: key under which each supplier record holds its invoices
    :param gen_json: when True, also dump the result to ``file_name`` as JSON
    :param file_name: output path used when ``gen_json`` is True
    :return: list of flattened invoice dicts
    """
    invoice_list = []
    if not sales_type == "b2cs":
        for sales in sales_data[sales_type]:
            # Supplier-level fields, minus the nested invoice list itself.
            current_supplier = sales.copy()
            current_supplier.pop(invoice_term)
            for invoice in sales[invoice_term]:
                current_invoice = invoice.copy()
                if len(invoice["itms"]) > 1:
                    # One output row per line item: clone the invoice and
                    # re-attach a single-element "itms" list each time.
                    current_invoice.pop("itms")
                    for current_inv_item in invoice["itms"]:
                        new_invoice = current_invoice.copy()
                        new_invoice["itms"] = [current_inv_item]
                        flattened_inv = flatten(new_invoice)
                        invoice_list.append({ **current_supplier, **flattened_inv })
                else:
                    # Single-item invoice: flatten as-is.
                    flattened_inv = flatten(invoice)
                    invoice_list.append({**current_supplier, **flattened_inv})
        if gen_json:
            # NOTE(review): the JSON dump only happens for non-"b2cs" types —
            # confirm this asymmetry is intended.
            with open(file_name, mode="w") as b2b_sales_data:
                json.dump(invoice_list, b2b_sales_data)
    else:
        # "b2cs" records carry no supplier/invoice nesting; flatten directly.
        for invoice in sales_data[sales_type]:
            flattened_invoice = flatten(invoice)
            invoice_list.append(flattened_invoice)
    return invoice_list
def parse(self):
    """Parse the file at given url and returns the generated csv file path

    Returns:
        a list of strings. Each string is the output of CSV file content
    """
    r = requests.get(self.url)
    try:
        data = r.json()
    except JSONDecodeError as exc:
        # Chain the cause so the original decode error is not lost.
        raise Exception("Sorry, url didn't return json") from exc
    except Exception as exc:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception and chain the cause.
        raise Exception("Something went wrong") from exc
    if self.json_format == 'flat':
        # Flatten each record (or the single record) before normalizing.
        if isinstance(data, list):
            for i, d in enumerate(data):
                data[i] = flatten(d)
        else:
            data = flatten(data)
    df = pd.json_normalize(data)
    return [df.to_csv()]
def obtener_vacante(token,vacante='all'):
    """Fetch vacancies from the HiringRoom API as a DataFrame.

    :param token: API token passed as a query parameter.
    :param vacante: 'all' fetches one page of all vacancies; otherwise the id
        of a single vacancy to fetch.
    :return: DataFrame with a fixed set of renamed vacancy columns.
    """
    if vacante == 'all':
        URL_vacante = "https://api.hiringroom.com/v0/vacancies"
        params_vacancies={
            # NOTE(review): only page 0 (up to 100 rows) is fetched — no
            # pagination loop; confirm that is acceptable for this dataset.
            "page":0,
            "pageSize":100,
            "token":token,
        }
        request_vacancies = requests.get(url=URL_vacante,params=params_vacancies)
        data_vacancies = request_vacancies.json()
        # List of vacancies: flatten each record lazily, then build the frame.
        dict_flattened_vacancies = (flatten(record, '.') for record in data_vacancies['vacantes'])
        df_vacancies = pd.DataFrame(dict_flattened_vacancies)
    else:
        URL_vacante = "https://api.hiringroom.com/v0/vacancies/"+vacante
        params_vacancies={
            #"page":0,
            #"pageSize":100,
            "token":token,
        }
        request_vacancies = requests.get(url=URL_vacante,params=params_vacancies)
        data_vacancies = request_vacancies.json()
        # Single vacancy: one flattened record becomes a one-row frame.
        dict_flattened_vacancies = flatten(data_vacancies['vacante'], '.')
        df_vacancies = pd.DataFrame.from_records([dict_flattened_vacancies])
    # Normalize column names, then keep only the reporting columns.
    # NOTE(review): assumes the API always returns these flattened columns
    # (e.g. 'client.compañia'); a schema change would raise KeyError here.
    df_vacancies.rename(columns={'id':'idVacante','nombre':'nombre_vacante'},inplace=True) #id_vacante
    df_vacancies = df_vacancies[['idVacante','nombre_vacante','estadoActual','client.compañia','fechaCreacion', 'ubicacion.pais', 'ubicacion.provincia', 'ubicacion.ciudad' ]]
    return df_vacancies
def _get_data(self, table, project_id, flat=False, info_only=False):
    """Get the data for a given table.

    :param table: key into ``self.repositories`` -> (repo, query-name) pair.
    :param project_id: project whose rows are fetched.
    :param flat: when True, flatten nested dicts into single-level keys.
    :param info_only: when True, export only each row's 'info' payload.
    :return: list of dicts, one per row.
    """
    repo, query = self.repositories[table]
    data = getattr(repo, query)(project_id=project_id)
    # Keys configured to be stripped before flattening (may be None).
    ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS')
    if info_only:
        if flat:
            tmp = []
            for row in data:
                inf = row.dictize()['info']
                inf = self._clean_ignore_keys(inf, ignore_keys, info_only)
                if inf and type(inf) == dict:
                    tmp.append(flatten(inf))
                else:
                    # Non-dict (or empty) info is wrapped under a single key.
                    tmp.append({'info': inf})
        else:
            tmp = []
            for row in data:
                if row.dictize()['info']:
                    tmp.append(row.dictize()['info'])
                else:
                    tmp.append({})
    else:
        if flat:
            tmp = []
            for row in data:
                # Full row export: clean ignored keys, then flatten.
                cleaned = self._clean_ignore_keys(row.dictize(), ignore_keys, info_only)
                tmp.append(flatten(cleaned))
        else:
            tmp = [row.dictize() for row in data]
    return tmp
def _get_data(self, table, project_id, flat=False, info_only=False):
    """Get the data for a given table.

    :param table: key into ``self.repositories`` -> (repo, query-name) pair.
    :param project_id: project whose rows are fetched.
    :param flat: when True, flatten nested dicts into single-level keys.
    :param info_only: when True, export only each row's 'info' payload.
    :return: list of dicts, one per row.
    """
    repo, query = self.repositories[table]
    data = getattr(repo, query)(project_id=project_id)
    if info_only:
        tmp = []
        for row in data:
            # FIX: dictize() was called up to twice per row; do it once.
            info = row.dictize()['info']
            if flat:
                # FIX: isinstance instead of `type(x) == dict`, so dict
                # subclasses flatten too.
                if info and isinstance(info, dict):
                    tmp.append(flatten(info))
                else:
                    # Non-dict (or empty) info is wrapped under a single key.
                    tmp.append({'info': info})
            else:
                tmp.append(info if info else {})
    else:
        if flat:
            tmp = [flatten(row.dictize()) for row in data]
        else:
            tmp = [row.dictize() for row in data]
    return tmp
def get_events():
    """Fetch notification events (with and without end dates) from SoftLayer
    and insert each flattened event into the `sl_events` table.
    """
    from itertools import chain

    try:
        eventService = client['SoftLayer_Notification_Occurrence_Event']
        eventMask = config.eventMask
        resultEndDate = eventService.getAllObjects(
            mask=eventMask, filter=config.eventFilterEndDate)
        resultNoEndDate = eventService.getAllObjects(
            mask=eventMask, filter=config.eventFilterNoEndDate)
        # FIX: the two result sets received byte-identical processing in two
        # duplicated loops; handle them in one loop. The unused
        # `keys = extractKeys(eventInfo)` binding was dropped (its result was
        # never read).
        for event in chain(resultEndDate, resultNoEndDate):
            eventInfo = flatten(event)
            values = []
            # generateInsertSql fills `values` in place with the bind params.
            insertSql = generateInsertSql(eventInfo, 'sl_events', values, doMap=False)
            cursor.execute(insertSql, values)
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
def flatten(x, name=''):
    """Recursively flatten *x* into the module-level ``out`` dict.

    Nested dict keys and list indices are joined with '_' to form the final
    key; the trailing separator is trimmed when a leaf value is stored.
    """
    if type(x) is dict:
        for key in x:
            flatten(x[key], name + key + '_')
    elif type(x) is list:
        # Indices become path components, e.g. a_0_b.
        for idx, item in enumerate(x):
            flatten(item, name + str(idx) + '_')
    else:
        # Leaf: drop the trailing '_' from the accumulated path.
        out[name[:-1]] = x
def main(self):
    """Consume JSON messages from RabbitMQ forever, write them to CSV, and
    periodically publish the accumulated CSVs back to RabbitMQ.

    Uses module-level globals ``k`` (message counter) and ``json_data``.
    """
    global k
    k = 0
    while True:
        logger = logging.getLogger(__name__)
        # consume from Queue
        routingConsume = 'bc'
        consumeRabbitMDM = ConsumeRabbitMQ()
        logger.info('First')
        global json_data
        # Blocks until a message arrives on routing key 'bc'.
        json_data = consumeRabbitMDM.startConsuming(routingConsume)
        logger.info('Second')
        # print(json_data)
        logger.info('Third')
        taskOne = Task_JsonCsv()
        if k < 11:
            # First 11 messages: only append to the CSVs.
            data = json.loads(json_data)
            x = flatten(data)
            firstkey = list(x.keys())[0]
            # taskOne = Task_JsonCsv()
            # Route by the first flattened key of the payload.
            if firstkey == 'parkingAreaOccupancy':
                taskOne.JSONtoCsvArea(**x)
                logger.info('Stored data in csvArea')
            else:
                taskOne.JSONtoCsvFacility(**x)
                logger.info('Stored data in csvFacility')
            k = k + 1
            print(k)
        else:
            # From the 12th message on: same CSV append, then also push both
            # CSV files downstream.
            # NOTE(review): this branch duplicates the one above and runs for
            # every later message (k is never reset) — confirm the intended
            # batching behavior.
            data = json.loads(json_data)
            x = flatten(data)
            firstkey = list(x.keys())[0]
            # taskOne = Task_JsonCsv()
            if firstkey == 'parkingAreaOccupancy':
                taskOne.JSONtoCsvArea(**x)
                logger.info('Stored data in csvArea')
            else:
                taskOne.JSONtoCsvFacility(**x)
                logger.info('Stored data in csvFacility')
            routingPublish = 'cd'
            i = taskOne.getCsv(filenameArea)
            j = taskOne.getCsv(filenameFacility)
            print('GetCsv was executed')
            print(i)
            print(j)
            pushRabbitMDM = PublishRabbitMQ()
            pushRabbitMDM.startImport(i, routingPublish)
            pushRabbitMDM.startImport(j, routingPublish)
            print('Csv was pushed to Queue')
            k = k + 1
            print(k)
def _get_data(self, table, project_id, flat=False, info_only=False):
    """Get the data for a given table.

    :param table: key into ``self.repositories`` -> (repo, query-name) pair.
    :param project_id: project whose rows are fetched.
    :param flat: when True, flatten nested dicts into single-level keys.
    :param info_only: when True, export only each row's 'info' payload.
    :return: list of dicts, one per row.
    """
    repo, query = self.repositories[table]
    data = getattr(repo, query)(project_id=project_id)
    # Root keys excluded from flattening; default to [] when unconfigured.
    ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
    if info_only:
        if flat:
            tmp = []
            for row in data:
                inf = row.dictize()['info']
                if inf and type(inf) == dict:
                    tmp.append(flatten(inf, root_keys_to_ignore=ignore_keys))
                elif inf and type(inf) == list:
                    # A list-valued info becomes one output row per element.
                    for datum in inf:
                        tmp.append(flatten(datum, root_keys_to_ignore=ignore_keys))
                else:
                    tmp.append({'info': inf})
        else:
            tmp = []
            for row in data:
                if row.dictize()['info']:
                    tmp.append(row.dictize()['info'])
                else:
                    tmp.append({})
    else:
        if flat:
            tmp = []
            for row in data:
                cleaned = row.dictize()
                # Pull the id-list columns out before flattening so they stay
                # single columns instead of exploding one column per element.
                fav_user_ids = None
                task_run_ids = None
                if cleaned.get('fav_user_ids'):
                    fav_user_ids = cleaned['fav_user_ids']
                    cleaned.pop('fav_user_ids')
                if cleaned.get('task_run_ids'):
                    task_run_ids = cleaned['task_run_ids']
                    cleaned.pop('task_run_ids')
                cleaned = flatten(cleaned, root_keys_to_ignore=ignore_keys)
                if fav_user_ids:
                    cleaned['fav_user_ids'] = fav_user_ids
                if task_run_ids:
                    cleaned['task_run_ids'] = task_run_ids
                tmp.append(cleaned)
        else:
            tmp = [row.dictize() for row in data]
    return tmp
def get_inventory(org):
    # Fetch bare-metal servers and virtual guests from SoftLayer, flatten
    # each record, and upsert them into the `sl_servers` table together with
    # a computed monthly cost. (Python 2 module: note the print statements.)
    # Getting servers and vsis
    try:
        servers = accountClient.getHardware(mask=detailMaskBms)
        keys = dict()
        for server in servers:
            bmDetails = server
            try:
                monthly_fee = calculateMonthlyFee('SoftLayer_Hardware', server['id'])
            except:
                # NOTE(review): bare except hides the real failure; the fee
                # silently defaults to 0.0 here but to '' in the VSI loop —
                # confirm the inconsistency is intended.
                monthly_fee = 0.0
            bmDetails['type'] = 'BM'
            serverInfo = flatten(server)
            print serverInfo
            bmInfo = flatten(bmDetails)
            # extractKeys accumulates the union of seen columns into `keys`.
            keys = extractKeys(bmInfo, keys)
            values = []
            # generateInsertSql fills `values` in place with bind parameters.
            insertSql = generateInsertSql(bmInfo, 'sl_servers', values)
            # print monthly_fee
            # print "%s" % (bmInfo)
            # print "%s" % (insertSql)
            cursor.execute(insertSql, values)
            cursor.execute(
                "update `sl_servers` set `monthly_cost` = %s where `id` = %s",
                [monthly_fee, server['id']])
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
    try:
        vsis = accountClient.getVirtualGuests(mask=detailMaskVsi)
        keys = dict()
        for vsi in vsis:
            vmDetails = vsi
            try:
                monthly_fee = calculateMonthlyFee('SoftLayer_Virtual_Guest', vsi['id'])
            except:
                monthly_fee = ''
            vmDetails['type'] = 'VM'
            vsiInfo = flatten(vsi)
            vmInfo = flatten(vmDetails)
            keys = extractKeys(vmInfo, keys)
            values = []
            insertSql = generateInsertSql(vmInfo, 'sl_servers', values)
            cursor.execute(insertSql, values)
            cursor.execute(
                "update `sl_servers` set `monthly_cost` = %s where `id` = %s",
                [monthly_fee, vsi['id']])
            print "%s" % (insertSql)
            print "%s" % (vmInfo)
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
def flatten(
        x: (list, dict, str),
        name: str = '',
        exclude=exclude):
    """Recursively flatten *x* into the module-level ``out`` dict.

    Dict keys listed in *exclude* are skipped; nested keys and list indices
    are joined with the module-level separator ``sep``.
    """
    if type(x) is dict:
        for key in x:
            if key not in exclude:
                flatten(x[key], f'{name}{key}{sep}')
    elif type(x) is list:
        # List indices become path components.
        for idx, item in enumerate(x):
            flatten(item, f'{name}{idx}{sep}')
    else:
        # Leaf: trim the trailing separator from the accumulated path.
        out[name[:-1]] = x
def format_results(self, result_list, results, return_obj):
    """ Formatting the results

    Routes each record by its 'source' field: GuardDuty records are
    flattened, filtered and post-processed; records without a source are
    treated as VPC flow logs; anything else is flattened as-is.

    :param result_list: list, accumulator that formatted records are appended to
    :param results: list, results (each record is a list of field/value pairs)
    :param return_obj: dict, receives the final list under 'data'
    :return: dict
    """
    for record in results:
        # Rebuild the record as a plain dict of field -> value.
        record_dict = dict()
        for data in record:
            record_dict[data['field']] = data['value']
        if 'source' in record_dict.keys(
        ) and record_dict['source'] == 'aws.guardduty':
            json_message = record_dict['@message']
            data = json.loads(json_message)
            flatten_results = flatten(data)
            # Drop empty-string and empty-dict values.
            flatten_results = {
                k: v
                for k, v in flatten_results.items() if v != "" and v != {}
            }
            # Skip findings with no action type or an unknown protocol.
            if flatten_results.get(
                    'detail_service_action_actionType') is None:
                continue
            if flatten_results.get(
                    'detail_service_action_networkConnectionAction_protocol'
            ) == 'Unknown':
                continue
            guardduty_results = self.process_flatten_guardduty_results(
                flatten_results)
            guardduty_results['guardduty'].update(
                {'@timestamp': record_dict['@timestamp']})
            guardduty_results['guardduty'].update({'event_count': 1})
            result_list.append(guardduty_results)
        elif 'source' not in record_dict.keys():
            # No source field: treat the record as a VPC flow log row.
            vpc_dict = dict()
            vpc_dict['vpcflow'] = copy.deepcopy(record_dict)
            vpc_dict['vpcflow']['protocol'] = self.get_protocol(
                vpc_dict['vpcflow']['protocol'])
            vpc_dict['vpcflow']['event_count'] = 1
            result_list.append(vpc_dict)
        else:
            # Any other source: flatten the JSON payload verbatim.
            json_message = record_dict['@message']
            data = json.loads(json_message)
            flatten_results = flatten(data)
            result_list.append(flatten_results)
    return_obj['data'] = result_list
def get_from_json(self, coin_json: dict):
    """ Creates a Coin model entity from a json (dict).

    Related Country/Currency rows are get-or-created first, then every model
    field that is still empty is filled from the flattened json, matching
    either the field name or the field's declared ``json_id``.

    Parameters
    ----------
    coin_json (json): json description of coin as returned by Numista
    """
    result = self.model(numistaId=coin_json['id'], title=coin_json['title'])
    if 'country' in coin_json:
        cntry, _ = Country.objects.get_or_create(
            code=coin_json['country']['code'], defaults=coin_json['country'])
        result.country = cntry
    if 'value' in coin_json and 'currency' in coin_json['value']:
        curr, _ = Currency.objects.get_or_create(
            numistaId=coin_json['value']['currency']['id'],
            defaults=coin_json['value']['currency'])
        result.value_currency = curr
    # Flattened keys like value_currency_id line up with model field names.
    flat_obj = flatten(coin_json)
    for f in self.model._meta.get_fields():
        if f.name == 'id':
            # Never overwrite the primary key.
            pass
        elif (f.name in flat_obj) and (getattr(result, f.name) in f.empty_values):
            setattr(result, f.name, flat_obj[f.name])
        elif hasattr(f, 'json_id') and (f.json_id in flat_obj) and (getattr(
                result, f.name) in f.empty_values):
            # Fall back to the field's explicit json_id mapping.
            setattr(result, f.name, flat_obj[f.json_id])
    return result
def decorateFindingsWithObjects(self, data, mapping_overriden):
    # For each finding, flatten it and run a series of regex scans over the
    # flattened values, decorating the original finding with any standard
    # objects found (MACs, IPs, emails, domains, URLs, files); directories
    # use a custom extractor instead of a regex.
    for finding in data:
        flattened_finding = flatten(finding)
        # MAC addresses (six hex pairs joined by ':' or '-').
        self.regexAndDecorateWithStdObjects(
            flattened_finding, finding,
            r'((?:[\da-fA-F]{2}[:\-]){5}[\da-fA-F]{2})', "mac-addr",
            mapping_overriden)
        # IPv4 dotted quads.
        self.regexAndDecorateWithStdObjects(flattened_finding, finding,
                                            r'[0-9]+(?:\.[0-9]+){3}',
                                            "ipv4address", mapping_overriden)
        # Full (uncompressed) IPv6 addresses.
        self.regexAndDecorateWithStdObjects(
            flattened_finding, finding,
            r'(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])',
            "ipv6address", mapping_overriden)
        # Email addresses.
        self.regexAndDecorateWithStdObjects(
            flattened_finding, finding,
            r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", "email",
            mapping_overriden)
        # Domain names (lowercase labels joined by dots).
        self.regexAndDecorateWithStdObjects(
            flattened_finding, finding,
            r"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]",
            "domain-name", mapping_overriden)
        # http/https URLs.
        self.regexAndDecorateWithStdObjects(flattened_finding, finding,
                                            r'(https?://\S+)', "url",
                                            mapping_overriden)
        # File names/paths with an extension-like suffix.
        self.regexAndDecorateWithStdObjects(
            flattened_finding, finding,
            r'([~!@#$%^&*()\-_+={}\[\]|\\:;\"`\'<>.\?\w]+\.[a-z,A-Z][\w]+|[\w]+\.[a-z,A-Z][\W]+|\.[a-z,A-Z][\w]+)',
            "file", mapping_overriden)
        # Directories are extracted by a custom function, not a regex.
        self.customFunctionAndDecorateWithStdObjects(
            flattened_finding, finding, "directory", mapping_overriden)
def outputManager(self, context, output, key, bucket):
    """Write flattened Hubspot records to S3 as newline-delimited JSON, or
    skip downstream tasks when nothing was pulled.

    :param context: Airflow task context (task, dag_run, ti).
    :param output: list of (possibly nested) record dicts.
    :param key: destination S3 object key.
    :param bucket: destination S3 bucket name.
    """
    # FIX: original condition was `len(output) == 0 or output is None`, which
    # raises TypeError for output=None because len() is evaluated first.
    # An empty/None check via falsiness covers both cases safely.
    if not output:
        if self.total_output_files == 0:
            logging.info("No records pulled from Hubspot.")
            downstream_tasks = context['task'].get_flat_relatives(upstream=False)
            logging.info('Skipping downstream tasks...')
            logging.debug("Downstream task_ids %s", downstream_tasks)
            if downstream_tasks:
                self.skip(context['dag_run'],
                          context['ti'].execution_date,
                          downstream_tasks)
    else:
        logging.info('Logging {0} to S3...'.format(key))
        # Flatten each record and snake_case the keys, one JSON doc per line.
        output = [flatten(e) for e in output]
        output = '\n'.join([json.dumps({boa.constrict(k): v
                                        for k, v in i.items()})
                            for i in output])
        s3 = S3Hook(self.s3_conn_id)
        s3.load_string(
            string_data=str(output),
            key=key,
            bucket_name=bucket,
            replace=True
        )
        s3.connection.close()
        self.total_output_files += 1
def test_list_and_dict(self):
    """Nested lists of dicts flatten into underscore-joined index paths."""
    data = {
        'a': 1,
        'b': 2,
        'c': [{'d': [2, 3, 4], 'e': [{'f': 1, 'g': 2}]}],
    }
    want = {
        'a': 1,
        'b': 2,
        'c_0_d_0': 2,
        'c_0_d_1': 3,
        'c_0_d_2': 4,
        'c_0_e_0_f': 1,
        'c_0_e_0_g': 2,
    }
    self.assertEqual(flatten(data), want)
    # flatten_keys must report exactly the flattened key set.
    assertCountEqual(self, flatten_keys(data), set(want))
def test_flatten_filter_func(self):
    """filter_func keeps only the flattened keys it returns True for."""
    dic = {
        'a': {'a': [1, 2, 3]},
        'b': {'b': 'foo', 'c': 'bar'},
        'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
    }
    expected = {
        'c_c_0_baz_0': 1,
        'c_c_0_baz_1': 2,
        'c_c_0_baz_2': 3,
        'c_c_0_foo': 5,
        'c_c_0_bar': 6,
    }

    # FIX: PEP 8 (E731) — use a def instead of assigning a lambda to a name,
    # so tracebacks show a useful function name.
    def filter_func(key):
        return key.startswith('c_c_0_')

    actual = flatten(dic, filter_func=filter_func)
    self.assertEqual(actual, expected)
def test_flatten_filter_ignore_keys(self):
    """Fully-flattened keys listed in keys_to_ignore are dropped from the output."""
    data = {
        'a': {'a': [1, 2, 3]},
        'b': {'b': 'foo', 'c': 'bar'},
        'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
    }
    want = {
        'a_a_0': 1,
        'a_a_1': 2,
        'a_a_2': 3,
        'b_b': 'foo',
        'b_c': 'bar',
        'c_c_0_foo': 5,
        'c_c_0_bar': 6,
    }
    ignored = {'c_c_0_baz_0', 'c_c_0_baz_1', 'c_c_0_baz_2'}
    self.assertEqual(flatten(data, keys_to_ignore=ignored), want)
def test_unflatten_with_list_issue15(self):
    """https://github.com/amirziai/flatten/issues/15"""
    # Nested lists-of-lists-of-dicts must survive a flatten/unflatten round trip.
    original = {
        "Required": {
            "a": "1",
            "b": ["1", "2", "3"],
            "c": {"d": {"e": [[{"s1": 1}, {"s2": 2}],
                              [{"s3": 1}, {"s4": 2}]]}},
            "f": ["1", "2"],
        },
        "Optional": {"x": "1", "y": ["1", "2", "3"]},
    }
    self.assertEqual(unflatten_list(flatten(original)), original)
def test_blog_example(self):
    """The example from the blog post: mixed types flatten predictably."""
    data = {
        "a": 1,
        "b": 2,
        "c": [{"d": ['2', 3, 4], "e": [{"f": 1, "g": 2}]}],
    }
    want = {
        'a': 1,
        'b': 2,
        'c_0_d_0': '2',
        'c_0_d_1': 3,
        'c_0_d_2': 4,
        'c_0_e_0_f': 1,
        'c_0_e_0_g': 2,
    }
    self.assertEqual(flatten(data), want)
    # flatten_keys must report exactly the flattened key set.
    assertCountEqual(self, flatten_keys(data), set(want))
def test_empty_list_and_dict(self):
    """Empty containers, '' and None are preserved as terminal values."""
    data = {
        'a': {},
        'b': [],
        'c': '',
        'd': None,
        'e': [{'f': [], 'g': [{'h': {}, 'i': [], 'j': '', 'k': None}]}],
    }
    want = {
        'a': {},
        'b': [],
        'c': '',
        'd': None,
        'e_0_f': [],
        'e_0_g_0_h': {},
        'e_0_g_0_i': [],
        'e_0_g_0_j': '',
        'e_0_g_0_k': None,
    }
    self.assertEqual(flatten(data), want)
    # flatten_keys must report exactly the flattened key set.
    assertCountEqual(self, flatten_keys(data), set(want))
def cleanse(self):
    """Build the list of accounts to unfollow by checking, for each account
    we follow, whether it follows us back via the Instagram web JSON API."""
    print('Entrei no unfollowing mode')
    nonfollowers = []  # a fresh list
    self.unfollow_me = []
    unfollowing = 0
    try:
        # NOTE(review): this first call's result is discarded — the loop below
        # calls list_following(self) again. Remove if it has no side effects.
        list_following(self)
        for unfollow in list_following(self):
            nonfollowers.append(unfollow)
            with urllib.request.urlopen(
                    "https://www.instagram.com/{}/?__a=1".format(
                        unfollow)) as url:
                data = json.loads(url.read().decode())
                flat_list = flatten(data)
                # check whether they already follow us
                # NOTE(review): `not ... == False` selects accounts whose
                # follows_viewer flag is True or missing — i.e. it queues
                # people who DO follow us (or unknowns) for unfollowing.
                # Confirm the condition isn't inverted.
                if not flat_list.get("graphql_user_follows_viewer") == False:
                    self.unfollow_me.append(unfollow)
                    unfollowing += 1
                    print("Deixarei de seguir: {}".format(unfollowing))
    finally:
        # NOTE(review): the format string has one placeholder but two args;
        # the list itself is silently dropped from the message.
        print("Deixaremos de seguir : {}".format(len(self.unfollow_me),
                                                 self.unfollow_me))
        celebs_to_keep(self)
def outputManager(self, hook, output, key, bucket):
    """
    This method handles the output of the data.

    On the first call with no prior output files, optionally skips all
    downstream tasks; otherwise flattens the records, snake_cases the keys
    and uploads them as newline-delimited JSON via the given hook.

    :param hook: storage hook (currently only 's3' is handled).
    :param output: list of record dicts to upload.
    :param key: destination object key.
    :param bucket: destination bucket name.
    """
    if self.total_output_files == 0:
        logging.info("No records pulled.")
        if self.skip_if_null:
            downstream_tasks = self.context['task'].get_flat_relatives(
                upstream=False)
            logging.info('Skipping downstream tasks...')
            logging.debug("Downstream task_ids %s", downstream_tasks)
            if downstream_tasks:
                self.skip(self.context['dag_run'],
                          self.context['ti'].execution_date,
                          downstream_tasks)
    else:
        logging.info('Logging {0} to ...'.format(key))
        # One flattened, snake_cased JSON document per line.
        output = [flatten(e) for e in output]
        output = '\n'.join([
            json.dumps({boa.constrict(k): v for k, v in i.items()})
            for i in output
        ])
        if self.cs_type == 's3':
            hook.load_string(string_data=str(output),
                             key=key,
                             bucket_name=bucket,
                             replace=True)
            hook.connection.close()
        # NOTE(review): the counter increments even when cs_type != 's3' and
        # nothing was uploaded — confirm that is intended.
        self.total_output_files += 1
def File_Results_Window(True_Path):
    # Load the records at True_Path, flatten the first one, and fill the
    # module-level File_Results_List with "path.object, key : value" rows
    # before opening results window 2.
    I = 0
    Raw_Data = DATA.get_data(True_Path)
    # I slightly modified the flatten library to make my program format easier,
    # it may not work for all users and I may need to copy over what I modified
    # into my code so that it will work for all users
    Refined_Data = flatten(Raw_Data[I], separator="[", replace_separators="]")
    _jwp_object_name = Refined_Data["_jwp_object_name"]
    """ I commented these out because these are important, but I have not yet found a proper use for them
    _jwp_object_name = Refined_Data["_jwp_object_name"]
    _jwp_arr_idx = Refined_Data["_jwp_arr_idx"]
    _jwp_is_asset = Refined_Data["_jwp_is_asset"]
    export_type = Refined_Data["export_type"]
    _jwp_export_idx = Refined_Data["_jwp_export_idx"]
    _apoc_data_ver = Refined_Data["_apoc_data_ver"]
    """
    obj_name = str(
        True_Path + "." + _jwp_object_name + ", "
    )  # This will allow the users to copy the object path into the program so that they have an easier time modding
    # NOTE(review): Refined_Data is built from Raw_Data[0] only, so every pass
    # of this loop re-emits the same record — confirm whether each Raw_Data[I]
    # was meant to be flattened inside the loop.
    while I < len(Raw_Data):
        for key, value in Refined_Data.items():
            attr_name = str(key) + " : " + str(value)
            File_Results_List.append(obj_name + attr_name)
        File_Results_List.append("\n")
        I += 1
    ListBoxWindow(2)
def load_training_data(all_users=True):
    """
    Pulls the historical ratings from the database into the ML library
    :param all_users: Optional Boolean flag to train on all users or the current user
    :return: train_data, test_data split
    """
    if all_users:
        # user=None signals "all users" to models.get_user_history below.
        user = None
        filename = "data_all.txt"
    else:
        user = util.get_user()
        filename = "data_" + user + ".txt"
    # write the latest database value
    # TODO: optimize to load direct from database (look into connect_odbc())
    json_string = models.get_user_history(user, True)
    user_history_json = json.loads(json_string)
    # Flatten each history record so graphlab sees one flat row per entry.
    file_content = []
    for history_dict in user_history_json:
        file_content.append(flatten_json.flatten(history_dict))
    with open(filename, "w") as outfile:
        json.dump(file_content, outfile)
    # load training data into graphlab
    training_data = gl.SFrame.read_json(url=filename, orient="records")
    # kill temporary file now that data is loaded
    # TODO: when the recommender trains once daily, don't delete the file
    try:
        os.remove(filename)
    except OSError:
        pass
    # 80/20 train/test split.
    return training_data.random_split(.8)
def post(self):
    """ Creates kafka event from json object
    :return: response code of post request to Kafka REST Proxy
    """
    # Ingest time in unix milliseconds.
    now = round(time.time() * 1000)
    # flatten json so nested attributes can be used in KSQL analysis
    flat_json_object = flatten_json.flatten(json.loads(request.data))
    # extract timestamp and add it to json
    segment_timestamp = segment_timestamp_to_unix_millis(
        flat_json_object.get("timestamp"))
    flat_json_object["segment_timestamp"] = segment_timestamp
    flat_json_object['ingest_timestamp'] = now
    # determine topic to send event
    # NOTE(review): .get("type") + .get("event") raises TypeError when either
    # key is missing (None + ...) — confirm upstream guarantees both fields.
    topic = ''.join(c for c in str(
        flat_json_object.get("type") + flat_json_object.get("event"))
        if c.isalnum()) + "_00_raw_flatJSON"
    # extract key, if there is a registered attribute that should serve as key for this event
    key = self.get_key(flat_json_object)
    self.p.send(topic=topic,
                key=str(key).encode('utf-8'),
                value=flat_json_object)
    self.p.flush()
    return 200
def show_metrics(
    metrics, all_branches=False, all_tags=False, all_commits=False
):
    """Log a human-readable tree of metrics, optionally grouped by branch."""
    from flatten_json import flatten
    from dvc.utils.diff import format_dict

    # When `metrics` contains a `None` key, it means that some files
    # specified as `targets` in `repo.metrics.show` didn't contain any metrics.
    missing = metrics.pop(None, None)

    show_branch_header = all_branches or all_tags or all_commits
    for branch, files in metrics.items():
        if show_branch_header:
            logger.info(f"{branch}:")

        for fname, metric in files.items():
            if not isinstance(metric, dict):
                # Scalar metric: one line, no nesting.
                logger.info("\t{}: {}".format(fname, str(metric)))
                continue
            logger.info(f"\t{fname}:")
            # Dotted paths for nested metric values.
            for key, value in flatten(format_dict(metric), ".").items():
                logger.info(f"\t\t{key}: {value}")

    if missing:
        raise BadMetricError(missing)
def lambda_handler(event, context):
    """Fetch the single object under today's prefix, split the concatenated
    tweets into one JSON document per line, flatten each one, and upload the
    result to the destination bucket under the same key.
    """
    ruta = getprefix()
    # Only one file per folder of the bucket:
    file = client.list_objects(Bucket=bucketname,
                               Prefix=ruta).get('Contents', [])[0]['Key']
    # Download the object so it can be edited in memory:
    bytes_buffer = io.BytesIO()
    client.download_fileobj(Bucket=bucketname, Key=file, Fileobj=bytes_buffer)
    str_value = bytes_buffer.getvalue().decode()
    # Replace the "}{" boundary so each tweet sits on its own line.
    repStr = re.sub("}{", "} \n{", str_value)
    separados = re.split('\n', repStr)
    # FIX: replaced the index loop + quadratic `texto = texto + ...` string
    # concatenation with direct iteration and a single join.
    texto = "".join(
        json.dumps(flatten(json.loads(linea))) + "\n" for linea in separados
    )
    # Upload the rewritten object to the destination bucket.
    newobj = s3.Object(destbucket, file)
    newobj.put(Body=texto.encode('ascii'))
    return {
        'statusCode': 200,
    }
def flatten_result(self, results, service_type):
    """ Flattening the result response
    :param results: list, results
    :param service_type: str, e.g. 'vpcflow' to apply VPC-flow-specific fixes
    :return: list, flattened and empty values removed
    """
    flatten_results = []
    # Flattened ('#'-joined) paths used to disambiguate GuardDuty fields.
    private_ip_address_key = 'resource#instancedetails#networkinterfaces#0#privateipaddress'
    action_type_key = 'service#action#actiontype'
    for obj in results:
        # Decode any string values that are themselves JSON documents.
        for key, value in obj.items():
            try:
                obj[key] = json.loads(value)
            except ValueError:
                pass
        flatten_obj = flatten(obj, '#')
        if service_type == 'vpcflow':
            flatten_obj.update({'name': 'VPC flow log'})
            # e.g. ACCEPT -> network-traffic-accept.
            temp = flatten_obj.get("action")
            flatten_obj["action"] = "network-traffic-" + temp.lower()
        # Avoid clobbering other 'id' semantics downstream.
        if 'id' in flatten_obj:
            flatten_obj['finding_id'] = flatten_obj.pop('id')
        # Formatting to differentiate common key available in different action types for to STIX mapping
        if private_ip_address_key in flatten_obj and flatten_obj[action_type_key] == 'PORT_PROBE':
            flatten_obj['portprobe#'+private_ip_address_key] = flatten_obj.pop(private_ip_address_key)
        elif private_ip_address_key in flatten_obj and flatten_obj[action_type_key] == 'DNS_REQUEST':
            flatten_obj['dnsrequest#'+private_ip_address_key] = flatten_obj.pop(private_ip_address_key)
        flatten_results.append(flatten_obj)
    # Remove null values and empty objects from response
    flatten_result_cleansed = self.format_flatten_result(flatten_results)
    return flatten_result_cleansed
def create(landingZoneWANDetails): """ This function creates a new landingZoneWAN in the landingZoneWAN structure based on the passed in landingZoneWAN data :param landingZoneWAN: landingZoneWAN to create in landingZoneWAN list :return: 201 on success, 406 on landingZoneWAN exists """ # we don't need the id, the is generated automatically on the database if ('id' in landingZoneWANDetails): del landingZoneWANDetails["id"] # flatten the python object into a python dictionary flattened_landingZoneWAN = flatten(landingZoneWANDetails, delimiter) schema = LandingZoneWANSchema(many=False) new_landingZoneWAN = schema.load(flattened_landingZoneWAN, session=db.session) # Save python object to the database db.session.add(new_landingZoneWAN) db.session.commit() idSchema = IdSchema(many=False) data = idSchema.dump(new_landingZoneWAN) app.logger.debug("landingZoneWAN data:") app.logger.debug(pformat(data)) return data, 201
def test_custom_separator(self):
    """A user-supplied separator replaces the default underscore."""
    data = {'a': '1', 'b': '2', 'c': {'c1': '3', 'c2': '4'}}
    self.assertEqual(
        flatten(data, '*'),
        {'a': '1', 'b': '2', 'c*c1': '3', 'c*c2': '4'},
    )
def test_empty_tuple(self):
    """An empty tuple is kept as a terminal value."""
    data = {'a': 1, 'b': ({'c': ()},)}
    self.assertEqual(flatten(data), {'a': 1, 'b_0_c': ()})
def test_tuple(self):
    """Tuples are indexed into the key path just like lists."""
    data = {'a': 1, 'b': ({'c': (2, 3)},)}
    self.assertEqual(flatten(data), {'a': 1, 'b_0_c_0': 2, 'b_0_c_1': 3})
def test_list(self):
    """List elements are indexed into the flattened key path."""
    data = {'a': 1, 'b': [{'c': [2, 3]}]}
    self.assertEqual(flatten(data), {'a': 1, 'b_0_c_0': 2, 'b_0_c_1': 3})
def test_one_flatten_utf8(self):
    """Non-ASCII keys and values survive a single-level flatten."""
    data = {'a': '1', u'ñ': u'áéö', 'c': {u'c1': '3', 'c2': '4'}}
    self.assertEqual(
        flatten(data),
        {'a': '1', u'ñ': u'áéö', 'c_c1': '3', 'c_c2': '4'},
    )
def test_one_flatten(self):
    """A single nesting level flattens with underscore-joined keys."""
    data = {'a': '1', 'b': '2', 'c': {'c1': '3', 'c2': '4'}}
    self.assertEqual(
        flatten(data),
        {'a': '1', 'b': '2', 'c_c1': '3', 'c_c2': '4'},
    )
def test_unflatten_with_list_issue31(self):
    """https://github.com/amirziai/flatten/issues/31"""
    # Lists of lists of strings must survive the round trip.
    original = {
        "testdict": {
            "seconddict": [["firstvalue", "secondvalue"],
                           ["thirdvalue", "fourthvalue"]]
        }
    }
    self.assertEqual(unflatten_list(flatten(original)), original)
def test_list_and_dict(self):
    """Nested lists of dicts flatten into underscore-joined index paths."""
    data = {
        'a': 1,
        'b': 2,
        'c': [{'d': [2, 3, 4], 'e': [{'f': 1, 'g': 2}]}],
    }
    want = {
        'a': 1,
        'b': 2,
        'c_0_d_0': 2,
        'c_0_d_1': 3,
        'c_0_d_2': 4,
        'c_0_e_0_f': 1,
        'c_0_e_0_g': 2,
    }
    self.assertEqual(flatten(data), want)
def test_unflatten_with_list_issue15(self):
    """https://github.com/amirziai/flatten/issues/15"""
    # Nested lists-of-lists-of-dicts must survive a flatten/unflatten round trip.
    original = {
        "Required": {
            "a": "1",
            "b": ["1", "2", "3"],
            "c": {"d": {"e": [[{"s1": 1}, {"s2": 2}],
                              [{"s3": 1}, {"s4": 2}]]}},
            "f": ["1", "2"],
        },
        "Optional": {"x": "1", "y": ["1", "2", "3"]},
    }
    self.assertEqual(unflatten_list(flatten(original)), original)
def test_blog_example(self):
    """The example from the blog post: mixed types flatten predictably."""
    data = {
        "a": 1,
        "b": 2,
        "c": [{"d": ['2', 3, 4], "e": [{"f": 1, "g": 2}]}],
    }
    want = {
        'a': 1,
        'b': 2,
        'c_0_d_0': '2',
        'c_0_d_1': 3,
        'c_0_d_2': 4,
        'c_0_e_0_f': 1,
        'c_0_e_0_g': 2,
    }
    self.assertEqual(flatten(data), want)
def test_empty_list_and_dict(self):
    """Empty containers, '' and None are preserved as terminal values."""
    data = {
        'a': {},
        'b': [],
        'c': '',
        'd': None,
        'e': [{'f': [], 'g': [{'h': {}, 'i': [], 'j': '', 'k': None}]}],
    }
    want = {
        'a': {},
        'b': [],
        'c': '',
        'd': None,
        'e_0_f': [],
        'e_0_g_0_h': {},
        'e_0_g_0_i': [],
        'e_0_g_0_j': '',
        'e_0_g_0_k': None,
    }
    self.assertEqual(flatten(data), want)
def test_flatten_ignore_keys(self):
    """Ignore a set of root keys for processing"""
    data = {
        'a': {'a': [1, 2, 3]},
        'b': {'b': 'foo', 'c': 'bar'},
        'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
    }
    got = flatten(data, root_keys_to_ignore={'b', 'c'})
    self.assertEqual(got, {'a_a_0': 1, 'a_a_1': 2, 'a_a_2': 3})
def test_unflatten_with_list_nested(self):
    """Dicts nested inside lists of lists round-trip intact."""
    original = {"a": [[{"b": 1}], [{"d": 1}]]}
    self.assertEqual(unflatten_list(flatten(original)), original)
def _get_data(self, table, project_id, flat=False, info_only=False):
    """Get the data for a given table.

    :param table: key into ``self.repositories`` -> (repo, query-name) pair;
        'task', 'task_run' and 'result' additionally support a configured
        CSV-export sub-key inside each row's info.
    :param project_id: project whose rows are fetched.
    :param flat: when True, flatten nested dicts into single-level keys.
    :param info_only: when True, export only each row's 'info' payload.
    :return: list of dicts, one per row.
    """
    repo, query = self.repositories[table]
    data = getattr(repo, query)(project_id=project_id)
    # Root keys excluded from flattening; default to [] when unconfigured.
    ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
    # NOTE(review): csv_export_key is only bound for these three tables; any
    # other table reaching the info_only/flat branch raises NameError.
    if table == 'task':
        csv_export_key = current_app.config.get('TASK_CSV_EXPORT_INFO_KEY')
    if table == 'task_run':
        csv_export_key = current_app.config.get('TASK_RUN_CSV_EXPORT_INFO_KEY')
    if table == 'result':
        csv_export_key = current_app.config.get('RESULT_CSV_EXPORT_INFO_KEY')
    if info_only:
        if flat:
            tmp = []
            for row in data:
                # Deep-copy so the sub-key extraction below can't mutate the
                # row's own info dict.
                inf = copy.deepcopy(row.dictize()['info'])
                if inf and type(inf) == dict and csv_export_key and inf.get(csv_export_key):
                    # Export only the configured sub-payload.
                    inf = inf[csv_export_key]
                # Tag each exported row with its originating row id.
                new_key = '%s_id' % table
                if inf and type(inf) == dict:
                    inf[new_key] = row.id
                    tmp.append(flatten(inf, root_keys_to_ignore=ignore_keys))
                elif inf and type(inf) == list:
                    # A list-valued info becomes one output row per element.
                    for datum in inf:
                        if type(datum) == dict:
                            datum[new_key] = row.id
                        tmp.append(flatten(datum, root_keys_to_ignore=ignore_keys))
                # NOTE(review): rows with a scalar/empty info are silently
                # dropped here (no else branch) — confirm intended.
        else:
            tmp = []
            for row in data:
                if row.dictize()['info']:
                    tmp.append(row.dictize()['info'])
                else:
                    tmp.append({})
    else:
        if flat:
            tmp = []
            for row in data:
                cleaned = row.dictize()
                # Pull the id-list columns out before flattening so they stay
                # single columns instead of exploding one column per element.
                fav_user_ids = None
                task_run_ids = None
                if cleaned.get('fav_user_ids'):
                    fav_user_ids = cleaned['fav_user_ids']
                    cleaned.pop('fav_user_ids')
                if cleaned.get('task_run_ids'):
                    task_run_ids = cleaned['task_run_ids']
                    cleaned.pop('task_run_ids')
                cleaned = flatten(cleaned, root_keys_to_ignore=ignore_keys)
                if fav_user_ids:
                    cleaned['fav_user_ids'] = fav_user_ids
                if task_run_ids:
                    cleaned['task_run_ids'] = task_run_ids
                tmp.append(cleaned)
        else:
            tmp = [row.dictize() for row in data]
    return tmp
def test_unflatten_with_list_deep(self):
    """Deeply nested mixed structures round-trip through flatten/unflatten_list."""
    original = {
        'a': [{'b': [{'c': [{'a': 5,
                             'b': {'a': [1, 2, 3]},
                             'c': {'x': 3}}]}]}]
    }
    self.assertEqual(unflatten_list(flatten(original)), original)
def searchDictionaryKey(dict,key):
    # Print every '#'-joined flattened path of `dict` that contains `key` as
    # a substring. (Python 2 module: note the print statement.)
    # NOTE(review): parameter `dict` shadows the builtin type; rename once
    # callers are confirmed to pass it positionally.
    flattenDict = flatten(dict,'#')
    for k in flattenDict:
        if key in k:
            print k
def test_one_flatten_utf8_dif(self):
    """A non-ASCII key composes correctly with its parent prefix."""
    inner = {u'eñe': 1}
    expected_key = u'info_{}'.format(u'eñe')
    self.assertEqual(flatten(dict(info=inner)), {expected_key: 1})
def test_no_flatten(self):
    """A dict with no nesting is returned unchanged."""
    data = {'a': '1', 'b': '2', 'c': 3}
    self.assertEqual(flatten(data), data)