示例#1
0
def _diff_dicts(old_dict, new_dict):
    """Return a mapping of dotted key paths to value diffs between two dicts.

    Non-dict inputs are treated as unparseable: every path lookup then
    yields an "unable to parse" placeholder instead of a real value.
    """

    def _prepare(candidate):
        # Flatten real dicts with "." separators; substitute a sentinel
        # mapping (and default) for anything that is not a dict.
        if isinstance(candidate, dict):
            return flatten(candidate, "."), None
        return defaultdict(lambda: "not a dict"), "unable to parse"

    new, new_default = _prepare(new_dict)
    old, old_default = _prepare(old_dict)

    res = defaultdict(dict)
    # Compare every path that appears on either side.
    for xpath in set(old) | set(new):
        val_diff = _diff_vals(old.get(xpath, old_default),
                              new.get(xpath, new_default))
        if val_diff:
            res[xpath] = val_diff
    return dict(res)
def gen_invoices_function(sales_data, sales_type, invoice_term, gen_json,
                          file_name):
    """Build a list of flattened invoice records for the given sales type.

    For non-"b2cs" data each invoice is merged with its supplier-level
    fields, with multi-item invoices split into one record per line item;
    "b2cs" data is simply flattened. When gen_json is truthy the
    non-"b2cs" result is also dumped to file_name as JSON.
    """
    invoice_list = []
    if sales_type == "b2cs":
        # b2cs invoices carry no supplier wrapper: flatten them directly.
        invoice_list = [flatten(invoice) for invoice in sales_data[sales_type]]
    else:
        for sales in sales_data[sales_type]:
            supplier_fields = sales.copy()
            supplier_fields.pop(invoice_term)
            for invoice in sales[invoice_term]:
                if len(invoice["itms"]) > 1:
                    base = invoice.copy()
                    base.pop("itms")
                    # Emit one output record per line item.
                    for item in invoice["itms"]:
                        record = base.copy()
                        record["itms"] = [item]
                        invoice_list.append({**supplier_fields,
                                             **flatten(record)})
                else:
                    invoice_list.append({**supplier_fields,
                                         **flatten(invoice)})

        if gen_json:
            with open(file_name, mode="w") as b2b_sales_data:
                json.dump(invoice_list, b2b_sales_data)

    return invoice_list
示例#3
0
    def parse(self):
        """Fetch JSON from ``self.url`` and convert it to CSV text.

        Returns:
            list[str]: a single-element list containing the CSV rendering
            of the (optionally flattened) JSON payload.

        Raises:
            Exception: if the response body is not valid JSON or any other
                error occurs while decoding it.
        """

        r = requests.get(self.url)

        try:
            data = r.json()
        except JSONDecodeError as err:
            # Chain the decode error so the root cause stays visible.
            raise Exception("Sorry, url didn't return json") from err
        except Exception as err:
            # Was a bare ``except:``, which would also swallow
            # KeyboardInterrupt/SystemExit; narrow it to Exception.
            raise Exception("Something went wrong") from err

        if self.json_format == 'flat':
            # Flatten each element (or the single object) before normalising.
            if isinstance(data, list):
                for i, d in enumerate(data):
                    data[i] = flatten(d)
            else:
                data = flatten(data)

        df = pd.json_normalize(data)

        return [df.to_csv()]
示例#4
0
def obtener_vacante(token, vacante='all'):
    """Fetch HiringRoom vacancies and return them as a tidy DataFrame.

    When ``vacante`` is 'all', the first page of up to 100 vacancies is
    retrieved; otherwise only the vacancy with that id is fetched. Columns
    are renamed and reduced to the set used downstream.
    """
    if vacante == 'all':
        endpoint = "https://api.hiringroom.com/v0/vacancies"
        query = {
            "page": 0,
            "pageSize": 100,
            "token": token,
        }
        response = requests.get(url=endpoint, params=query)
        payload = response.json()  # vacancy listing
        rows = (flatten(record, '.') for record in payload['vacantes'])
        df_vacancies = pd.DataFrame(rows)
    else:
        endpoint = "https://api.hiringroom.com/v0/vacancies/" + vacante
        query = {
            # "page": 0,
            # "pageSize": 100,
            "token": token,
        }
        response = requests.get(url=endpoint, params=query)
        payload = response.json()  # single vacancy
        flat_row = flatten(payload['vacante'], '.')
        df_vacancies = pd.DataFrame.from_records([flat_row])

    # Normalise column names and keep only the columns downstream code uses.
    df_vacancies.rename(columns={'id': 'idVacante',
                                 'nombre': 'nombre_vacante'},
                        inplace=True)
    df_vacancies = df_vacancies[['idVacante', 'nombre_vacante', 'estadoActual',
                                 'client.compañia', 'fechaCreacion',
                                 'ubicacion.pais', 'ubicacion.provincia',
                                 'ubicacion.ciudad']]
    return df_vacancies
示例#5
0
    def _get_data(self, table, project_id, flat=False, info_only=False):
        """Get the data for a given table.

        :param table: repository key naming the table to query.
        :param project_id: project whose rows are fetched.
        :param flat: flatten each (cleaned) row dict before returning.
        :param info_only: return only each row's 'info' payload.
        :return: list of dicts, one entry per row.
        """
        repo, query = self.repositories[table]
        data = getattr(repo, query)(project_id=project_id)
        ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS')
        tmp = []
        if info_only:
            if flat:
                for row in data:
                    inf = row.dictize()['info']
                    inf = self._clean_ignore_keys(inf, ignore_keys, info_only)
                    # isinstance() instead of type(...) == dict (PEP 8).
                    if inf and isinstance(inf, dict):
                        tmp.append(flatten(inf))
                    else:
                        tmp.append({'info': inf})
            else:
                for row in data:
                    # dictize() once per row (was called twice).
                    info = row.dictize()['info']
                    tmp.append(info if info else {})
        else:
            if flat:
                for row in data:
                    cleaned = self._clean_ignore_keys(row.dictize(),
                                                      ignore_keys, info_only)
                    tmp.append(flatten(cleaned))
            else:
                tmp = [row.dictize() for row in data]
        return tmp
示例#6
0
 def _get_data(self, table, project_id, flat=False, info_only=False):
     """Get the data for a given table.

     :param table: repository key naming the table to query.
     :param project_id: project whose rows are fetched.
     :param flat: flatten each row dict before returning.
     :param info_only: return only each row's 'info' payload.
     :return: list of dicts, one entry per row.
     """
     repo, query = self.repositories[table]
     data = getattr(repo, query)(project_id=project_id)
     if info_only:
         if flat:
             tmp = []
             for row in data:
                 inf = row.dictize()['info']
                 # isinstance() instead of type(...) == dict (PEP 8).
                 if inf and isinstance(inf, dict):
                     tmp.append(flatten(inf))
                 else:
                     tmp.append({'info': inf})
         else:
             tmp = []
             for row in data:
                 # dictize() once per row (was called twice).
                 info = row.dictize()['info']
                 tmp.append(info if info else {})
     else:
         if flat:
             tmp = [flatten(row.dictize()) for row in data]
         else:
             tmp = [row.dictize() for row in data]
     return tmp
示例#7
0
def get_events():
    """Fetch SoftLayer notification events (those with and without an end
    date) and insert each one into the ``sl_events`` table.

    API errors are caught and printed rather than propagated.
    """
    try:
        eventService = client['SoftLayer_Notification_Occurrence_Event']
        eventMask = config.eventMask
        resultEndDate = eventService.getAllObjects(
            mask=eventMask, filter=config.eventFilterEndDate)
        resultNoEndDate = eventService.getAllObjects(
            mask=eventMask, filter=config.eventFilterNoEndDate)
        # Both result sets get identical treatment; the two duplicated
        # loop bodies are collapsed into a single nested loop.
        for result in (resultEndDate, resultNoEndDate):
            for event in result:
                eventInfo = flatten(event)
                # NOTE(review): the return value was unused in the
                # original too; the call is kept in case extractKeys has
                # side effects — confirm and drop if it is pure.
                keys = extractKeys(eventInfo)
                values = []
                insertSql = generateInsertSql(eventInfo,
                                              'sl_events',
                                              values,
                                              doMap=False)
                cursor.execute(insertSql, values)
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
示例#8
0
 def flatten(x, name=''):
     """Recursively flatten ``x`` into the enclosing ``out`` dict.

     Nested dict keys and list indices are joined with '_' to form the
     flat key; ``name`` carries the prefix built so far (its trailing
     separator is stripped when a leaf value is stored).
     """
     if type(x) is dict:
         for key in x:
             flatten(x[key], name + key + '_')
     elif type(x) is list:
         # enumerate() replaces the original manual index counter.
         for index, item in enumerate(x):
             flatten(item, name + str(index) + '_')
     else:
         out[name[:-1]] = x
示例#9
0
    def main(self):
        """Continuously consume JSON messages from RabbitMQ and write them
        to CSV; from the 12th message on, also publish both CSV files back
        to the queue on every iteration.

        Uses module-level globals ``k`` (message counter) and ``json_data``
        (latest payload), preserved from the original interface.
        """
        global k
        k = 0
        # Hoisted: the logger is the same object every iteration.
        logger = logging.getLogger(__name__)
        while True:
            # consume from Queue
            routingConsume = 'bc'
            consumeRabbitMDM = ConsumeRabbitMQ()
            logger.info('First')
            global json_data
            json_data = consumeRabbitMDM.startConsuming(routingConsume)
            logger.info('Second')
            logger.info('Third')
            taskOne = Task_JsonCsv()

            # Common processing (previously duplicated in both branches):
            # flatten the payload and route it by its first key.
            data = json.loads(json_data)
            x = flatten(data)
            firstkey = list(x.keys())[0]
            if firstkey == 'parkingAreaOccupancy':
                taskOne.JSONtoCsvArea(**x)
                logger.info('Stored data in csvArea')
            else:
                taskOne.JSONtoCsvFacility(**x)
                logger.info('Stored data in csvFacility')

            if k >= 11:
                # After the first 11 messages, push the CSVs downstream.
                routingPublish = 'cd'
                i = taskOne.getCsv(filenameArea)
                j = taskOne.getCsv(filenameFacility)
                print('GetCsv was executed')
                print(i)
                print(j)
                pushRabbitMDM = PublishRabbitMQ()
                pushRabbitMDM.startImport(i, routingPublish)
                pushRabbitMDM.startImport(j, routingPublish)
                print('Csv was pushed to Queue')

            k = k + 1
            print(k)
示例#10
0
    def _get_data(self, table, project_id, flat=False, info_only=False):
        """Get the data for a given table.

        :param table: repository key naming the table to query.
        :param project_id: project whose rows are fetched.
        :param flat: flatten each row dict (honouring IGNORE_FLAT_KEYS).
        :param info_only: return only each row's 'info' payload.
        :return: list of dicts, one entry per row (list-valued 'info'
            payloads may expand to several entries when flattened).
        """
        repo, query = self.repositories[table]
        data = getattr(repo, query)(project_id=project_id)
        ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
        tmp = []
        if info_only:
            if flat:
                for row in data:
                    inf = row.dictize()['info']
                    # isinstance() instead of type(...) == ... (PEP 8).
                    if inf and isinstance(inf, dict):
                        tmp.append(flatten(inf,
                                           root_keys_to_ignore=ignore_keys))
                    elif inf and isinstance(inf, list):
                        for datum in inf:
                            tmp.append(flatten(datum,
                                               root_keys_to_ignore=ignore_keys))
                    else:
                        tmp.append({'info': inf})
            else:
                for row in data:
                    # dictize() once per row (was called twice).
                    info = row.dictize()['info']
                    tmp.append(info if info else {})
        else:
            if flat:
                for row in data:
                    cleaned = row.dictize()
                    # Id lists flatten badly, so lift truthy ones out
                    # before flatten() and re-attach them afterwards.
                    fav_user_ids = None
                    if cleaned.get('fav_user_ids'):
                        fav_user_ids = cleaned.pop('fav_user_ids')
                    task_run_ids = None
                    if cleaned.get('task_run_ids'):
                        task_run_ids = cleaned.pop('task_run_ids')

                    cleaned = flatten(cleaned,
                                      root_keys_to_ignore=ignore_keys)

                    if fav_user_ids:
                        cleaned['fav_user_ids'] = fav_user_ids
                    if task_run_ids:
                        cleaned['task_run_ids'] = task_run_ids

                    tmp.append(cleaned)
            else:
                tmp = [row.dictize() for row in data]
        return tmp
示例#11
0
def get_inventory(org):
    # Getting servers and vsis
    try:
        servers = accountClient.getHardware(mask=detailMaskBms)
        keys = dict()
        for server in servers:
            bmDetails = server
            try:
                monthly_fee = calculateMonthlyFee('SoftLayer_Hardware',
                                                  server['id'])
            except:
                monthly_fee = 0.0
            bmDetails['type'] = 'BM'
            serverInfo = flatten(server)
            print serverInfo
            bmInfo = flatten(bmDetails)
            keys = extractKeys(bmInfo, keys)
            values = []
            insertSql = generateInsertSql(bmInfo, 'sl_servers', values)
            # print monthly_fee
            # print "%s" % (bmInfo)
            # print "%s" % (insertSql)
            cursor.execute(insertSql, values)
            cursor.execute(
                "update `sl_servers` set `monthly_cost` = %s where `id` = %s",
                [monthly_fee, server['id']])
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
    try:
        vsis = accountClient.getVirtualGuests(mask=detailMaskVsi)
        keys = dict()
        for vsi in vsis:
            vmDetails = vsi
            try:
                monthly_fee = calculateMonthlyFee('SoftLayer_Virtual_Guest',
                                                  vsi['id'])
            except:
                monthly_fee = ''
            vmDetails['type'] = 'VM'
            vsiInfo = flatten(vsi)
            vmInfo = flatten(vmDetails)
            keys = extractKeys(vmInfo, keys)
            values = []
            insertSql = generateInsertSql(vmInfo, 'sl_servers', values)
            cursor.execute(insertSql, values)
            cursor.execute(
                "update `sl_servers` set `monthly_cost` = %s where `id` = %s",
                [monthly_fee, vsi['id']])
            print "%s" % (insertSql)
            print "%s" % (vmInfo)
    except SoftLayer.SoftLayerAPIError as e:
        print("Error. %s" % e)
示例#12
0
 def flatten(
         x: (list, dict, str), name: str = '', exclude=exclude):
     """Recursively flatten ``x`` into the enclosing ``out`` mapping.

     Nested keys/indices are joined with the enclosing ``sep``; any
     top-level dict key listed in ``exclude`` is skipped entirely. The
     trailing separator is stripped when a leaf value is stored.
     """
     if type(x) is dict:
         for key in x:
             if key not in exclude:
                 flatten(x[key], f'{name}{key}{sep}')
     elif type(x) is list:
         # enumerate() replaces the original manual index counter.
         for index, item in enumerate(x):
             flatten(item, f'{name}{index}{sep}')
     else:
         out[name[:-1]] = x
    def format_results(self, result_list, results, return_obj):
        """
        Format raw query records into GuardDuty / VPC-flow result dicts.

        :param result_list: list, accumulates the formatted records
        :param results: list, raw records ({'field', 'value'} pair lists)
        :param return_obj: dict, receives the final list under 'data'
        :return: None (mutates ``result_list`` and ``return_obj`` in place)
        """
        for record in results:
            # Each record is a list of {'field': ..., 'value': ...} pairs.
            record_dict = {data['field']: data['value'] for data in record}
            # ``'source' in d.keys()`` replaced with direct .get()/in tests.
            if record_dict.get('source') == 'aws.guardduty':
                data = json.loads(record_dict['@message'])
                flatten_results = flatten(data)
                # Drop empty-string and empty-dict values.
                flatten_results = {
                    k: v
                    for k, v in flatten_results.items() if v != "" and v != {}
                }
                if flatten_results.get(
                        'detail_service_action_actionType') is None:
                    continue
                if flatten_results.get(
                        'detail_service_action_networkConnectionAction_protocol'
                ) == 'Unknown':
                    continue
                guardduty_results = self.process_flatten_guardduty_results(
                    flatten_results)
                guardduty_results['guardduty'].update(
                    {'@timestamp': record_dict['@timestamp']})
                guardduty_results['guardduty'].update({'event_count': 1})
                result_list.append(guardduty_results)
            elif 'source' not in record_dict:
                # No source field at all: treat as a VPC flow record.
                vpc_dict = {'vpcflow': copy.deepcopy(record_dict)}
                vpc_dict['vpcflow']['protocol'] = self.get_protocol(
                    vpc_dict['vpcflow']['protocol'])
                vpc_dict['vpcflow']['event_count'] = 1
                result_list.append(vpc_dict)
            else:
                # Some other source: just flatten the embedded message.
                json_message = record_dict['@message']
                data = json.loads(json_message)
                flatten_results = flatten(data)
                result_list.append(flatten_results)

        return_obj['data'] = result_list
示例#14
0
    def get_from_json(self, coin_json: dict):
        """
        Create a Coin model entity from a JSON description (dict).

        The related Country and Currency rows are resolved (or created)
        first; then every model field that is still empty on the new
        instance is filled from the flattened JSON, matched either by the
        field's own name or by its ``json_id`` alias.

        Parameters
        ----------
            coin_json (dict): JSON description of a coin as returned by
                Numista.

        Returns
        -------
            An unsaved ``self.model`` instance populated from ``coin_json``.
        """
        result = self.model(numistaId=coin_json['id'],
                            title=coin_json['title'])
        # Resolve (or create) the related Country by its code.
        if 'country' in coin_json:
            cntry, _ = Country.objects.get_or_create(
                code=coin_json['country']['code'],
                defaults=coin_json['country'])
            result.country = cntry
        # Resolve (or create) the related Currency by its Numista id.
        if 'value' in coin_json and 'currency' in coin_json['value']:
            curr, _ = Currency.objects.get_or_create(
                numistaId=coin_json['value']['currency']['id'],
                defaults=coin_json['value']['currency'])
            result.value_currency = curr

        # Flatten nested JSON so keys can line up with model field names.
        flat_obj = flatten(coin_json)
        for f in self.model._meta.get_fields():
            if f.name == 'id':
                # Never overwrite the primary key.
                pass
            elif (f.name in flat_obj) and (getattr(result, f.name)
                                           in f.empty_values):
                setattr(result, f.name, flat_obj[f.name])
            elif hasattr(f,
                         'json_id') and (f.json_id in flat_obj) and (getattr(
                             result, f.name) in f.empty_values):
                # Field declares a json_id alias naming the JSON key.
                setattr(result, f.name, flat_obj[f.json_id])
        return result
 def decorateFindingsWithObjects(self, data, mapping_overriden):
     """Scan every finding for well-known indicator patterns (MAC, IPv4,
     IPv6, email, domain name, URL, file path) and decorate it with the
     corresponding standard objects; directories are handled by a custom
     matcher at the end.
     """
     # (regex, standard-object type) pairs, applied in the original order.
     patterns = (
         (r'((?:[\da-fA-F]{2}[:\-]){5}[\da-fA-F]{2})', "mac-addr"),
         (r'[0-9]+(?:\.[0-9]+){3}', "ipv4address"),
         (r'(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])',
          "ipv6address"),
         (r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", "email"),
         (r"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]",
          "domain-name"),
         (r'(https?://\S+)', "url"),
         (r'([~!@#$%^&*()\-_+={}\[\]|\\:;\"`\'<>.\?\w]+\.[a-z,A-Z][\w]+|[\w]+\.[a-z,A-Z][\W]+|\.[a-z,A-Z][\w]+)',
          "file"),
     )
     for finding in data:
         flattened_finding = flatten(finding)
         for regex, object_type in patterns:
             self.regexAndDecorateWithStdObjects(
                 flattened_finding, finding, regex, object_type,
                 mapping_overriden)
         self.customFunctionAndDecorateWithStdObjects(
             flattened_finding, finding, "directory", mapping_overriden)
示例#16
0
    def outputManager(self, context, output, key, bucket):
        """Write the flattened ``output`` records to S3 as newline-delimited
        JSON with snake_cased keys, or — when there is nothing to write and
        no file has been produced yet — skip all downstream tasks.
        """
        # BUG FIX: the original tested ``len(output) == 0 or output is
        # None``, so a None output raised TypeError on len() before the
        # None check ever ran; ``not output`` covers both cases.
        if not output:
            if self.total_output_files == 0:
                logging.info("No records pulled from Hubspot.")

                downstream_tasks = context['task'].get_flat_relatives(upstream=False)

                logging.info('Skipping downstream tasks...')
                logging.debug("Downstream task_ids %s", downstream_tasks)

                if downstream_tasks:
                    self.skip(context['dag_run'],
                              context['ti'].execution_date,
                              downstream_tasks)
        else:
            logging.info('Logging {0} to S3...'.format(key))

            # One JSON object per line; keys are snake_cased via boa.
            output = [flatten(e) for e in output]
            output = '\n'.join([json.dumps({boa.constrict(k): v
                               for k, v in i.items()}) for i in output])

            s3 = S3Hook(self.s3_conn_id)
            s3.load_string(
                string_data=str(output),
                key=key,
                bucket_name=bucket,
                replace=True
            )
            s3.connection.close()

            self.total_output_files += 1
示例#17
0
 def test_list_and_dict(self):
     """Nested lists of dicts flatten with positional indices in the key."""
     nested = {
         'a': 1,
         'b': 2,
         'c': [{'d': [2, 3, 4], 'e': [{'f': 1, 'g': 2}]}],
     }
     flat = {
         'a': 1,
         'b': 2,
         'c_0_d_0': 2,
         'c_0_d_1': 3,
         'c_0_d_2': 4,
         'c_0_e_0_f': 1,
         'c_0_e_0_g': 2,
     }
     self.assertEqual(flatten(nested), flat)
     # flatten_keys must report exactly the flattened key set.
     assertCountEqual(self, flatten_keys(nested), set(flat))
示例#18
0
 def test_flatten_filter_func(self):
     """Only keys accepted by ``filter_func`` survive flattening."""
     dic = {
         'a': {'a': [1, 2, 3]},
         'b': {'b': 'foo', 'c': 'bar'},
         'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
     }
     expected = {
         'c_c_0_baz_0': 1,
         'c_c_0_baz_1': 2,
         'c_c_0_baz_2': 3,
         'c_c_0_foo': 5,
         'c_c_0_bar': 6,
     }

     # PEP 8 (E731): use a def rather than assigning a lambda to a name.
     def filter_func(key):
         return key.startswith('c_c_0_')

     actual = flatten(dic, filter_func=filter_func)
     self.assertEqual(actual, expected)
示例#19
0
 def test_flatten_filter_ignore_keys(self):
     """Fully-qualified flat keys listed in keys_to_ignore are dropped."""
     nested = {
         'a': {'a': [1, 2, 3]},
         'b': {'b': 'foo', 'c': 'bar'},
         'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
     }
     ignored = {'c_c_0_baz_0', 'c_c_0_baz_1', 'c_c_0_baz_2'}
     expected = {
         'a_a_0': 1,
         'a_a_1': 2,
         'a_a_2': 3,
         'b_b': 'foo',
         'b_c': 'bar',
         'c_c_0_foo': 5,
         'c_c_0_bar': 6,
     }
     self.assertEqual(flatten(nested, keys_to_ignore=ignored), expected)
示例#20
0
 def test_unflatten_with_list_issue15(self):
     """https://github.com/amirziai/flatten/issues/15"""
     original = {
         "Required": {
             "a": "1",
             "b": ["1", "2", "3"],
             "c": {"d": {"e": [[{"s1": 1}, {"s2": 2}],
                               [{"s3": 1}, {"s4": 2}]]}},
             "f": ["1", "2"],
         },
         "Optional": {"x": "1", "y": ["1", "2", "3"]},
     }
     # flatten followed by unflatten_list must round-trip the structure.
     self.assertEqual(unflatten_list(flatten(original)), original)
示例#21
0
 def test_blog_example(self):
     """The worked example from the blog post flattens as documented."""
     nested = {
         "a": 1,
         "b": 2,
         "c": [{"d": ['2', 3, 4], "e": [{"f": 1, "g": 2}]}],
     }
     flat = {
         'a': 1,
         'b': 2,
         'c_0_d_0': '2',
         'c_0_d_1': 3,
         'c_0_d_2': 4,
         'c_0_e_0_f': 1,
         'c_0_e_0_g': 2,
     }
     self.assertEqual(flatten(nested), flat)
     # flatten_keys must agree with the flattened dict's key set.
     assertCountEqual(self, flatten_keys(nested), set(flat))
示例#22
0
 def test_empty_list_and_dict(self):
     """Empty containers, '' and None are preserved as leaf values."""
     nested = {
         'a': {},
         'b': [],
         'c': '',
         'd': None,
         'e': [{'f': [], 'g': [{'h': {}, 'i': [], 'j': '', 'k': None}]}],
     }
     flat = {
         'a': {},
         'b': [],
         'c': '',
         'd': None,
         'e_0_f': [],
         'e_0_g_0_h': {},
         'e_0_g_0_i': [],
         'e_0_g_0_j': '',
         'e_0_g_0_k': None,
     }
     self.assertEqual(flatten(nested), flat)
     # flatten_keys must agree with the flattened dict's key set.
     assertCountEqual(self, flatten_keys(nested), set(flat))
示例#23
0
def cleanse(self):
    """Build the list of accounts to unfollow: everyone we follow who does
    not explicitly follow us back, checked via Instagram's public profile
    JSON. Always finishes by printing a summary and applying the
    celebrity keep-list.
    """
    print('Entrei no  unfollowing  mode')
    nonfollowers = []
    # Fresh list of accounts to unfollow.
    self.unfollow_me = []

    unfollowing = 0

    try:
        # BUG FIX: the original called list_following(self) an extra time
        # and discarded the result, doing the listing work twice.
        for unfollow in list_following(self):
            nonfollowers.append(unfollow)
            with urllib.request.urlopen(
                    "https://www.instagram.com/{}/?__a=1".format(
                        unfollow)) as url:
                data = json.loads(url.read().decode())
                flat_list = flatten(data)

                # Keep the account unless it explicitly follows us back
                # (was the double negative ``not ... == False``).
                if flat_list.get("graphql_user_follows_viewer") != False:
                    self.unfollow_me.append(unfollow)
                    unfollowing += 1
                    print("Deixarei de seguir: {}".format(unfollowing))

    finally:
        # NOTE(review): format() receives two arguments but the string has
        # only one placeholder, so the list itself is never printed —
        # confirm whether a second {} was intended.
        print("Deixaremos de seguir : {}".format(len(self.unfollow_me),
                                                 self.unfollow_me))

        celebs_to_keep(self)
示例#24
0
    def outputManager(self, hook, output, key, bucket):
        """
        This method handles the output of the data.

        Flattens each record in ``output``, serialises the batch as
        newline-delimited JSON with snake_cased keys, and uploads it via
        ``hook`` (only the 's3' connection type is handled). When no file
        has been produced yet, downstream tasks may be skipped instead.
        """
        # NOTE(review): this branches on total_output_files rather than on
        # whether ``output`` itself is empty — confirm that is intended
        # (a sibling implementation tests the output's emptiness).
        if self.total_output_files == 0:
            logging.info("No records pulled.")
            if self.skip_if_null:
                downstream_tasks = self.context['task'].get_flat_relatives(
                    upstream=False)

                logging.info('Skipping downstream tasks...')
                logging.debug("Downstream task_ids %s", downstream_tasks)

                if downstream_tasks:
                    self.skip(self.context['dag_run'],
                              self.context['ti'].execution_date,
                              downstream_tasks)
        else:
            logging.info('Logging {0} to ...'.format(key))

            # One JSON object per line; keys are snake_cased via boa.
            output = [flatten(e) for e in output]
            output = '\n'.join([
                json.dumps({boa.constrict(k): v
                            for k, v in i.items()}) for i in output
            ])

            if self.cs_type == 's3':
                hook.load_string(string_data=str(output),
                                 key=key,
                                 bucket_name=bucket,
                                 replace=True)
                hook.connection.close()

                self.total_output_files += 1
示例#25
0
def File_Results_Window(True_Path):
    # Walk the data records at True_Path, append "path.object, key : value"
    # strings to the global File_Results_List, then open the list window.
    I = 0
    Raw_Data = DATA.get_data(True_Path)
    # I slightly modified the flatten libary to make my program format easier,
    # it may not work for all users and I may need to copy over what i modified into my code so that it will work for all users
    Refined_Data = flatten(Raw_Data[I], separator="[", replace_separators="]")
    _jwp_object_name = Refined_Data["_jwp_object_name"]
    """
    I commented these out beause these are important, but i have not yet found a proper use for them
    _jwp_object_name = Refined_Data["_jwp_object_name"]
    _jwp_arr_idx = Refined_Data["_jwp_arr_idx"]
    _jwp_is_asset = Refined_Data["_jwp_is_asset"]
    export_type = Refined_Data["export_type"]
    _jwp_export_idx = Refined_Data["_jwp_export_idx"]
    _apoc_data_ver = Refined_Data["_apoc_data_ver"]
    """
    obj_name = str(
        True_Path + "." + _jwp_object_name + ", "
    )  # This will allow the users to copy the object path into the program so that they have an easier time modding
    # NOTE(review): Refined_Data is built from Raw_Data[0] only, yet the loop
    # below runs len(Raw_Data) times emitting the same items on every pass —
    # confirm whether flatten(Raw_Data[I]) was meant to run per iteration.
    while I < len(Raw_Data):
        for key, value in Refined_Data.items():
            attr_name = str(key) + " : " + str(value)
            File_Results_List.append(obj_name + attr_name)
        File_Results_List.append("\n")
        I += 1
    ListBoxWindow(2)
示例#26
0
def load_training_data(all_users=True):
    """
    Pulls the historical ratings from the database into the ML library
    :param all_users: Optional Boolean flag to train on all users or the current user
    :return: train_data, test_data split
    """
    if all_users:
        user = None
        filename = "data_all.txt"
    else:
        user = util.get_user()
        filename = "data_" + user + ".txt"

    # write the latest database value
    # TODO: optimize to load direct from database (look into connect_odbc())
    user_history_json = json.loads(models.get_user_history(user, True))
    file_content = [flatten_json.flatten(entry)
                    for entry in user_history_json]
    with open(filename, "w") as outfile:
        json.dump(file_content, outfile)

    # load training data into graphlab
    training_data = gl.SFrame.read_json(url=filename, orient="records")

    # kill temporary file now that data is loaded
    # TODO: when the recommender trains once daily, don't delete the file
    try:
        os.remove(filename)
    except OSError:
        pass

    return training_data.random_split(.8)
示例#27
0
    def post(self):
        """
        Creates kafka event from json object

        :return: response code of post request to Kafka REST Proxy
        """
        ingest_time = round(time.time() * 1000)

        # flatten json so nested attributes can be used in KSQL analysis
        event = flatten_json.flatten(json.loads(request.data))

        # extract timestamp and add it to json
        event["segment_timestamp"] = segment_timestamp_to_unix_millis(
            event.get("timestamp"))
        event['ingest_timestamp'] = ingest_time

        # topic name: alphanumeric characters of type+event plus a suffix
        raw_name = str(event.get("type") + event.get("event"))
        topic = ''.join(c for c in raw_name
                        if c.isalnum()) + "_00_raw_flatJSON"
        # extract key, if there is a registered attribute that should serve as key for this event
        key = self.get_key(event)
        self.p.send(topic=topic,
                    key=str(key).encode('utf-8'),
                    value=event)
        self.p.flush()
        return 200
示例#28
0
def show_metrics(
    metrics, all_branches=False, all_tags=False, all_commits=False
):
    """Log metrics per branch/file; raise for targets with no metrics."""
    from flatten_json import flatten
    from dvc.utils.diff import format_dict

    # When `metrics` contains a `None` key, it means that some files
    # specified as `targets` in `repo.metrics.show` didn't contain any metrics.
    missing = metrics.pop(None, None)

    show_branch_header = all_branches or all_tags or all_commits
    for branch, branch_metrics in metrics.items():
        if show_branch_header:
            logger.info(f"{branch}:")

        for fname, metric in branch_metrics.items():
            if isinstance(metric, dict):
                logger.info(f"\t{fname}:")
                for key, value in flatten(format_dict(metric), ".").items():
                    logger.info(f"\t\t{key}: {value}")
            else:
                # Scalar metric: print it on a single line.
                logger.info("\t{}: {}".format(fname, str(metric)))

    if missing:
        raise BadMetricError(missing)
示例#29
0
def lambda_handler(event, context):
    """
    AWS Lambda entry point: fetch the single JSON file under the current
    prefix, put each concatenated tweet on its own line, flatten every
    tweet, and upload the result to the destination bucket.

    :param event: Lambda invocation event (unused).
    :param context: Lambda runtime context (unused).
    :return: dict with an HTTP-style status code.
    """
    ruta = getprefix()
    # There is exactly one file per folder of the bucket.
    object_key = client.list_objects(Bucket=bucketname,
                                     Prefix=ruta).get('Contents', [])[0]['Key']
    # Download the object so it can be edited in memory.
    bytes_buffer = io.BytesIO()
    client.download_fileobj(Bucket=bucketname, Key=object_key,
                            Fileobj=bytes_buffer)
    str_value = bytes_buffer.getvalue().decode()
    # Replace the "}{" substring so each tweet lands on its own line.
    repStr = re.sub("}{", "} \n{", str_value)

    # Flatten every tweet and rebuild the payload one JSON object per line.
    # "".join over a generator replaces the original index-based loop with
    # quadratic `texto = texto + ...` string concatenation.
    texto = "".join(
        json.dumps(flatten(json.loads(linea))) + "\n"
        for linea in re.split('\n', repStr)
    )

    # Upload the transformed object to the destination bucket.
    newobj = s3.Object(destbucket, object_key)
    newobj.put(Body=texto.encode('ascii'))

    return {
        'statusCode': 200,
    }
 def flatten_result(self, results, service_type):
     """
     Flattening the result response
     :param results: list, results
     :return: list, flattened and empty values removed
     """
     private_ip_address_key = 'resource#instancedetails#networkinterfaces#0#privateipaddress'
     action_type_key = 'service#action#actiontype'
     flattened = []
     for record in results:
         # Decode any JSON-encoded string values in place before flattening.
         for field, raw_value in record.items():
             try:
                 record[field] = json.loads(raw_value)
             except ValueError:
                 pass
         flat = flatten(record, '#')
         if service_type == 'vpcflow':
             flat.update({'name': 'VPC flow log'})
             flat["action"] = "network-traffic-" + flat.get("action").lower()
         if 'id' in flat:
             flat['finding_id'] = flat.pop('id')
         # Formatting to differentiate common key available in different action types for to STIX mapping
         if private_ip_address_key in flat:
             if flat[action_type_key] == 'PORT_PROBE':
                 flat['portprobe#' + private_ip_address_key] = flat.pop(private_ip_address_key)
             elif flat[action_type_key] == 'DNS_REQUEST':
                 flat['dnsrequest#' + private_ip_address_key] = flat.pop(private_ip_address_key)
         flattened.append(flat)
     # Remove null values and empty objects from response
     return self.format_flatten_result(flattened)
def create(landingZoneWANDetails):
    """
    This function creates a new landingZoneWAN in the landingZoneWAN structure
    based on the passed in landingZoneWAN data

    :param landingZoneWAN:  landingZoneWAN to create in landingZoneWAN list
    :return:             201 on success, 406 on landingZoneWAN exists
    """

    # Drop any caller-supplied id; the database generates one automatically.
    landingZoneWANDetails.pop("id", None)

    # flatten the python object into a python dictionary
    flattened_landingZoneWAN = flatten(landingZoneWANDetails, delimiter)
    schema = LandingZoneWANSchema(many=False)
    new_landingZoneWAN = schema.load(flattened_landingZoneWAN,
                                     session=db.session)
    # Save python object to the database
    db.session.add(new_landingZoneWAN)
    db.session.commit()

    # Serialise just the generated id for the response body.
    data = IdSchema(many=False).dump(new_landingZoneWAN)

    app.logger.debug("landingZoneWAN data:")
    app.logger.debug(pformat(data))

    return data, 201
示例#32
0
 def test_custom_separator(self):
     """Flattening with a custom separator joins nested keys with it."""
     nested = {
         'a': '1',
         'b': '2',
         'c': {'c1': '3', 'c2': '4'},
     }
     self.assertEqual(
         flatten(nested, '*'),
         {'a': '1', 'b': '2', 'c*c1': '3', 'c*c2': '4'})
示例#33
0
 def test_empty_tuple(self):
     """An empty tuple survives flattening as a leaf value."""
     nested = {'a': 1, 'b': ({'c': ()},)}
     flat = flatten(nested)
     self.assertEqual(flat, {'a': 1, 'b_0_c': ()})
示例#34
0
 def test_tuple(self):
     """Tuples are flattened with positional indices in the keys."""
     nested = {'a': 1, 'b': ({'c': (2, 3)},)}
     flat = flatten(nested)
     self.assertEqual(flat, {'a': 1, 'b_0_c_0': 2, 'b_0_c_1': 3})
示例#35
0
 def test_list(self):
     """Lists are flattened with positional indices in the keys."""
     nested = {'a': 1, 'b': [{'c': [2, 3]}]}
     flat = flatten(nested)
     self.assertEqual(flat, {'a': 1, 'b_0_c_0': 2, 'b_0_c_1': 3})
示例#36
0
 def test_one_flatten_utf8(self):
     """Non-ASCII keys and values pass through flattening unchanged."""
     nested = {
         'a': '1',
         u'ñ': u'áéö',
         'c': {u'c1': '3', 'c2': '4'},
     }
     self.assertEqual(
         flatten(nested),
         {'a': '1', u'ñ': u'áéö', 'c_c1': '3', 'c_c2': '4'})
示例#37
0
 def test_one_flatten(self):
     """A single level of nesting flattens with the default '_' separator."""
     nested = {
         'a': '1',
         'b': '2',
         'c': {'c1': '3', 'c2': '4'},
     }
     self.assertEqual(
         flatten(nested),
         {'a': '1', 'b': '2', 'c_c1': '3', 'c_c2': '4'})
示例#38
0
    def test_unflatten_with_list_issue31(self):
        """https://github.com/amirziai/flatten/issues/31"""
        original = {
            "testdict": {
                "seconddict": [["firstvalue", "secondvalue"],
                               ["thirdvalue", "fourthvalue"]],
            },
        }
        # Round-tripping through flatten/unflatten_list must be lossless.
        self.assertEqual(unflatten_list(flatten(original)), original)
示例#39
0
 def test_list_and_dict(self):
     """Mixed lists and dicts flatten into indexed, underscored keys."""
     nested = {
         'a': 1,
         'b': 2,
         'c': [{'d': [2, 3, 4], 'e': [{'f': 1, 'g': 2}]}],
     }
     flat = flatten(nested)
     self.assertEqual(
         flat,
         {'a': 1, 'b': 2, 'c_0_d_0': 2, 'c_0_d_1': 3, 'c_0_d_2': 4,
          'c_0_e_0_f': 1, 'c_0_e_0_g': 2})
示例#40
0
 def test_unflatten_with_list_issue15(self):
     """https://github.com/amirziai/flatten/issues/15"""
     original = {
         "Required": {
             "a": "1",
             "b": ["1", "2", "3"],
             "c": {"d": {"e": [[{"s1": 1}, {"s2": 2}],
                               [{"s3": 1}, {"s4": 2}]]}},
             "f": ["1", "2"],
         },
         "Optional": {"x": "1", "y": ["1", "2", "3"]},
     }
     # The flatten/unflatten_list round trip must reproduce the input.
     self.assertEqual(unflatten_list(flatten(original)), original)
示例#41
0
 def test_blog_example(self):
     """The example from the project blog post flattens as documented."""
     nested = {
         "a": 1,
         "b": 2,
         "c": [{"d": ['2', 3, 4], "e": [{"f": 1, "g": 2}]}],
     }
     flat = flatten(nested)
     self.assertEqual(
         flat,
         {'a': 1, 'b': 2, 'c_0_d_0': '2', 'c_0_d_1': 3,
          'c_0_d_2': 4, 'c_0_e_0_f': 1,
          'c_0_e_0_g': 2})
示例#42
0
 def test_empty_list_and_dict(self):
     """Empty containers, empty strings and None are kept as leaf values."""
     nested = {
         'a': {},
         'b': [],
         'c': '',
         'd': None,
         'e': [{'f': [], 'g': [{'h': {}, 'i': [], 'j': '', 'k': None}]}],
     }
     self.assertEqual(
         flatten(nested),
         {'a': {}, 'b': [], 'c': '', 'd': None,
          'e_0_f': [], 'e_0_g_0_h': {}, 'e_0_g_0_i': [],
          'e_0_g_0_j': '', 'e_0_g_0_k': None})
示例#43
0
 def test_flatten_ignore_keys(self):
     """Ignore a set of root keys for processing"""
     nested = {
         'a': {'a': [1, 2, 3]},
         'b': {'b': 'foo', 'c': 'bar'},
         'c': {'c': [{'foo': 5, 'bar': 6, 'baz': [1, 2, 3]}]},
     }
     # Only the 'a' subtree should survive flattening.
     flat = flatten(nested, root_keys_to_ignore={'b', 'c'})
     self.assertEqual(flat, {'a_a_0': 1, 'a_a_1': 2, 'a_a_2': 3})
示例#44
0
 def test_unflatten_with_list_nested(self):
     """Nested lists of dicts round-trip through flatten/unflatten_list."""
     original = {"a": [[{"b": 1}], [{"d": 1}]]}
     self.assertEqual(unflatten_list(flatten(original)), original)
示例#45
0
    def _get_data(self, table, project_id, flat=False, info_only=False):
        """Get the data for a given table.

        :param table: table name; 'task', 'task_run' and 'result' have a
            configured CSV export info key, other tables do not
        :param project_id: project whose rows are fetched
        :param flat: flatten nested ``info`` dicts into single-level dicts
        :param info_only: export only the ``info`` payload of each row
        :return: list of dicts ready for export
        """
        repo, query = self.repositories[table]
        data = getattr(repo, query)(project_id=project_id)
        ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
        # Map each known table to the config entry naming its export sub-key.
        # BUG FIX: csv_export_key was previously left unbound for any other
        # table value, raising NameError below; default it to None instead.
        config_key = {
            'task': 'TASK_CSV_EXPORT_INFO_KEY',
            'task_run': 'TASK_RUN_CSV_EXPORT_INFO_KEY',
            'result': 'RESULT_CSV_EXPORT_INFO_KEY',
        }.get(table)
        csv_export_key = current_app.config.get(config_key) if config_key else None
        if info_only:
            tmp = []
            if flat:
                for row in data:
                    inf = copy.deepcopy(row.dictize()['info'])
                    # Optionally narrow the export to the configured sub-dict.
                    if inf and isinstance(inf, dict) and csv_export_key and inf.get(csv_export_key):
                        inf = inf[csv_export_key]
                    new_key = '%s_id' % table
                    if inf and isinstance(inf, dict):
                        inf[new_key] = row.id
                        tmp.append(flatten(inf,
                                           root_keys_to_ignore=ignore_keys))
                    elif inf and isinstance(inf, list):
                        for datum in inf:
                            if isinstance(datum, dict):
                                datum[new_key] = row.id
                                tmp.append(flatten(datum,
                                                   root_keys_to_ignore=ignore_keys))
            else:
                for row in data:
                    # Fall back to an empty dict when a row has no info.
                    tmp.append(row.dictize()['info'] or {})
        else:
            if flat:
                tmp = []
                for row in data:
                    cleaned = row.dictize()
                    # Pull the id lists out before flattening so they are
                    # exported as lists instead of indexed scalar columns.
                    fav_user_ids = None
                    task_run_ids = None
                    if cleaned.get('fav_user_ids'):
                        fav_user_ids = cleaned.pop('fav_user_ids')
                    if cleaned.get('task_run_ids'):
                        task_run_ids = cleaned.pop('task_run_ids')

                    cleaned = flatten(cleaned,
                                      root_keys_to_ignore=ignore_keys)

                    if fav_user_ids:
                        cleaned['fav_user_ids'] = fav_user_ids
                    if task_run_ids:
                        cleaned['task_run_ids'] = task_run_ids

                    tmp.append(cleaned)
            else:
                tmp = [row.dictize() for row in data]
        return tmp
示例#46
0
 def test_unflatten_with_list_deep(self):
     """Deeply nested mixed structures round-trip losslessly."""
     original = {'a': [
         {'b': [{'c': [{'a': 5, 'b': {'a': [1, 2, 3]}, 'c': {'x': 3}}]}]}]}
     self.assertEqual(unflatten_list(flatten(original)), original)
示例#47
0
def searchDictionaryKey(dict, key):
    """
    Print every flattened '#'-joined key path that contains *key*.

    :param dict: nested dictionary to search (parameter name shadows the
                 builtin; kept unchanged for backward compatibility with
                 keyword callers)
    :param key: substring to look for in the flattened key paths
    """
    flattenDict = flatten(dict, '#')
    for k in flattenDict:
        if key in k:
            # BUG FIX: `print k` is Python-2-only syntax and a SyntaxError
            # on Python 3; the function-call form works on both.
            print(k)
示例#48
0
 def test_one_flatten_utf8_dif(self):
     """A non-ASCII key is joined into the flattened key unchanged."""
     inner = {u'eñe': 1}
     flat = flatten(dict(info=inner))
     self.assertEqual(flat, {u'info_{}'.format(u'eñe'): 1})
示例#49
0
 def test_no_flatten(self):
     """An already-flat dict is returned unchanged."""
     flat_input = {'a': '1', 'b': '2', 'c': 3}
     self.assertEqual(flatten(flat_input), flat_input)