示例#1
0
def generate_graph(num) -> Graph:
    graph = Graph(f'Graph_{num}')
    # product_count = random.randint(2, 10)
    product_count = 1000

    for n in range(product_count):
        price = random.randint(5, 500)
        product_node = graph.add_node({
            'type': 'PRODUCT',
            'price': price
        },
                                      f'Product_{n}',
                                      node_type='PRODUCT')

        if with_probability(0.04):
            new_price_rate = rand_float(0.65, 0.8)
            change_node = graph.add_node(
                {
                    'type': 'CHANGE',
                    'old_price': price,
                    'new_price': price * new_price_rate,
                    'is_fraud': True
                },
                f'Change_of_Prod_{n}',
                node_type='CHANGE',
                node_color='red')
            change_node.add_neighbor(product_node)
        elif with_probability(0.45):
            new_price_rate = rand_float(0.8, 1)
            change_node = graph.add_node(
                {
                    'type': 'CHANGE',
                    'old_price': price,
                    'new_price': price * new_price_rate,
                    'is_fraud': False
                },
                f'Change_of_Prod_{n}',
                node_type='CHANGE',
                node_color='green')
            change_node.add_neighbor(product_node)

    # for node in graph._nodes:
    #     link_count = random.randint(0, min(8, int(node_count / 4)))
    #
    #     for _ in range(link_count):
    #         node.add_neighbor(graph.get_random_node())

    return graph
def generate_graph() -> Tuple[Graph, pd.Series]:
    graph = Graph(f'Graph Node Level')
    node_count = 25_000
    nodes_gt = pd.Series()

    for n in range(node_count):
        type = random.choice(['type-1', 'type-2', 'type-3'])
        node = graph.add_node({}, f'Node_{n}', node_type=type)

        nodes_gt._set_value(node.get_id(), type)

    for node in graph._nodes:
        link_count = random.randint(0, 2) #random.randint(0, min(7, int(node_count / 4)))

        for _ in range(link_count):
            node.add_neighbor(graph.get_random_node())

    return graph, nodes_gt
示例#3
0
    def generate_graph(self, db: Database) -> Graph:
        print(f'generating graph for {db.get_name()}')
        graph = Graph(name=db.get_name())

        # generate nodes
        # time_start = time.perf_counter()
        for table in db.get_tables():
            # print(f'adding nodes for table {table.get_name()}')
            table_data = table.get_data()

            for index, row in table_data.iterrows():
                properties = {
                    column.get_name(): row[column.get_name()]
                    for column in table.get_columns().values() if
                    not column.get_is_primary() and not column.get_is_hidden()
                }

                graph.add_node(
                    properties,
                    f'{table.get_name()}[{index}]',
                    node_color=self._fraud_node_color if 'is_fraud' in row
                    and row['is_fraud'] else self._default_node_color,
                    node_type=table.get_name())

        # time_end = time.perf_counter()
        # print(f"generate nodes took {time_end - time_start:0.4f} seconds")

        # generate DELETE nodes from MTA_CHANGES
        # time_start = time.perf_counter()
        for _, src_record in db.get_table('MTA_CHANGES').get_data().iterrows():
            if (src_record['change_type']) == 'delete':
                dst_table_name, _ = src_record['table_column_ref'].split(
                    '.', 2)
                graph.add_node(src_record['old_value'],
                               f'{dst_table_name}[{src_record["record_id"]}]',
                               self._fraud_node_color if src_record['is_fraud']
                               else self._default_node_color,
                               node_type='MTA_CHANGES')

        # time_end = time.perf_counter()
        # print(f"delete nodes took {time_end - time_start:0.4f} seconds")

        # generate links
        # time_start = time.perf_counter()
        for table in db.get_tables():
            # print(f'TABLE KEYS: {table.get_name()}')
            foreign_keys = table.get_all_foreign_keys()
            table_data = table.get_data()

            for index, src_record in table_data.iterrows():
                src_node = graph.get_node_by_key(
                    f'{table.get_name()}[{index}]')

                for foreign_key in foreign_keys:
                    if foreign_key.get_src_table() != table.get_name():
                        continue

                    dst_table = db.get_table(foreign_key.get_dst_table())
                    dst_data_entry_index = dst_table.find_record_index(
                        foreign_key.get_dst_column(),
                        src_record[foreign_key.get_src_column()])
                    dst_node = graph.get_node_by_key(
                        f'{foreign_key.get_dst_table()}[{dst_data_entry_index}]'
                    )

                    if dst_node is not None:
                        if foreign_key.get_reverse_relation():
                            dst_node.add_neighbor(src_node,
                                                  foreign_key.get_color())
                        else:
                            src_node.add_neighbor(dst_node,
                                                  foreign_key.get_color())

        # time_end = time.perf_counter()
        # print(f"generate links took {time_end - time_start:0.4f} seconds")

        # generate meta links
        # time_start = time.perf_counter()
        for index, src_record in db.get_table(
                'MTA_CHANGES').get_data().iterrows():
            src_node = graph.get_node_by_key(f'MTA_CHANGES[{index}]')
            dst_table_name, dst_column_name = src_record[
                'table_column_ref'].split('.', 2)
            dst_node = graph.get_node_by_key(
                f'{dst_table_name}[{src_record["record_id"]}]')

            src_node.add_neighbor(dst_node,
                                  self._edge_colors[src_record['change_type']])

        # time_end = time.perf_counter()
        # print(f"generate meta links took {time_end - time_start:0.4f} seconds")

        return graph