Example #1
def jaccard(a, b, graph):

    neib_a = set(list(graph.neighbors(a)))
    neib_b = set(list(graph.neighbors(b)))

    lis_2a = []
    lis_2b = []

    for x in list(neib_a):
        lis_2a += (list(graph.neighbors(x)))

    for x in list(neib_b):
        lis_2b += (list(graph.neighbors(x)))
    union_a = set(list(neib_b) + lis_2a)
    union_b = set(list(neib_a) + lis_2b)

    common_a = set(lis_2a).intersection(neib_b)

    common_b = set(lis_2b).intersection(neib_a)

    try:
        jac_a = (cardinality.count(common_a)) / (cardinality.count(union_a))
    except ZeroDivisionError:
        jac_a = 0
    try:
        jac_b = (cardinality.count(common_b)) / (cardinality.count(union_b))
    except ZeroDivisionError:
        jac_b = 0

    return jac_a, jac_b, (jac_a + jac_b) / 2
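
A minimal usage sketch for the function above, assuming import cardinality is present in its module and that the graph is a networkx graph; the toy graph and node ids below are made up:

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (3, 4), (1, 4), (2, 4)])

# second-order Jaccard scores between nodes 1 and 3, plus their average
jac_a, jac_b, avg = jaccard(1, 3, G)
print(jac_a, jac_b, avg)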
Example #2
def check_volume2(min_vec, prim_vec, latt):
    """
        To check if input cell is primitive or not
        input :  min_vec  :   minimum vetors of an atom
                 prim_vec :   basic vectors of primivtive cell
                 latt     :   lattice vectors
        return:  flage3   :   True: input cell is primitive
                 min_prim_latt: 
    """
    tp_vec = np.transpose(min_vec)  # tp_vec : vertical
    inv_tp_vec = np.linalg.inv(tp_vec)
    #print(inv_tp_vec)

    for i in range(3):
        for j in range(3):
            if inv_tp_vec[i, j] < 0.0:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] - 0.5)
            else:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] + 0.5)

    print('det(inv_tp_vec):\n', np.linalg.det(inv_tp_vec))
    #print(inv_tp_vec)

    if abs(np.linalg.det(inv_tp_vec)) == (cd.count(prim_vec) - 2):
        print('Found')
    else:
        print('Warning! Not completely found primitive vectors!')

    # min_prim_latt = np.dot(latt, np.linalg.inv(inv_tp_vec))
    # inv_tp_vec : vertical
    # min_prim_latt : horizontal
    min_prim_latt = np.dot(np.transpose(np.linalg.inv(inv_tp_vec)), latt)
    flag3 = 'Found'

    return flag3, min_prim_latt
Example #3
def setitem(
    path: RawPath, value: V, collection: MutableCollection
) -> MutableCollection:
    """
    Sets the value at path of collection. If a portion of path doesn't exist, it's created.
    """
    path = to_path(path)
    clone = copy(collection)
    key = head(path)
    if count(path) == 1:
        _safe_setitem(key, value, clone)
    else:
        try:
            sub = collection[key]
        except KeyError:
            if isinstance(path[1], int):
                sub = []
            else:
                sub = {}
        except IndexError:
            for i in range(len(clone), key + 1):
                clone.insert(i, None)
            if isinstance(path[1], int):
                sub = []
            else:
                sub = {}
        clone[key] = setitem(path[1:], value, sub)

    return clone
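
A hypothetical usage sketch, assuming setitem() and its helpers (to_path, head, count, copy, _safe_setitem) come from the same utility module, that to_path passes a list through unchanged, and that _safe_setitem behaves like a plain item assignment for dict keys:

config = {"server": {}}
updated = setitem(["server", "port"], 8080, config)
# updated would be expected to be {"server": {"port": 8080}},
# while config itself is left untouched because each level is shallow-copied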
Example #4
def check_volume2(min_vec, prim_vec, latt):
    tp_vec = np.transpose(min_vec)  # // tp_vec : vertical
    inv_tp_vec = np.linalg.inv(tp_vec)
    #print(inv_tp_vec)

    for i in range(3):
        for j in range(3):
            if inv_tp_vec[i, j] < 0.0:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] - 0.5)
            else:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] + 0.5)

    print('det(inv_tp_vec):\n', np.linalg.det(inv_tp_vec))
    #print(inv_tp_vec)

    if abs(np.linalg.det(inv_tp_vec)) == (cd.count(prim_vec) - 2):
        print('Found')
    else:
        print('Warning! Not completely found primitive vectors!')

    # min_prim_latt = np.dot(latt, np.linalg.inv(inv_tp_vec))  # // tp_vec : vertical
    min_prim_latt = np.dot(np.transpose(np.linalg.inv(inv_tp_vec)),
                           latt)  # // min_prim_latt : horizontal
    flag3 = 'Found'

    return flag3, min_prim_latt
Example #5
def generate_foreign_keys(parents, num_parents, num_attributes, output_object):
    '''
    Generate DDL for foreign keys
    '''
    logger.debug('Entering generate_foreign_keys()')
    if num_parents >= 1:
        logger.debug('Generating DDL for foreign keys...')
        logger.debug(f'parents=\n{json.dumps(parents, indent=4)}')
        for parent_num, parent in enumerate(parents):
            logger.debug(f'{i(1)}parent_num={parent_num} parent={parent}')
            assert cardinality.count(parent) == 1
            for parent_name, parent_vals in parent.items():
                pass
            parent_kind = parent_vals['kind']
            is_defining = False
            if 'defining' in parent_vals:
                if parent_vals['defining'] == True:
                    is_defining = True
            logger.debug(f'{i(1)}is_defining={is_defining}')
            column_line = f'{i(1)}{"fk_" + parent_name} uuid '
            if parent_kind in ['one', 'base_class']:
                column_line += 'not null '
            column_line += f'references {parent_name}(pk)'
            if is_defining:
                column_line += ' on delete cascade'
            elif parent_kind == 'zero_or_one':
                column_line += ' on delete set null'
            logger.debug(f'{i(1)}column_line={column_line}')
            if parent_num < num_parents - 1 or num_attributes > 0:
                column_line += ','
            logger.debug(f'column_line={column_line}')
            print(f'{column_line}', file=output_object)
    logger.debug('Leaving generate_foreign_keys()')
Example #6
def check_volume1(prim_vec, latt):
    initial_vol = abs(np.linalg.det(latt))
    vol_tolerance = 0.00001
    tmp_latt = np.zeros((3, 3))
    min_vec = np.zeros((3, 3))
    flag3 = 'Not Found'
    for i in range(0, len(prim_vec) - 2):
        for j in range(i + 1, len(prim_vec) - 1):
            for k in range(j + 1, len(prim_vec)):
                if flag3 == 'Not Found':
                    tmp_latt[0] = np.dot(prim_vec[i], latt)
                    tmp_latt[1] = np.dot(prim_vec[j], latt)
                    tmp_latt[2] = np.dot(prim_vec[k], latt)
                    tmp_vol = abs(np.linalg.det(tmp_latt))
                    #print('tmp_latt:\n', tmp_latt)
                    #print('tmpvol:\n', tmp_vol)
                    #print('initvol:\n', initial_vol)
                    #print('cardinality\n', cd.count(prim_vec))
                    if tmp_vol > vol_tolerance:
                        v = initial_vol / tmp_vol
                        if v < 0.0:
                            v = int(v - 0.5)
                        else:
                            v = int(v + 0.5)
                        if v == (
                                cd.count(prim_vec) - 2
                        ):  # the primitive cell satisfies this, but the conventional one does not
                            min_vec[0] = prim_vec[i]  # e.g. the conventional smallest volume is ~32 and the initial volume ~127.8; 127.8/32 gives ~4
                            min_vec[1] = prim_vec[j]
                            min_vec[2] = prim_vec[k]
                            return check_volume2(min_vec, prim_vec,
                                                 latt)  #flag3 = 'Found'
    return flag3, None  # flag3 = 'Not Found'
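
The check_volume functions above all use the same round-half-away-from-zero idiom (int(v + 0.5) for non-negative values, int(v - 0.5) for negative ones); a minimal, dependency-free illustration:

def round_away_from_zero(v):
    # same rounding idiom as in check_volume1 / check_volume2
    return int(v - 0.5) if v < 0.0 else int(v + 0.5)

assert round_away_from_zero(3.99) == 4    # e.g. 127.8 / 32 is ~3.99
assert round_away_from_zero(-3.5) == -4
assert round_away_from_zero(2.4) == 2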
Example #7
def mean(iterable: Iterable[Number]) -> float:
    """
    Computes the mean of the values in iterable.
    """
    to_sum, to_count = tee(iterable)
    _sum = reduce(add, to_sum)
    return _sum / count(to_count)
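
The tee/count pairing lets mean() work on a one-shot generator, consuming one copy for the sum and the other for the length. A usage sketch, assuming the function above is importable and that its module pulls tee from itertools, reduce from functools, add from operator, and count from cardinality:

squares = (x * x for x in range(1, 4))   # one-shot generator: 1, 4, 9
print(mean(squares))                     # 14 / 3 ≈ 4.67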
Example #8
def commonneigh(a, b, graph):
    neib_a = set(list(graph.neighbors(a)))
    neib_b = set(list(graph.neighbors(b)))

    lis_2a = []
    lis_2b = []

    for x in list(neib_a):
        lis_2a += (list(graph.neighbors(x)))

    for x in list(neib_b):
        lis_2b += (list(graph.neighbors(x)))
    union_a = set(list(neib_b) + lis_2a)
    union_b = set(list(neib_a) + lis_2b)

    common_a = set(lis_2a).intersection(neib_b)
    common_b = set(lis_2b).intersection(neib_a)

    return cardinality.count(common_a), cardinality.count(common_b), (
        cardinality.count(common_a) + cardinality.count(common_b)) / 2
Example #9
def delitem(path: RawPath, collection: MutableCollection) -> MutableCollection:
    """
    Deletes given path from collection
    """
    path = to_path(path)
    clone = copy(collection)
    key = head(path)
    if count(path) == 1:
        del clone[key]
    else:
        clone[key] = delitem(path[1:], collection[key])
    return clone
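
A hypothetical usage sketch, assuming delitem() and its helpers (to_path, head, count, copy) are importable from the same module and that to_path passes a list through unchanged:

settings = {"db": {"host": "localhost", "debug": True}}
trimmed = delitem(["db", "debug"], settings)
# trimmed  -> {"db": {"host": "localhost"}}
# settings -> unchanged, since each traversed level is shallow-copied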
Example #10
def generate_entity_comments(entity_name, entities, entity_indices,
                             entities_pc, output_object):
    '''
    Handle entity description and note
    '''
    logger.debug('Entering generate_entity_comments()')
    entity_index = entity_indices[entity_name]
    entity = entities[entity_index]['entity']
    logger.debug(f'entity=\n{yaml.dump(entity)}')

    num_parents = 0
    entity_pc = entities_pc[entity_name]
    logger.debug(f'{i(1)}entity_pc={entity_pc}')
    parents = None
    if 'parents' in entity_pc:
        parents = entity_pc['parents']
        num_parents = cardinality.count(parents)
    num_attributes = 0
    attributes = None
    if 'attributes' in entity:
        attributes = entity['attributes']
        num_attributes = cardinality.count(attributes)
    logger.debug(f'num_parents={num_parents} num_attributes={num_attributes}')

    if 'description' in entity:
        print('-- Description:', file=output_object)
        table_description = entity['description']
        for line in table_description.splitlines():
            print(f'-- {line}', file=output_object)
    if 'note' in entity:
        if 'description' in entity:
            print(file=output_object)
        print('-- Note:', file=output_object)
        table_note = entity['note']
        for line in table_note.splitlines():
            print(f'-- {line}', file=output_object)
        print(file=output_object)
    logger.debug('Leaving generate_entity_comments()')
    return entity, parents, num_parents, attributes, num_attributes
Example #11
def generate_mm_synthesized(entity_name, graph, output_object):
    '''
    Generate DDL for synthesized many-to-many mapping table
    
    Assumes synthesized many-to-many mapping tables have no attributes
    This may change with future enhancement
    '''
    logger.debug('Entering generate_mm_synthesized()')
    graph_dependees = graph[entity_name]
    logger.debug(f'{i(1)}graph_dependees={graph_dependees}')
    print(f'create table {entity_name} (', file=output_object)
    print(f'{i(1)}pk uuid not null default gen_random_uuid() primary key,',
          file=output_object)
    num_parents = cardinality.count(graph_dependees)
    for dependee_num, dependee in enumerate(graph_dependees):
        column_line = f'{i(1)}fk_{dependee} uuid not null references {dependee}(pk) on delete cascade'
        if dependee_num < num_parents - 1:
            column_line += ','
        print(column_line, file=output_object)
    print(');\n', file=output_object)
    logger.debug('Leaving generate_mm_synthesized()')
Example #12
def generml(input_file_or_object, input, output_object):
    '''
    Generally-callable entry point to
    read an Entity-Relationship diagram created by the yEd graph editor and 
    convert it into Entity-Relationship Markup Language

    \b
    References:
    yEd - https://www.yworks.com/products/yed
    GraphML - http://graphml.graphdrawing.org/index.html
    '''
    logger.debug('Entering generml()')

    graph_tag = '{http://graphml.graphdrawing.org/xmlns}graph'
    node_tag = '{http://graphml.graphdrawing.org/xmlns}node'
    edge_tag = '{http://graphml.graphdrawing.org/xmlns}edge'
    data_tag = '{http://graphml.graphdrawing.org/xmlns}data'
    GenericNode_tag = '{http://www.yworks.com/xml/graphml}GenericNode'
    BorderStyle_tag = '{http://www.yworks.com/xml/graphml}BorderStyle'
    PolyLineEdge_tag = '{http://www.yworks.com/xml/graphml}PolyLineEdge'
    NodeLabel_tag = '{http://www.yworks.com/xml/graphml}NodeLabel'
    LineStyle_tag = '{http://www.yworks.com/xml/graphml}LineStyle'
    Arrows_tag = '{http://www.yworks.com/xml/graphml}Arrows'

    NodeLabel_attr_configuration_name = 'com.yworks.entityRelationship.label.name'
    NodeLabel_attr_configuration_attributes = 'com.yworks.entityRelationship.label.attributes'
    GenericNode_attr_configuration_BigEntity = 'com.yworks.entityRelationship.big_entity'

    logger.debug('before parse()')
    tree = ET.parse(input_file_or_object)
    logger.debug('after parse()')
    root = tree.getroot()

    logger.debug('Printing Entity-Relationship Markup Language')
    er_head = {
        "source": 'stdin' if input == '-' else input,
        "generated_datetime": datetime.datetime.utcnow().isoformat()
    }
    print(yaml.dump(er_head), file=output_object)
    er = {}
    end_kinds = set()  # delete after debugging done
    er_entities = []
    er_enums = []
    er_relationships = []
    name_set = set()  # To prevent duplicate entity or enum names
    ignored_entity_node_ids = set(
    )  # So you can ignore relationships to ignored entities
    node_id_to_entity_name = {}
    graph_elem = root.find(graph_tag)
    assert graph_elem is not None, 'Expected graph tag is not present'
    for graph_child in graph_elem:
        logger.debug(
            f'Next graph_child: tag={strip_namespace(graph_child.tag)}')
        logger.debug(ET.tostring(graph_child, encoding='utf8').decode('utf8'))
        continue_graph_elem_loop = False
        # We only care about nodes and edges
        if graph_child.tag != node_tag and graph_child.tag != edge_tag:
            logger.debug(
                f'Skipping non-node/non-edge graph_child.tag={graph_child.tag}'
            )
            continue
        number_children_graph_child = cardinality.count(graph_child)
        # violated by yEd's default (unedited) entity:
        assert number_children_graph_child == 1, "Expected the graph element's child to have only one child"

        # The data element is a child of both node and edge elements
        data_elem = graph_child.find(data_tag)
        number_children_data = cardinality.count(data_elem)
        assert number_children_data == 1, 'Expected the data element to have only 1 child'

        data_subelem = data_elem[0]
        if graph_child.tag == node_tag:
            logger.debug('Found a node')
            node_elem = graph_child
            node_id = node_elem.attrib['id']
            if data_subelem.tag == GenericNode_tag:
                GenericNode_elem = data_subelem
                assert GenericNode_elem.attrib['configuration'] == GenericNode_attr_configuration_BigEntity, \
                    'Expected the generic node "configuration" attribute to indicate a BigEntity'
                logger.debug(f'GraphML entity node {node_id}:')
                for GenericNode_subelem in GenericNode_elem:
                    logger.debug(
                        f'{i(1)}Found a GenericNode_subelem, tag={strip_namespace(GenericNode_subelem.tag)}'
                    )
                    if GenericNode_subelem.tag == NodeLabel_tag:
                        logger.debug(
                            f'{i(1)}The GenericNode_subelem is a NodeLabel')
                        NodeLabel_elem = GenericNode_subelem
                        NodeLabel_attr_configuration = NodeLabel_elem.attrib[
                            'configuration']
                        if NodeLabel_attr_configuration == NodeLabel_attr_configuration_name:
                            entity_name = NodeLabel_elem.text
                            logger.debug(f'{i(1)}entity_name={entity_name}')
                            if entity_name in name_set:
                                print(
                                    f'\nERROR: Duplicate name specified: {entity_name}',
                                    file=sys.stderr)
                                sys.exit(1)
                            else:
                                name_set.add(entity_name)
                        elif NodeLabel_attr_configuration == NodeLabel_attr_configuration_attributes:
                            entity_attributes = NodeLabel_elem.text
                            logger.debug(
                                f'{i(1)}entity_attributes={entity_attributes}')
                        else:
                            # The configuration attribute can have only 2 values
                            assert False, \
                                f'Got an unexpected value for the "configuration" attribute of the ' \
                                f'node label element: {NodeLabel_attr_configuration}'
                    elif GenericNode_subelem.tag == BorderStyle_tag:
                        logger.debug(
                            f"{i(1)}GenericNode_subelem.attrib['type']={GenericNode_subelem.attrib['type']}"
                        )
                        if GenericNode_subelem.attrib['type'] != 'line':
                            logger.debug(
                                f'{i(1)}Ignoring entity because the border is not a simple solid line'
                            )
                            ignored_entity_node_ids.add(
                                node_id
                            )  # So we can also ignore any edges to ignored entities
                            continue_graph_elem_loop = True
                            break
                    else:
                        logger.debug(
                            f'{i(1)}Skipping a non-label/non-border-style: GenericNode_subelem.tag={GenericNode_subelem.tag}'
                        )
                        pass
                if continue_graph_elem_loop:
                    continue
                # Now that we have an entity name and attributes, process the attributes
                logger.debug(f'{i(1)}name: {entity_name}')
                try:
                    yaml_attrs = yaml.safe_load(entity_attributes)
                except (yaml.scanner.ScannerError,
                        yaml.parser.ParserError) as ex:
                    print(f'\nERROR: Invalid YAML (syntax) for attributes section of ' \
                          f'the "{entity_name}" entity:\n\n' \
                          f'BEGIN>>>\n{entity_attributes}\n<<<END\n\n' \
                          f'ERROR DETAILS:\n{ex}\n', file=sys.stderr)
                    sys.exit(1)
                if yaml_attrs is None:
                    pass
                else:
                    logger.debug(f'{i(1)}YAML attributes:\n' + \
                        yaml.dump(yaml_attrs, default_flow_style=False))
                    try:
                        json_schema = json_schema_graphml_enum if entity_name.lower().startswith('enum') \
                            else json_schema_graphml_entity_attributes
                        jsonschema.validate(instance=yaml_attrs,
                                            schema=json_schema)
                    except jsonschema.exceptions.ValidationError as ex:
                        print(f'\nERROR: Invalid YAML (schema) for attributes section of ' \
                              f'the "{entity_name}" entity:\n\n' \
                              f'BEGIN>>>\n{entity_attributes}\n<<<END\n\n' \
                              f'ERROR DETAILS:\n{ex}\n', file=sys.stderr)
                        sys.exit(1)
                if entity_name.lower().startswith('enum'):
                    enum_contents = {} if yaml_attrs is None \
                                    else yaml_attrs if type(yaml_attrs) == type({}) \
                                    else { "values": yaml_attrs } if type(yaml_attrs) == type([]) \
                                    else None
                    assert enum_contents is not None, 'Unexpected contents for enum entity'
                    enum_contents.update({"name": entity_name})
                    enum = {"enum": enum_contents}
                    er_enums.append(enum)
                else:
                    entity_contents = {} if yaml_attrs is None else yaml_attrs
                    entity_contents.update({"name": entity_name})
                    entity = {"entity": entity_contents}
                    er_entities.append(entity)
                    node_id_to_entity_name.update({node_id: entity_name})
            else:
                logger.debug(
                    f'Skipping a non-GenericNode: data_subelem.tag={data_subelem.tag}'
                )
                pass  # Ignoring other kinds of nodes
        elif graph_child.tag == edge_tag:
            edge_elem = graph_child
            edge_id = edge_elem.attrib['id']
            logger.debug(f'Relationship {edge_id}')
            edge_source = edge_elem.attrib['source']
            edge_target = edge_elem.attrib['target']
            if edge_source in ignored_entity_node_ids:
                logger.debug(
                    f'{i(1)}Ignoring relationship because source connects to an ignored entity. '
                    f'edge_source={edge_source}')
                continue
            if edge_target in ignored_entity_node_ids:
                logger.debug(
                    f'{i(1)}Ignoring relationship because target connects to an ignored entity. '
                    f'edge_target={edge_target}')
                continue
            entity_source = node_id_to_entity_name[edge_source]
            entity_target = node_id_to_entity_name[edge_target]
            logger.debug(
                f'{i(1)}edge_source={edge_source}\tentity_source={entity_source}'
            )
            logger.debug(
                f'{i(1)}edge_target={edge_target}\tentity_target={entity_target}'
            )
            if data_subelem.tag == PolyLineEdge_tag:
                PolyLineEdge_elem = data_subelem
                LineStyle_elem = PolyLineEdge_elem.find(LineStyle_tag)
                edge_LineStyle_width = LineStyle_elem.attrib['width']
                edge_LineStyle_type = LineStyle_elem.attrib['type']
                logger.debug(
                    f'{i(1)}edge_LineStyle_width={edge_LineStyle_width} edge_LineStyle_type={edge_LineStyle_type}'
                )
                if edge_LineStyle_type != 'line':
                    logger.debug(
                        f'{i(1)}Ignoring relationship because it does not use a simple solid line'
                    )
                    continue
                Arrows_elem = PolyLineEdge_elem.find(Arrows_tag)
                arrow_source = Arrows_elem.attrib['source']
                arrow_target = Arrows_elem.attrib['target']
                end_kinds.add(arrow_source)
                end_kinds.add(arrow_target)
                logger.debug(
                    f'{i(1)}arrows: source={arrow_source} target={arrow_target}'
                )
                kind_source = arrow_source
                kind_target = arrow_target
                is_defining = False

                if arrow_source == 'white_delta':
                    logger.debug(
                        f"{i(1)}inside branch: arrow_source == 'white_delta'")
                    assert arrow_target == 'none', f'Unexpected edge target {arrow_target} for arrow source {arrow_source}'
                    kind_source = 'base_class'
                    kind_target = 'subclass'
                    is_defining = True
                if arrow_target == 'white_delta':
                    logger.debug(
                        f"{i(1)}inside branch: arrow_target == 'white_delta'")
                    assert arrow_source == 'none', f'Unexpected edge source {arrow_source} for arrow target {arrow_target}'
                    kind_target = 'base_class'
                    kind_source = 'subclass'
                    is_defining = True

                if arrow_source == 'crows_foot_one':
                    kind_source = 'one'
                if arrow_target == 'crows_foot_one':
                    kind_target = 'one'

                if arrow_source == 'crows_foot_one_optional':
                    kind_source = 'zero_or_one'
                if arrow_target == 'crows_foot_one_optional':
                    kind_target = 'zero_or_one'

                if arrow_source == 'crows_foot_many_optional':
                    kind_source = 'zero_or_more'
                if arrow_target == 'crows_foot_many_optional':
                    kind_target = 'zero_or_more'

                relationship = {
                    "relationship": {
                        "participants": [{
                            "name": entity_source,
                            "kind": kind_source
                        }, {
                            "name": entity_target,
                            "kind": kind_target
                        }]
                    }
                }
                if edge_LineStyle_width == '3.0':  # make more general
                    if kind_source != 'one' and kind_target != 'one':
                        print(
                            f'\nERROR: Expected an end of a defining relationship to have a cardinality of "one".  '
                            f'Instead, found cardinalities of "{kind_source}" for entity "{entity_source}" '
                            f'and "{kind_target}" for entity "{entity_target}".'
                        )
                        sys.exit(1)
                    is_defining = True
                if is_defining:
                    relationship['relationship'].update({'defining': 'true'})
                logger.debug(f'{i(1)}new relationship: {relationship}')
                er_relationships.append(relationship)
            else:
                logger.debug(
                    f'Skipping a non-PolyLineEdge: data_subelem.tag={data_subelem.tag}'
                )
                pass  # Ignoring other kinds of edges
        else:
            assert False, f'Expected either a node or an edge, found: {graph_child.tag}'

    er.update({"entities": er_entities})
    er.update({"relationships": er_relationships})
    er.update({"enums": er_enums})
    print(yaml.dump(er), file=output_object)
    logger.debug(f'relationship end kinds: {end_kinds}')
    logger.debug('Leaving generml()')
Example #13
def build_entity_parents_and_children(er_yaml):
    '''
    Build parents and children for each entity

    Relationship kinds:
     base_class      parent
     one             parent
     subclass        child
     zero_or_more    child
     zero_or_one     parent
    '''
    logger.debug('Entering build_entity_parents_and_children()')
    entities_pc = {}
    for relationship_outer in er_yaml['relationships']:
        logger.debug(f'relationship_outer={relationship_outer}')
        relationship = relationship_outer['relationship']
        logger.debug(f'relationship={relationship}')
        is_defining = False
        if 'defining' in relationship:
            if relationship['defining'] == 'true':
                is_defining = True
        logger.debug(f'is_defining={is_defining}')
        participants = relationship['participants']
        logger.debug(f'participants={participants}')
        assert cardinality.count(participants) == 2
        for participant_index, participant in enumerate(participants):
            logger.debug(
                f'{i(1)}participant_index={participant_index} participant={participant}'
            )
            other_participant_index = 1 if participant_index == 0 else 0
            participant_name = participant['name']
            participant_kind = participant['kind']
            logger.debug(
                f'{i(2)}participant_name={participant_name} participant_kind={participant_kind}'
            )
            other_participant = participants[other_participant_index]
            logger.debug(
                f'{i(2)}other_participant_index={other_participant_index} other_participant={other_participant}'
            )
            other_participant_name = other_participant['name']
            other_participant_kind = other_participant['kind']
            logger.debug(
                f'{i(2)}other_participant_name={other_participant_name} other_participant_kind={other_participant_kind}'
            )
            if participant_name in entities_pc:
                logger.debug(f'{i(2)}Using existing participating_entity_pc')
                participating_entity_pc = entities_pc[participant_name]
            else:
                logger.debug(f'{i(2)}Making new participating_entity_pc')
                participating_entity_pc = {}
                entities_pc.update({participant_name: participating_entity_pc})
            logger.debug(
                f'{i(2)}participating_entity_pc={participating_entity_pc}')
            if participant_kind in ['zero_or_more', 'subclass']:
                logger.debug(
                    f"{i(2)}TRUE: participant_kind in ['zero_or_more', 'subclass']"
                )
                if participant_kind == 'zero_or_more' and other_participant_kind == 'zero_or_more':
                    logger.debug(
                        'Skipping many-to-many relationship as it is handled elsewhere'
                    )
                    continue
                if 'parents' in participating_entity_pc:
                    logger.debug(
                        f'{i(2)}Using existing participating_entity_pc_parents'
                    )
                    participating_entity_pc_parents = participating_entity_pc[
                        'parents']
                else:
                    logger.debug(
                        f'{i(2)}Making new participating_entity_pc_parents')
                    participating_entity_pc_parents = []
                    participating_entity_pc.update(
                        {'parents': participating_entity_pc_parents})
                participating_entity_pc_parents.append({
                    other_participant_name: {
                        'kind': other_participant_kind,
                        'defining': is_defining
                    }
                })
                logger.debug(
                    f'{i(2)}participating_entity_pc_parents={participating_entity_pc_parents}'
                )
            elif participant_kind in ['one', 'zero_or_one', 'base_class']:
                logger.debug(
                    f"{i(2)}TRUE: participant_kind in ['one', 'zero_or_one', 'base_class']"
                )
                if 'children' in participating_entity_pc:
                    logger.debug(
                        f'{i(2)}Using existing participating_entity_pc_children'
                    )
                    participating_entity_pc_children = participating_entity_pc[
                        'children']
                else:
                    logger.debug(
                        f'{i(2)}Making new participating_entity_pc_children')
                    participating_entity_pc_children = []
                    participating_entity_pc.update(
                        {'children': participating_entity_pc_children})
                participating_entity_pc_children.append({
                    other_participant_name: {
                        'kind': other_participant_kind,
                        'defining': is_defining
                    }
                })
                logger.debug(
                    f'{i(2)}participating_entity_pc_children={participating_entity_pc_children}'
                )
            else:
                assert False
    logger.debug('Leaving build_entity_parents_and_children()')
    return entities_pc
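
A hedged illustration of the data shapes involved, with made-up entity names; it assumes the function above is in scope together with its module-level logger, i() helper, and cardinality import:

er_yaml = {
    'relationships': [{
        'relationship': {
            'defining': 'true',
            'participants': [
                {'name': 'order',      'kind': 'one'},
                {'name': 'order_item', 'kind': 'zero_or_more'},
            ],
        },
    }],
}
# build_entity_parents_and_children(er_yaml) would be expected to return roughly:
# {
#     'order':      {'children': [{'order_item': {'kind': 'zero_or_more', 'defining': True}}]},
#     'order_item': {'parents':  [{'order': {'kind': 'one', 'defining': True}}]},
# }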
Example #14
def test_count_non_iterable():
    with pytest.raises(TypeError) as e:
        cardinality.count(object())
    assert 'is not iterable' in str(e.value)
Example #15
def test_count():
    assert cardinality.count([1, 2]) == 2
    assert cardinality.count(generate(0)) == 0
    assert cardinality.count(generate(3)) == 3
    assert cardinality.count(dict()) == 0
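
generate() is a test helper that is not shown in this excerpt; a plausible stand-in, offered only as an assumption about its behaviour, would simply yield n dummy items:

def generate(n):
    # hypothetical stand-in for the helper used in the test above
    for item in range(n):
        yield item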
Example #16
def get_field_count(format_string):
    fmt = string.Formatter()
    return cardinality.count(t for t in fmt.parse(format_string)
                             if t[1] is not None)
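
A usage sketch, assuming the function above is in scope along with import string and import cardinality:

print(get_field_count('{} sent {n} messages to {dest}'))   # 3
print(get_field_count('no replacement fields here'))       # 0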
Example #17
def generate_entities(er_yaml, output_object):
    '''
    Generate the data catalog info for entity tables
    '''
    logger.debug('Entering generate_entities()')
    # Topologically sort the entities (so we can get the synthesized many-to-many mapping tables)
    graph, dependency_ordering, mm_synthesized = topological_sort_entities(
        er_yaml)
    logger.debug(f'graph={graph}')
    logger.debug(f'dependency_ordering={dependency_ordering}')
    logger.debug(f'mm_synthesized={mm_synthesized}')

    entities_pc = build_entity_parents_and_children(er_yaml)
    logger.debug(
        f'after build_entity_parents_and_children(): entities_pc={json.dumps(entities_pc, indent=4)}'
    )

    entities = er_yaml['entities']
    logger.debug(f'entities={yaml.dump(entities)}')

    # Index the entities
    entity_indices = {}
    for entity_index, entity_outer in enumerate(entities):
        entity = entity_outer['entity']
        logger.debug(
            f'entity_index={entity_index} for entity:\n{yaml.dump(entity)}')
        entity_indices.update({entity['name']: entity_index})
    logger.debug(f'entity_indices=\n{json.dumps(entity_indices, indent=4)}')

    # Generate catalog info for entities
    klist = list(entity_indices.keys()).copy()
    klist.sort()
    for entity_name in klist:
        entity_index = entity_indices[entity_name]
        entity_outer = entities[entity_index]
        entity = entity_outer['entity']
        logger.debug(
            f'Generating catalog info for: entity_index={entity_index} entity={entity}'
        )

        print('---', file=output_object)
        print(f'## {entity_name}\n', file=output_object)
        if 'description' in entity:
            print('**Description:**  ', file=output_object)
            entity_description = entity['description']
            for line in entity_description.splitlines():
                print(f'{line}  ', file=output_object)
        if 'note' in entity:
            print('**Note:**  ', file=output_object)
            entity_note = entity['note']
            for line in entity_note.splitlines():
                print(f'{line}  ', file=output_object)
        if 'attributes' in entity:
            if 'description' in entity or 'note' in entity:
                print(file=output_object)
            print('### Columns:', file=output_object)
            print(f'\nNum | Name | Type | Unique | Description | Note',
                  file=output_object)
            print(f'--- | ---- | ---- | ------ | ----------- | ----',
                  file=output_object)
            for ordinal, attr_items in enumerate(entity['attributes'].items()):
                attr_name = attr_items[0]
                attr_details = attr_items[1]
                logger.debug(
                    f'{i(1)}attr_name={attr_name} attr_details={attr_details}')
                attr_type = attr_details[
                    'type'] if 'type' in attr_details else ''
                attr_unique = attr_details[
                    'unique'] if 'unique' in attr_details else ''
                attr_description = attr_details[
                    'description'] if 'description' in attr_details else ''
                attr_note = attr_details[
                    'note'] if 'note' in attr_details else ''
                print(
                    f'{ordinal+1} | {attr_name} | {attr_type} | {attr_unique} | {attr_description} | {attr_note}',
                    file=output_object)

        # Generate relationships section
        parents_count = 0
        children_count = 0
        mm_count = 0
        if entity_name in entities_pc:
            entity_pc = entities_pc[entity_name]
            if 'parents' in entity_pc:
                parents = entity_pc['parents']
                parents_count = cardinality.count(parents)
            if 'children' in entity_pc:
                children = entity_pc['children']
                children_count = cardinality.count(children)
        mm_participating = set()
        for mm in mm_synthesized:
            if entity_name in graph[mm]:
                mm_participating.add(mm)
        mm_count = cardinality.count(mm_participating)
        logger.debug(f'mm_participating={mm_participating}')
        logger.debug(
            f'parents_count={parents_count} children_count={children_count} mm_count={mm_count}'
        )
        if (parents_count >= 1 or children_count >= 1 or mm_count >= 1) and \
            ('description' in entity or 'note' in entity or 'attributes' in entity):
            print(file=output_object)
        if parents_count >= 1 or children_count >= 1:
            print('### Relationships:', file=output_object)
        if parents_count >= 1:
            print('#### Parents', file=output_object)
            print('Name | Kind | Defining', file=output_object)
            print('---- | ---- | --------', file=output_object)
            for parent in parents:
                assert cardinality.count(parent) == 1
                for parent_name, parent_details in parent.items():
                    pass
                logger.debug(
                    f'parent_name={parent_name} parent_details={parent_details}'
                )
                relationship_kind = parent_details['kind']
                is_defining = parent_details[
                    'defining'] if 'defining' in parent_details else False
                print(f'{parent_name} | {relationship_kind} | {is_defining}',
                      file=output_object)
        if children_count >= 1:
            print('#### Children', file=output_object)
            print('Name | Kind | Defining', file=output_object)
            print('---- | ---- | --------', file=output_object)
            for child in children:
                assert cardinality.count(child) == 1
                for child_name, child_details in child.items():
                    pass
                logger.debug(
                    f'child_name={child_name} child_details={child_details}')
                relationship_kind = child_details['kind']
                is_defining = child_details[
                    'defining'] if 'defining' in child_details else False
                print(f'{child_name} | {relationship_kind} | {is_defining}',
                      file=output_object)
        if mm_count >= 1:
            print('#### Many-to-Many Relationships', file=output_object)
            print('Other Entity Name | Kind', file=output_object)
            print('----------------- | ----', file=output_object)
            for mm in mm_participating:
                for participant in graph[mm]:
                    if participant == entity_name:
                        continue
                    print(f'{participant} | zero_or_more', file=output_object)
        print(file=output_object)
    logger.debug('Leaving generate_entities()')
                X_Y = list()
                for i in json.loads(line).values()[0]:
                    list_of_zeros = [0.0] * len(unique_item_id)
                    for j in i:
                        list_of_zeros[j] = 1.0
                    X_Y.append(list_of_zeros)
                L = len(json.loads(line).values()[0])
                for j in range(L):
                    X = X_Y[j]
                    yield (np.array([X]), np.array([X]))

uid_train, uid_test = u_c_id_for_train_test(1)
print(len(uid_train))
print(len(uid_test))

print(cardinality.count(x_y_train_ae(uid_train)))

ae = Sequential()
inputLayer = Dense(100, input_shape=(len(unique_item_id),), activation='tanh')
ae.add(inputLayer)
output = Dense(len(unique_item_id), activation='sigmoid')
ae.add(output)
ae.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
ae.fit_generator(x_y_train_ae(uid_train), samples_per_epoch=cardinality.count(x_y_train_ae(uid_train)), nb_epoch=200)

w1 = ae.layers[0].get_weights()[0]
b1 = ae.layers[0].get_weights()[1]
w2 = ae.layers[1].get_weights()[0]
b2 = ae.layers[1].get_weights()[1]
pickle.dump(w1, open("w1.p", "wb"))
pickle.dump(b1, open("b1.p", "wb"))