import cardinality


def jaccard(a, b, graph):
    # Second-neighborhood Jaccard scores between nodes a and b of a graph.
    neib_a = set(graph.neighbors(a))
    neib_b = set(graph.neighbors(b))
    lis_2a = []
    lis_2b = []
    # Collect the neighbors of each node's neighbors (distance-2 candidates)
    for x in neib_a:
        lis_2a += list(graph.neighbors(x))
    for x in neib_b:
        lis_2b += list(graph.neighbors(x))
    union_a = set(list(neib_b) + lis_2a)
    union_b = set(list(neib_a) + lis_2b)
    common_a = set(lis_2a).intersection(neib_b)
    common_b = set(lis_2b).intersection(neib_a)
    try:
        jac_a = cardinality.count(common_a) / cardinality.count(union_a)
    except ZeroDivisionError:
        jac_a = 0
    try:
        jac_b = cardinality.count(common_b) / cardinality.count(union_b)
    except ZeroDivisionError:
        jac_b = 0
    return jac_a, jac_b, (jac_a + jac_b) / 2
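# Hypothetical usage sketch (not from the original source): exercises the
# jaccard() helper above on a small graph, assuming networkx is available.
import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (3, 4), (1, 3)])
print(jaccard(1, 4, G))  # -> (jac_a, jac_b, their average)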
import numpy as np
import cardinality as cd


def check_volume2(min_vec, prim_vec, latt):
    """
    Check whether the input cell is primitive.

    input:
        min_vec  : minimum vectors of an atom
        prim_vec : basis vectors of the primitive cell
        latt     : lattice vectors
    return:
        flag3         : 'Found' if the input cell is primitive
        min_prim_latt : minimal primitive lattice vectors
    """
    tp_vec = np.transpose(min_vec)  # tp_vec holds the vectors as columns
    inv_tp_vec = np.linalg.inv(tp_vec)
    #print(inv_tp_vec)
    # Round each matrix element to the nearest integer (half away from zero)
    for i in range(3):
        for j in range(3):
            if inv_tp_vec[i, j] < 0.0:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] - 0.5)
            else:
                inv_tp_vec[i, j] = int(inv_tp_vec[i, j] + 0.5)
    print('inv_tp_vec:\n', np.linalg.det(inv_tp_vec))
    #print(inv_tp_vec)
    if abs(np.linalg.det(inv_tp_vec)) == (cd.count(prim_vec) - 2):
        print('Found')
    else:
        print('Warning! Primitive vectors were not completely found!')
    # min_prim_latt = np.dot(latt, np.linalg.inv(inv_tp_vec))
    # inv_tp_vec stores vectors as columns; min_prim_latt stores them as rows
    min_prim_latt = np.dot(np.transpose(np.linalg.inv(inv_tp_vec)), latt)
    flag3 = 'Found'
    return flag3, min_prim_latt
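# The int(x +/- 0.5) idiom above rounds half away from zero (a bare int() would
# truncate toward zero); a quick standalone check:
for x in (2.5, -2.5, 0.4, -0.4):
    r = int(x - 0.5) if x < 0.0 else int(x + 0.5)
    print(x, '->', r)  # 2.5 -> 3, -2.5 -> -3, 0.4 -> 0, -0.4 -> 0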
from copy import copy

from cardinality import count

# RawPath, V, MutableCollection, to_path(), head() and _safe_setitem() are
# defined elsewhere in the same module.


def setitem(path: RawPath, value: V,
            collection: MutableCollection) -> MutableCollection:
    """
    Sets the value at path of collection. If a portion of path
    doesn't exist, it's created.
    """
    path = to_path(path)
    clone = copy(collection)
    key = head(path)
    if count(path) == 1:
        _safe_setitem(key, value, clone)
    else:
        try:
            sub = collection[key]
        except KeyError:
            # Missing mapping key: create a container for the next path element
            if isinstance(path[1], int):
                sub = []
            else:
                sub = {}
        except IndexError:
            # Missing sequence index: pad the clone with None up to the key
            for i in range(len(clone), key + 1):
                clone.insert(i, None)
            if isinstance(path[1], int):
                sub = []
            else:
                sub = {}
        clone[key] = setitem(path[1:], value, sub)
    return clone
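# Minimal stand-ins (assumptions, not the library's real helpers) so the
# setitem() sketch above can be exercised in isolation. These must precede the
# setitem() definition for its annotations to resolve.
from collections.abc import MutableMapping, MutableSequence
from typing import Any, List, Union

RawPath = Union[str, List[Any]]
V = Any
MutableCollection = Union[MutableMapping, MutableSequence]

def to_path(path):
    # Assumed behavior: list paths pass through; dotted strings split on '.'
    return path if isinstance(path, list) else path.split('.')

def head(path):
    return path[0]

def _safe_setitem(key, value, collection):
    # Assumed behavior: pad sequences so the assignment cannot raise IndexError
    if isinstance(collection, MutableSequence) and key >= len(collection):
        collection.extend([None] * (key + 1 - len(collection)))
    collection[key] = value

print(setitem(['a', 0, 'b'], 1, {}))  # -> {'a': [{'b': 1}]}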
import json

import cardinality

# `logger` and the i() indentation helper are module-level definitions in the
# original source.


def generate_foreign_keys(parents, num_parents, num_attributes, output_object):
    '''
    Generate DDL for foreign keys
    '''
    logger.debug('Entering generate_foreign_keys()')
    if num_parents >= 1:
        logger.debug('Generating DDL for foreign keys...')
        logger.debug(f'parents=\n{json.dumps(parents, indent=4)}')
        for parent_num, parent in enumerate(parents):
            logger.debug(f'{i(1)}parent_num={parent_num} parent={parent}')
            # Each parent is a single-entry dict: {parent_name: parent_vals}
            assert cardinality.count(parent) == 1
            for parent_name, parent_vals in parent.items():
                pass
            parent_kind = parent_vals['kind']
            is_defining = False
            if 'defining' in parent_vals:
                if parent_vals['defining'] == True:
                    is_defining = True
            logger.debug(f'{i(1)}is_defining={is_defining}')
            column_line = f'{i(1)}{"fk_" + parent_name} uuid '
            if parent_kind in ['one', 'base_class']:
                column_line += 'not null '
            column_line += f'references {parent_name}(pk)'
            if is_defining:
                column_line += ' on delete cascade'
            elif parent_kind == 'zero_or_one':
                column_line += ' on delete set null'
            logger.debug(f'{i(1)}column_line={column_line}')
            # All but the last column line need a trailing comma
            if parent_num < num_parents - 1 or num_attributes > 0:
                column_line += ','
            logger.debug(f'column_line={column_line}')
            print(f'{column_line}', file=output_object)
    logger.debug('Leaving generate_foreign_keys()')
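# Hypothetical driver for generate_foreign_keys() above, with stand-ins for the
# module's logger and i() helpers (assumptions, not the real module):
import io
import logging

logger = logging.getLogger(__name__)

def i(level):
    # Assumed: i(n) returns n levels of four-space indentation
    return '    ' * level

buf = io.StringIO()
parents = [{'customer': {'kind': 'one', 'defining': True}}]
generate_foreign_keys(parents, 1, 0, buf)
print(buf.getvalue())
# ->     fk_customer uuid not null references customer(pk) on delete cascade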
import numpy as np
import cardinality as cd


def check_volume1(prim_vec, latt):
    # Search all triples of candidate vectors for a cell whose volume divides
    # the initial cell volume the expected number of times.
    initial_vol = abs(np.linalg.det(latt))
    vol_tolerance = 0.00001
    tmp_latt = np.zeros((3, 3))
    min_vec = np.zeros((3, 3))
    flag3 = 'Not Found'
    for i in range(0, len(prim_vec) - 2):
        for j in range(i + 1, len(prim_vec) - 1):
            for k in range(j + 1, len(prim_vec)):
                if flag3 == 'Not Found':
                    tmp_latt[0] = np.dot(prim_vec[i], latt)
                    tmp_latt[1] = np.dot(prim_vec[j], latt)
                    tmp_latt[2] = np.dot(prim_vec[k], latt)
                    tmp_vol = abs(np.linalg.det(tmp_latt))
                    #print('tmp_latt:\n', tmp_latt)
                    #print('tmpvol:\n', tmp_vol)
                    #print('initvol:\n', initial_vol)
                    #print('cardinality\n', cd.count(prim_vec))
                    if tmp_vol > vol_tolerance:
                        # Round the volume ratio half away from zero
                        v = initial_vol / tmp_vol
                        if v < 0.0:
                            v = int(v - 0.5)
                        else:
                            v = int(v + 0.5)
                        # The primitive cell satisfies this even when the
                        # conventional one does not: e.g. a smallest conventional
                        # volume of ~32 against an initial volume of ~127.8 gives
                        # a ratio of ~4 (127.8 / 32).
                        if v == (cd.count(prim_vec) - 2):
                            min_vec[0] = prim_vec[i]
                            min_vec[1] = prim_vec[j]
                            min_vec[2] = prim_vec[k]
                            return check_volume2(min_vec, prim_vec, latt)
    return flag3, None  # flag3 = 'Not Found'
from functools import reduce
from itertools import tee
from numbers import Number
from operator import add
from typing import Iterable

from cardinality import count


def mean(iterable: Iterable[Number]) -> float:
    """
    Computes the mean of the values in iterable.
    """
    # tee() splits a possibly one-shot iterator so it can be both summed and counted
    to_sum, to_count = tee(iterable)
    _sum = reduce(add, to_sum)
    return _sum / count(to_count)
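# Usage sketch for mean() above; tee() lets the generator be consumed twice.
print(mean(x * x for x in range(4)))  # (0 + 1 + 4 + 9) / 4 = 3.5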
import cardinality


def commonneigh(a, b, graph):
    # Count distance-2 common neighbors in both directions and average them.
    neib_a = set(graph.neighbors(a))
    neib_b = set(graph.neighbors(b))
    lis_2a = []
    lis_2b = []
    for x in neib_a:
        lis_2a += list(graph.neighbors(x))
    for x in neib_b:
        lis_2b += list(graph.neighbors(x))
    # union_a and union_b are computed but unused in this variant
    union_a = set(list(neib_b) + lis_2a)
    union_b = set(list(neib_a) + lis_2b)
    common_a = set(lis_2a).intersection(neib_b)
    common_b = set(lis_2b).intersection(neib_a)
    return (cardinality.count(common_a), cardinality.count(common_b),
            (cardinality.count(common_a) + cardinality.count(common_b)) / 2)
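# Hypothetical usage sketch, mirroring the jaccard() example above and again
# assuming networkx:
import networkx as nx

G2 = nx.Graph([(1, 2), (2, 3), (3, 4), (1, 3)])
print(commonneigh(1, 4, G2))  # -> (count_a, count_b, their average)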
from copy import copy

from cardinality import count

# RawPath, MutableCollection, to_path() and head() are defined elsewhere in
# the same module.


def delitem(path: RawPath, collection: MutableCollection) -> MutableCollection:
    """
    Deletes the given path from collection.
    """
    path = to_path(path)
    clone = copy(collection)
    key = head(path)
    if count(path) == 1:
        del clone[key]
    else:
        # Recurse into the sub-collection until only one path element remains
        clone[key] = delitem(path[1:], collection[key])
    return clone
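# Usage sketch, reusing the stand-in helpers from the setitem() example above:
print(delitem(['a', 0, 'b'], {'a': [{'b': 1}]}))  # -> {'a': [{}]}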
import cardinality
import yaml

# `logger` and the i() indentation helper are module-level definitions in the
# original source.


def generate_entity_comments(entity_name, entities, entity_indices, entities_pc,
                             output_object):
    '''
    Handle entity description and note
    '''
    logger.debug('Entering generate_entity_comments()')
    entity_index = entity_indices[entity_name]
    entity = entities[entity_index]['entity']
    logger.debug(f'entity=\n{yaml.dump(entity)}')
    num_parents = 0
    entity_pc = entities_pc[entity_name]
    logger.debug(f'{i(1)}entity_pc={entity_pc}')
    parents = None
    if 'parents' in entity_pc:
        parents = entity_pc['parents']
        num_parents = cardinality.count(parents)
    num_attributes = 0
    attributes = None
    if 'attributes' in entity:
        attributes = entity['attributes']
        num_attributes = cardinality.count(attributes)
    logger.debug(f'num_parents={num_parents} num_attributes={num_attributes}')
    if 'description' in entity:
        print('-- Description:', file=output_object)
        table_description = entity['description']
        for line in table_description.splitlines():
            print(f'-- {line}', file=output_object)
    if 'note' in entity:
        if 'description' in entity:
            print(file=output_object)
        print('-- Note:', file=output_object)
        table_note = entity['note']
        for line in table_note.splitlines():
            print(f'-- {line}', file=output_object)
    print(file=output_object)
    logger.debug('Leaving generate_entity_comments()')
    return entity, parents, num_parents, attributes, num_attributes
import cardinality


def generate_mm_synthesized(entity_name, graph, output_object):
    '''
    Generate DDL for synthesized many-to-many mapping table

    Assumes synthesized many-to-many mapping tables have no attributes.
    This may change with a future enhancement.
    '''
    logger.debug('Entering generate_mm_synthesized()')
    graph_dependees = graph[entity_name]
    logger.debug(f'{i(1)}graph_dependees={graph_dependees}')
    print(f'create table {entity_name} (', file=output_object)
    print(f'{i(1)}pk uuid not null default gen_random_uuid() primary key,',
          file=output_object)
    num_parents = cardinality.count(graph_dependees)
    for dependee_num, dependee in enumerate(graph_dependees):
        column_line = f'{i(1)}fk_{dependee} uuid not null references {dependee}(pk) on delete cascade'
        # All but the last foreign-key column need a trailing comma
        if dependee_num < num_parents - 1:
            column_line += ','
        print(column_line, file=output_object)
    print(');\n', file=output_object)
    logger.debug('Leaving generate_mm_synthesized()')
import datetime
import sys
import xml.etree.ElementTree as ET

import cardinality
import jsonschema
import yaml

# `logger`, the i() indentation helper, strip_namespace() and the JSON schemas
# (json_schema_graphml_enum, json_schema_graphml_entity_attributes) are
# module-level definitions in the original source.


def generml(input_file_or_object, input, output_object):
    '''
    Generally-callable entry point to read an Entity-Relationship diagram
    created by the yEd graph editor and convert it into Entity-Relationship
    Markup Language

    \b
    References:
    yEd - https://www.yworks.com/products/yed
    GraphML - http://graphml.graphdrawing.org/index.html
    '''
    logger.debug('Entering generml()')
    graph_tag = '{http://graphml.graphdrawing.org/xmlns}graph'
    node_tag = '{http://graphml.graphdrawing.org/xmlns}node'
    edge_tag = '{http://graphml.graphdrawing.org/xmlns}edge'
    data_tag = '{http://graphml.graphdrawing.org/xmlns}data'
    GenericNode_tag = '{http://www.yworks.com/xml/graphml}GenericNode'
    BorderStyle_tag = '{http://www.yworks.com/xml/graphml}BorderStyle'
    PolyLineEdge_tag = '{http://www.yworks.com/xml/graphml}PolyLineEdge'
    NodeLabel_tag = '{http://www.yworks.com/xml/graphml}NodeLabel'
    LineStyle_tag = '{http://www.yworks.com/xml/graphml}LineStyle'
    Arrows_tag = '{http://www.yworks.com/xml/graphml}Arrows'
    NodeLabel_attr_configuration_name = 'com.yworks.entityRelationship.label.name'
    NodeLabel_attr_configuration_attributes = 'com.yworks.entityRelationship.label.attributes'
    GenericNode_attr_configuration_BigEntity = 'com.yworks.entityRelationship.big_entity'
    logger.debug('before parse()')
    tree = ET.parse(input_file_or_object)
    logger.debug('after parse()')
    root = tree.getroot()
    logger.debug('Printing Entity-Relationship Markup Language')
    er_head = {
        "source": 'stdin' if input == '-' else input,
        "generated_datetime": datetime.datetime.utcnow().isoformat()
    }
    print(yaml.dump(er_head), file=output_object)
    er = {}
    end_kinds = set()  # delete after debugging done
    er_entities = []
    er_enums = []
    er_relationships = []
    name_set = set()  # To prevent duplicate entity or enum names
    ignored_entity_node_ids = set()  # So you can ignore relationships to ignored entities
    node_id_to_entity_name = {}
    graph_elem = root.find(graph_tag)
    assert graph_elem is not None, 'Expected graph tag is not present'
    for graph_child in graph_elem:
        logger.debug(f'Next graph_child: tag={strip_namespace(graph_child.tag)}')
        logger.debug(ET.tostring(graph_child, encoding='utf8').decode('utf8'))
        continue_graph_elem_loop = False
        # We only care about nodes and edges
        if graph_child.tag != node_tag and graph_child.tag != edge_tag:
            logger.debug(f'Skipping non-node/non-edge graph_child.tag={graph_child.tag}')
            continue
        number_children_graph_child = cardinality.count(graph_child)
        # violated by yEd's default (unedited) entity:
        assert number_children_graph_child == 1, \
            "Expected the graph element's child to have only one child"
        # The data element is a child of both node and edge elements
        data_elem = graph_child.find(data_tag)
        number_children_data = cardinality.count(data_elem)
        assert number_children_data == 1, 'Expected the data element to have only 1 child'
        data_subelem = data_elem[0]
        if graph_child.tag == node_tag:
            logger.debug('Found a node')
            node_elem = graph_child
            node_id = node_elem.attrib['id']
            if data_subelem.tag == GenericNode_tag:
                GenericNode_elem = data_subelem
                assert GenericNode_elem.attrib['configuration'] == GenericNode_attr_configuration_BigEntity, \
                    'Expected the generic node "configuration" attribute to indicate a BigEntity'
                logger.debug(f'GraphML entity node {node_id}:')
                for GenericNode_subelem in GenericNode_elem:
                    logger.debug(f'{i(1)}Found a GenericNode_subelem, '
                                 f'tag={strip_namespace(GenericNode_subelem.tag)}')
                    if GenericNode_subelem.tag == NodeLabel_tag:
                        logger.debug(f'{i(1)}The GenericNode_subelem is a NodeLabel')
                        NodeLabel_elem = GenericNode_subelem
                        NodeLabel_attr_configuration = NodeLabel_elem.attrib['configuration']
                        if NodeLabel_attr_configuration == NodeLabel_attr_configuration_name:
                            entity_name = NodeLabel_elem.text
                            logger.debug(f'{i(1)}entity_name={entity_name}')
                            if entity_name in name_set:
                                print(f'\nERROR: Duplicate name specified: {entity_name}',
                                      file=sys.stderr)
                                sys.exit(1)
                            else:
                                name_set.add(entity_name)
                        elif NodeLabel_attr_configuration == NodeLabel_attr_configuration_attributes:
                            entity_attributes = NodeLabel_elem.text
                            logger.debug(f'{i(1)}entity_attributes={entity_attributes}')
                        else:
                            # The configuration attribute can have only 2 values
                            assert False, (
                                f'Got an unexpected value for the "configuration" attribute '
                                f'of the node label element: {NodeLabel_attr_configuration}')
                    elif GenericNode_subelem.tag == BorderStyle_tag:
                        logger.debug(f"{i(1)}GenericNode_subelem.attrib['type']="
                                     f"{GenericNode_subelem.attrib['type']}")
                        if GenericNode_subelem.attrib['type'] != 'line':
                            logger.debug(f'{i(1)}Ignoring entity because the border '
                                         f'is not a simple solid line')
                            # So we can also ignore any edges to ignored entities
                            ignored_entity_node_ids.add(node_id)
                            continue_graph_elem_loop = True
                            break
                    else:
                        logger.debug(f'{i(1)}Skipping a non-label/non-border-style: '
                                     f'GenericNode_subelem.tag={GenericNode_subelem.tag}')
                if continue_graph_elem_loop:
                    continue
                # Now that we have an entity name and attributes, process the attributes
                logger.debug(f'{i(1)}name: {entity_name}')
                try:
                    yaml_attrs = yaml.safe_load(entity_attributes)
                except (yaml.scanner.ScannerError, yaml.parser.ParserError) as ex:
                    print(f'\nERROR: Invalid YAML (syntax) for attributes section of '
                          f'the "{entity_name}" entity:\n\n'
                          f'BEGIN>>>\n{entity_attributes}\n<<<END\n\n'
                          f'ERROR DETAILS:\n{ex}\n', file=sys.stderr)
                    sys.exit(1)
                if yaml_attrs is not None:
                    logger.debug(f'{i(1)}YAML attributes:\n' +
                                 yaml.dump(yaml_attrs, default_flow_style=False))
                    try:
                        json_schema = json_schema_graphml_enum \
                            if entity_name.lower().startswith('enum') \
                            else json_schema_graphml_entity_attributes
                        jsonschema.validate(instance=yaml_attrs, schema=json_schema)
                    except jsonschema.exceptions.ValidationError as ex:
                        print(f'\nERROR: Invalid YAML (schema) for attributes section of '
                              f'the "{entity_name}" entity:\n\n'
                              f'BEGIN>>>\n{entity_attributes}\n<<<END\n\n'
                              f'ERROR DETAILS:\n{ex}\n', file=sys.stderr)
                        sys.exit(1)
                if entity_name.lower().startswith('enum'):
                    enum_contents = {} if yaml_attrs is None \
                        else yaml_attrs if isinstance(yaml_attrs, dict) \
                        else {"values": yaml_attrs} if isinstance(yaml_attrs, list) \
                        else None
                    assert enum_contents is not None, 'Unexpected contents for enum entity'
                    enum_contents.update({"name": entity_name})
                    enum = {"enum": enum_contents}
                    er_enums.append(enum)
                else:
                    entity_contents = {} if yaml_attrs is None else yaml_attrs
                    entity_contents.update({"name": entity_name})
                    entity = {"entity": entity_contents}
                    er_entities.append(entity)
                node_id_to_entity_name.update({node_id: entity_name})
            else:
                logger.debug(f'Skipping a non-GenericNode: data_subelem.tag={data_subelem.tag}')
                pass  # Ignoring other kinds of nodes
        elif graph_child.tag == edge_tag:
            edge_elem = graph_child
            edge_id = edge_elem.attrib['id']
            logger.debug(f'Relationship {edge_id}')
            edge_source = edge_elem.attrib['source']
            edge_target = edge_elem.attrib['target']
            if edge_source in ignored_entity_node_ids:
                logger.debug(f'{i(1)}Ignoring relationship because source connects '
                             f'to an ignored entity. edge_source={edge_source}')
                continue
            if edge_target in ignored_entity_node_ids:
                logger.debug(f'{i(1)}Ignoring relationship because target connects '
                             f'to an ignored entity. edge_target={edge_target}')
                continue
            entity_source = node_id_to_entity_name[edge_source]
            entity_target = node_id_to_entity_name[edge_target]
            logger.debug(f'{i(1)}edge_source={edge_source}\tentity_source={entity_source}')
            logger.debug(f'{i(1)}edge_target={edge_target}\tentity_target={entity_target}')
            if data_subelem.tag == PolyLineEdge_tag:
                PolyLineEdge_elem = data_subelem
                LineStyle_elem = PolyLineEdge_elem.find(LineStyle_tag)
                edge_LineStyle_width = LineStyle_elem.attrib['width']
                edge_LineStyle_type = LineStyle_elem.attrib['type']
                logger.debug(f'{i(1)}edge_LineStyle_width={edge_LineStyle_width} '
                             f'edge_LineStyle_type={edge_LineStyle_type}')
                if edge_LineStyle_type != 'line':
                    logger.debug(f'{i(1)}Ignoring relationship because it does not '
                                 f'use a simple solid line')
                    continue
                Arrows_elem = PolyLineEdge_elem.find(Arrows_tag)
                arrow_source = Arrows_elem.attrib['source']
                arrow_target = Arrows_elem.attrib['target']
                end_kinds.add(arrow_source)
                end_kinds.add(arrow_target)
                logger.debug(f'{i(1)}arrows: source={arrow_source} target={arrow_target}')
                kind_source = arrow_source
                kind_target = arrow_target
                is_defining = False
                if arrow_source == 'white_delta':
                    logger.debug(f"{i(1)}inside branch: arrow_source == 'white_delta'")
                    assert arrow_target == 'none', \
                        f'Unexpected edge target {arrow_target} for arrow source {arrow_source}'
                    kind_source = 'base_class'
                    kind_target = 'subclass'
                    is_defining = True
                if arrow_target == 'white_delta':
                    logger.debug(f"{i(1)}inside branch: arrow_target == 'white_delta'")
                    assert arrow_source == 'none', \
                        f'Unexpected edge source {arrow_source} for arrow target {arrow_target}'
                    kind_target = 'base_class'
                    kind_source = 'subclass'
                    is_defining = True
                if arrow_source == 'crows_foot_one':
                    kind_source = 'one'
                if arrow_target == 'crows_foot_one':
                    kind_target = 'one'
                if arrow_source == 'crows_foot_one_optional':
                    kind_source = 'zero_or_one'
                if arrow_target == 'crows_foot_one_optional':
                    kind_target = 'zero_or_one'
                if arrow_source == 'crows_foot_many_optional':
                    kind_source = 'zero_or_more'
                if arrow_target == 'crows_foot_many_optional':
                    kind_target = 'zero_or_more'
                relationship = {
                    "relationship": {
                        "participants": [
                            {"name": entity_source, "kind": kind_source},
                            {"name": entity_target, "kind": kind_target}
                        ]
                    }
                }
                if edge_LineStyle_width == '3.0':  # make more general
                    if kind_source != 'one' and kind_target != 'one':
                        print(f'\nERROR: Expected an end of a defining relationship to have '
                              f'a cardinality of "one". Instead, found cardinalities of '
                              f'"{kind_source}" for entity "{entity_source}" '
                              f'and "{kind_target}" for entity "{entity_target}".')
                        sys.exit(1)
                    is_defining = True
                if is_defining:
                    relationship['relationship'].update({'defining': 'true'})
                logger.debug(f'{i(1)}new relationship: {relationship}')
                er_relationships.append(relationship)
            else:
                logger.debug(f'Skipping a non-PolyLineEdge: data_subelem.tag={data_subelem.tag}')
                pass  # Ignoring other kinds of edges
        else:
            assert False, f'Expected either a node or an edge, found: {graph_child.tag}'
    er.update({"entities": er_entities})
    er.update({"relationships": er_relationships})
    er.update({"enums": er_enums})
    print(yaml.dump(er), file=output_object)
    logger.debug(f'relationship end kinds: {end_kinds}')
    logger.debug('Leaving generml()')
import cardinality


def build_entity_parents_and_children(er_yaml):
    '''
    Build parents and children for each entity

    Relationship kinds:
        base_class    parent
        one           parent
        subclass      child
        zero_or_more  child
        zero_or_one   parent
    '''
    logger.debug('Entering build_entity_parents_and_children()')
    entities_pc = {}
    for relationship_outer in er_yaml['relationships']:
        logger.debug(f'relationship_outer={relationship_outer}')
        relationship = relationship_outer['relationship']
        logger.debug(f'relationship={relationship}')
        is_defining = False
        if 'defining' in relationship:
            if relationship['defining'] == 'true':
                is_defining = True
        logger.debug(f'is_defining={is_defining}')
        participants = relationship['participants']
        logger.debug(f'participants={participants}')
        # Every relationship connects exactly two participants
        assert cardinality.count(participants) == 2
        for participant_index, participant in enumerate(participants):
            logger.debug(f'{i(1)}participant_index={participant_index} participant={participant}')
            other_participant_index = 1 if participant_index == 0 else 0
            participant_name = participant['name']
            participant_kind = participant['kind']
            logger.debug(f'{i(2)}participant_name={participant_name} '
                         f'participant_kind={participant_kind}')
            other_participant = participants[other_participant_index]
            logger.debug(f'{i(2)}other_participant_index={other_participant_index} '
                         f'other_participant={other_participant}')
            other_participant_name = other_participant['name']
            other_participant_kind = other_participant['kind']
            logger.debug(f'{i(2)}other_participant_name={other_participant_name} '
                         f'other_participant_kind={other_participant_kind}')
            if participant_name in entities_pc:
                logger.debug(f'{i(2)}Using existing participating_entity_pc')
                participating_entity_pc = entities_pc[participant_name]
            else:
                logger.debug(f'{i(2)}Making new participating_entity_pc')
                participating_entity_pc = {}
                entities_pc.update({participant_name: participating_entity_pc})
            logger.debug(f'{i(2)}participating_entity_pc={participating_entity_pc}')
            if participant_kind in ['zero_or_more', 'subclass']:
                logger.debug(f"{i(2)}TRUE: participant_kind in ['zero_or_more', 'subclass']")
                if participant_kind == 'zero_or_more' and other_participant_kind == 'zero_or_more':
                    logger.debug('Skipping many-to-many relationship as it is handled elsewhere')
                    continue
                if 'parents' in participating_entity_pc:
                    logger.debug(f'{i(2)}Using existing participating_entity_pc_parents')
                    participating_entity_pc_parents = participating_entity_pc['parents']
                else:
                    logger.debug(f'{i(2)}Making new participating_entity_pc_parents')
                    participating_entity_pc_parents = []
                    participating_entity_pc.update({'parents': participating_entity_pc_parents})
                participating_entity_pc_parents.append({
                    other_participant_name: {
                        'kind': other_participant_kind,
                        'defining': is_defining
                    }
                })
                logger.debug(f'{i(2)}participating_entity_pc_parents='
                             f'{participating_entity_pc_parents}')
            elif participant_kind in ['one', 'zero_or_one', 'base_class']:
                logger.debug(f"{i(2)}TRUE: participant_kind in ['one', 'zero_or_one', 'base_class']")
                if 'children' in participating_entity_pc:
                    logger.debug(f'{i(2)}Using existing participating_entity_pc_children')
                    participating_entity_pc_children = participating_entity_pc['children']
                else:
                    logger.debug(f'{i(2)}Making new participating_entity_pc_children')
                    participating_entity_pc_children = []
                    participating_entity_pc.update({'children': participating_entity_pc_children})
                participating_entity_pc_children.append({
                    other_participant_name: {
                        'kind': other_participant_kind,
                        'defining': is_defining
                    }
                })
                logger.debug(f'{i(2)}participating_entity_pc_children='
                             f'{participating_entity_pc_children}')
            else:
                assert False
    logger.debug('Leaving build_entity_parents_and_children()')
    return entities_pc
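# Illustrative shape of the returned mapping (inferred from the code above, not
# taken from the original source): each entity name maps to optional 'parents'
# and 'children' lists of single-entry dicts.
example_entities_pc = {
    'order': {'parents': [{'customer': {'kind': 'one', 'defining': True}}]},
    'customer': {'children': [{'order': {'kind': 'zero_or_more', 'defining': True}}]},
}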
import pytest

import cardinality


def test_count_non_iterable():
    # count() must reject non-iterable arguments with a TypeError
    with pytest.raises(TypeError) as e:
        cardinality.count(object())
    assert 'is not iterable' in str(e.value)
import cardinality

# generate() is a helper from the same test module; a plausible definition
# follows this test.


def test_count():
    assert cardinality.count([1, 2]) == 2
    assert cardinality.count(generate(0)) == 0
    assert cardinality.count(generate(3)) == 3
    assert cardinality.count(dict()) == 0
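# A plausible definition of the generate() helper used above (an assumption;
# the original test module's version is not shown): a generator yielding
# `num` items.
def generate(num):
    for value in range(num):
        yield value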
import string

import cardinality


def get_field_count(format_string):
    # Count the replacement fields in a format string; parse() yields
    # (literal_text, field_name, format_spec, conversion) tuples, and
    # field_name is None for purely literal chunks.
    fmt = string.Formatter()
    return cardinality.count(t for t in fmt.parse(format_string)
                             if t[1] is not None)
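# Usage sketch: auto-numbered and named fields both count; literal text does not.
print(get_field_count('{} errors in {filename}'))  # -> 2
print(get_field_count('no fields here'))           # -> 0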
import json

import cardinality
import yaml

# `logger`, the i() indentation helper and topological_sort_entities() are
# module-level definitions in the original source.


def generate_entities(er_yaml, output_object):
    '''
    Generate the data catalog info for entity tables
    '''
    logger.debug('Entering generate_entities()')
    # Topologically sort the entities (so we can get the synthesized
    # many-to-many mapping tables)
    graph, dependency_ordering, mm_synthesized = topological_sort_entities(er_yaml)
    logger.debug(f'graph={graph}')
    logger.debug(f'dependency_ordering={dependency_ordering}')
    logger.debug(f'mm_synthesized={mm_synthesized}')
    entities_pc = build_entity_parents_and_children(er_yaml)
    logger.debug(f'after build_entity_parents_and_children(): '
                 f'entities_pc={json.dumps(entities_pc, indent=4)}')
    entities = er_yaml['entities']
    logger.debug(f'entities={yaml.dump(entities)}')
    # Index the entities
    entity_indices = {}
    for entity_index, entity_outer in enumerate(entities):
        entity = entity_outer['entity']
        logger.debug(f'entity_index={entity_index} for entity:\n{yaml.dump(entity)}')
        entity_indices.update({entity['name']: entity_index})
    logger.debug(f'entity_indices=\n{json.dumps(entity_indices, indent=4)}')
    # Generate catalog info for entities, in sorted-name order
    klist = list(entity_indices.keys()).copy()
    klist.sort()
    for entity_name in klist:
        entity_index = entity_indices[entity_name]
        entity_outer = entities[entity_index]
        entity = entity_outer['entity']
        logger.debug(f'Generating catalog info for: entity_index={entity_index} entity={entity}')
        print('---', file=output_object)
        print(f'## {entity_name}\n', file=output_object)
        if 'description' in entity:
            print('**Description:** ', file=output_object)
            entity_description = entity['description']
            for line in entity_description.splitlines():
                print(f'{line} ', file=output_object)
        if 'note' in entity:
            print('**Note:** ', file=output_object)
            entity_note = entity['note']
            for line in entity_note.splitlines():
                print(f'{line} ', file=output_object)
        if 'attributes' in entity:
            if 'description' in entity or 'note' in entity:
                print(file=output_object)
            print('### Columns:', file=output_object)
            print(f'\nNum | Name | Type | Unique | Description | Note', file=output_object)
            print(f'--- | ---- | ---- | ------ | ----------- | ----', file=output_object)
            for ordinal, attr_items in enumerate(entity['attributes'].items()):
                attr_name = attr_items[0]
                attr_details = attr_items[1]
                logger.debug(f'{i(1)}attr_name={attr_name} attr_details={attr_details}')
                attr_type = attr_details['type'] if 'type' in attr_details else ''
                attr_unique = attr_details['unique'] if 'unique' in attr_details else ''
                attr_description = attr_details['description'] \
                    if 'description' in attr_details else ''
                attr_note = attr_details['note'] if 'note' in attr_details else ''
                print(f'{ordinal + 1} | {attr_name} | {attr_type} | {attr_unique} | '
                      f'{attr_description} | {attr_note}', file=output_object)
        # Generate relationships section
        parents_count = 0
        children_count = 0
        mm_count = 0
        if entity_name in entities_pc:
            entity_pc = entities_pc[entity_name]
            if 'parents' in entity_pc:
                parents = entity_pc['parents']
                parents_count = cardinality.count(parents)
            if 'children' in entity_pc:
                children = entity_pc['children']
                children_count = cardinality.count(children)
        mm_participating = set()
        for mm in mm_synthesized:
            if entity_name in graph[mm]:
                mm_participating.add(mm)
        mm_count = cardinality.count(mm_participating)
        logger.debug(f'mm_participating={mm_participating}')
        logger.debug(f'parents_count={parents_count} children_count={children_count} '
                     f'mm_count={mm_count}')
        if (parents_count >= 1 or children_count >= 1 or mm_count >= 1) and \
                ('description' in entity or 'note' in entity or 'attributes' in entity):
            print(file=output_object)
        if parents_count >= 1 or children_count >= 1:
            print('### Relationships:', file=output_object)
        if parents_count >= 1:
            print('#### Parents', file=output_object)
            print('Name | Kind | Defining', file=output_object)
            print('---- | ---- | --------', file=output_object)
            for parent in parents:
                # Each parent is a single-entry dict
                assert cardinality.count(parent) == 1
                for parent_name, parent_details in parent.items():
                    pass
                logger.debug(f'parent_name={parent_name} parent_details={parent_details}')
                relationship_kind = parent_details['kind']
                is_defining = parent_details['defining'] \
                    if 'defining' in parent_details else False
                print(f'{parent_name} | {relationship_kind} | {is_defining}',
                      file=output_object)
        if children_count >= 1:
            print('#### Children', file=output_object)
            print('Name | Kind | Defining', file=output_object)
            print('---- | ---- | --------', file=output_object)
            for child in children:
                # Each child is a single-entry dict
                assert cardinality.count(child) == 1
                for child_name, child_details in child.items():
                    pass
                logger.debug(f'child_name={child_name} child_details={child_details}')
                relationship_kind = child_details['kind']
                is_defining = child_details['defining'] \
                    if 'defining' in child_details else False
                print(f'{child_name} | {relationship_kind} | {is_defining}',
                      file=output_object)
        if mm_count >= 1:
            print('#### Many-to-Many Relationships', file=output_object)
            print('Other Entity Name | Kind', file=output_object)
            print('----------------- | ----', file=output_object)
            for mm in mm_participating:
                for participant in graph[mm]:
                    if participant == entity_name:
                        continue
                    print(f'{participant} | zero_or_more', file=output_object)
        print(file=output_object)
    logger.debug('Leaving generate_entities()')
# Fragment (Python 2 / Keras 1 era code: dict.values() is indexed directly and
# fit_generator() takes samples_per_epoch/nb_epoch). The beginning of the
# x_y_train_ae() generator, which reads `line` and `unique_item_id`, is
# truncated in the original source.
        X_Y = list()
        # One-hot encode each item list into a vector over unique_item_id
        for i in json.loads(line).values()[0]:
            list_of_zeros = [0.0] * len(unique_item_id)
            for j in i:
                list_of_zeros[j] = 1.0
            X_Y.append(list_of_zeros)
        L = len(json.loads(line).values()[0])
        for j in range(L):
            X = X_Y[j]
            # Autoencoder training pair: the target equals the input
            yield (np.array([X]), np.array([X]))


uid_train, uid_test = u_c_id_for_train_test(1)
print(len(uid_train))
print(len(uid_test))
# cardinality.count() consumes a fresh generator to count the training samples
print(cardinality.count(x_y_train_ae(uid_train)))

ae = Sequential()
inputLayer = Dense(100, input_shape=(len(unique_item_id),), activation='tanh')
ae.add(inputLayer)
output = Dense(len(unique_item_id), activation='sigmoid')
ae.add(output)
ae.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
ae.fit_generator(x_y_train_ae(uid_train),
                 samples_per_epoch=cardinality.count(x_y_train_ae(uid_train)),
                 nb_epoch=200)

# Persist the learned weights and biases of both layers
w1 = ae.layers[0].get_weights()[0]
b1 = ae.layers[0].get_weights()[1]
w2 = ae.layers[1].get_weights()[0]
b2 = ae.layers[1].get_weights()[1]
pickle.dump(w1, open("w1.p", "wb"))
pickle.dump(b1, open("b1.p", "wb"))