def import_file_h(self, directory, platform, gateway_type):
    """
    Import a .h file (see manifold.metadata/*.h)
    Args:
        directory: A String instance containing the directory storing the .h files.
            Example: STATIC_ROUTES_DIR = "/usr/share/manifold/metadata/"
        platform: A String instance containing the name of the platform.
            Examples: "ple", "senslab", "tdmi", "omf", ...
        gateway_type: A String instance containing the type of the Gateway.
            Examples: "SFA", "XMLRPC", "MaxMind", "tdmi"
            See:
                sqlite3 /var/myslice/db.sqlite
                > select gateway_type from platform;
    Returns:
        A list of Announce instances, each Announce embeds a Table instance.
        This list may be empty.
    """
    # Check path: try "<gateway_type>.h" first, then "<gateway_type>-<platform>.h"
    filename = os.path.join(directory, "%s.h" % gateway_type)
    if not os.path.exists(filename):
        filename = os.path.join(directory, "%s-%s.h" % (gateway_type, platform))
        if not os.path.exists(filename):
            Log.debug("Metadata file '%s' not found (platform = %r, gateway_type = %r)" % (filename, platform, gateway_type))
            return []

    # Read input file (delegates to the module-level import_file_h() parser)
    Log.debug("Platform %s: Processing %s" % (platform, filename))
    return import_file_h(filename, platform)
def make_sub_graph(metadata, relevant_fields):
    """
    \brief Create a reduced graph based on the 3nf graph of metadata.
        We only keep vertices having a key in relevant_fields.
    \param metadata A DBGraph instance wrapping the full 3nf graph
    \param relevant_fields A dictionary {Table: Fields} indicating,
        for each Table, which Field(s) are relevant
    \return The corresponding sub-3nf-graph
    """
    g = metadata.graph
    sub_graph = DiGraph()
    copy = dict()
    vertices_to_keep = set(relevant_fields.keys())

    # Copy relevant vertices from g
    for u in vertices_to_keep:
        copy_u = Table.make_table_from_fields(u, relevant_fields[u])
        copy[u] = copy_u
        sub_graph.add_node(copy_u)  # no data on nodes

    # Copy relevant arcs from g
    for u, v in g.edges():
        try:
            copy_u, copy_v = copy[u], copy[v]
        except KeyError:
            # At least one endpoint was pruned: skip this arc
            continue

        sub_graph.add_edge(copy_u, copy_v, deepcopy(g.edge[u][v]))
        Log.debug("Adding copy of: %s" % metadata.print_arc(u, v))

    return sub_graph
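# --- Illustration (hypothetical node names, not the Manifold API) ---
# A minimal, self-contained sketch of the copy-and-filter pattern used by
# make_sub_graph() on a plain networkx DiGraph: only the relevant nodes are
# copied, and only the arcs whose two endpoints both survived are kept.
def _demo_make_sub_graph():
    from networkx import DiGraph
    full = DiGraph()
    full.add_edges_from([("user", "slice"), ("slice", "resource"), ("slice", "lease")])
    relevant = set(["user", "slice", "resource"])

    sub = DiGraph()
    sub.add_nodes_from(n for n in full.nodes() if n in relevant)
    sub.add_edges_from((u, v) for u, v in full.edges() if u in relevant and v in relevant)
    return sub  # contains user -> slice -> resource; the slice -> lease arc is dropped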
def remove_pid_file(self):
    """
    \brief Remove the pid file (internal usage)
    """
    # The lock file is implicitly released while removing the pid file
    Log.debug("Removing %s" % Options().pid_filename)
    if os.path.exists(Options().pid_filename):
        os.remove(Options().pid_filename)
def make_lock_file(self):
    """
    \brief Prepare the lock file required to manage the pid file
        Initialize Options().lock_file
    """
    if Options().pid_filename and not Options().no_daemon:
        Log.debug("Daemonizing using pid file '%s'" % Options().pid_filename)
        Options().lock_file = lockfile.FileLock(Options().pid_filename)
        if Options().lock_file.is_locked():
            log_error("'%s' is already running ('%s' is locked)." % (Options().get_name(), Options().pid_filename))
            self.terminate()
        Options().lock_file.acquire()
    else:
        Options().lock_file = None
def make_arc(self, u, v):
    """
    \brief Connect a "u" Table to a "v" Table (if necessary) in the DbGraph
    \param u The source node (Table instance)
    \param v The target node (Table instance)
    """
    #-----------------------------------------------------------------------
    #returns(Predicate)
    #@accepts(set, Key)
    def make_predicate(fields_u, key_v):
        """
        \brief Compute the Predicate to JOIN a "u" Table with a "v" Table
        \param fields_u The set of Fields of u required to JOIN with v
        \param key_v The Key of v involved in the JOIN.
            You may pass None if v has no key.
        \return This function returns:
            - None iff u embeds a set of v instances,
            - a Predicate instance indicating how to join u and v otherwise.
        """
        if len(fields_u) == 1 and list(fields_u)[0].is_array():
            # u embeds an array of elements of type v, so there is
            # no JOIN and thus no Predicate.
            # Note that v does not even need to have a key.
            return None

        # u and v can be joined.
        # This code only supports Keys made of exactly one Field.
        assert key_v, "Cannot join with a None key"
        assert len(fields_u) == len(key_v), "Cannot join fields = %r with key = %r" % (fields_u, key_v)
        assert len(key_v) == 1, "Composite key not supported: key = %r" % key_v

        return Predicate(
            "%s" % list(fields_u)[0].get_name(),
            "==",
            "%s" % list(key_v)[0].get_name()
        )
    #-----------------------------------------------------------------------

    if u == v:
        return

    relations = u.get_relations(v, self)
    if relations:
        self.graph.add_edge(u, v, relations=relations)
        Log.debug("NEW EDGE %s" % self.print_arc(u, v))
        relations_str = [r.get_str_type() for r in relations]
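# --- Illustration (hypothetical field/key names, not the Manifold API) ---
# A minimal sketch of the rule implemented by make_predicate() above, using
# plain strings instead of Field/Key/Predicate instances: a single array
# field means that u embeds v (no JOIN, hence None); otherwise a one-field
# key yields the equality predicate joining u to v.
def _demo_make_predicate(field_u_name, field_u_is_array, key_v_names):
    if field_u_is_array:
        return None  # u embeds an array of v instances: no JOIN needed
    assert len(key_v_names) == 1, "Composite key not supported"
    return (field_u_name, "==", key_v_names[0])

# _demo_make_predicate("user", False, ["user_hrn"])  -> ("user", "==", "user_hrn")
# _demo_make_predicate("resources", True, ["urn"])   -> None (embedded array)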
def child_callback(self, child_id, record):
    """
    \brief Process records received from a child node
    \param child_id Identifier of the child that received the record
    \param record A dictionary representing the received record
    """
    if record.is_last():
        # XXX SEND ALL
        self.status.completed(child_id)
        return

    key = self.key.get_field_names()

    # DISTINCT not implemented, just forward the record
    if not key:
        Log.critical("No key associated to UNION operator")
        self.send(record)
        return

    # Send records that have no key
    if not Record.has_fields(record, key):
        Log.info("UNION::child_callback sent record without key '%(key)s': %(record)r", **locals())
        self.send(record)
        return

    key_value = Record.get_value(record, key)
    if key_value in self.key_map:
        Log.debug("UNION::child_callback merged duplicate records: %r" % record)
        prev_record = self.key_map[key_value]
        for k, v in record.items():
            if k not in prev_record:
                prev_record[k] = v
                continue
            if isinstance(v, list):
                if not prev_record[k]:
                    # With failures, it can occur that this is None
                    prev_record[k] = list()
                prev_record[k].extend(v)
            # DUPLICATES?
            #else:
            #    if not v == previous[k]:
            #        print "W: ignored conflicting field"
            #    # else: nothing to do
    else:
        self.key_map[key_value] = record
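# --- Illustration (hypothetical records, not the Manifold API) ---
# A minimal sketch of the duplicate-merge rule used by child_callback() above,
# on plain dicts: fields missing from the first record are copied, list fields
# are concatenated, and conflicting scalar fields keep the first value seen.
def _demo_union_merge(prev_record, record):
    for k, v in record.items():
        if k not in prev_record:
            prev_record[k] = v
        elif isinstance(v, list):
            if not prev_record[k]:  # may be None after a partial failure
                prev_record[k] = list()
            prev_record[k].extend(v)
        # else: conflicting scalar field, the first value wins
    return prev_record

# _demo_union_merge({"hrn": "a", "tags": [1]}, {"hrn": "a", "tags": [2], "site": "x"})
# -> {"hrn": "a", "tags": [1, 2], "site": "x"}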
def start_reactor(self):
    self._num_instances += 1
    if self._reactorStarted:
        Log.debug("Reactor already started")
        return
    self._reactorStarted = True

    # Should not occur
    if self._reactorRunning:
        Log.debug("Reactor already running: should not occur")
        return

    threading.Thread.start(self)

    # Wait (up to ~0.5 s) for the reactor thread to come up
    cpt = 0
    while not self._reactorRunning:
        time.sleep(0.1)
        cpt += 1
        if cpt > 5:
            raise ReactorException("Reactor thread is taking too long to start... cancelling")

    self.reactor.addSystemEventTrigger('after', 'shutdown', self.__reactorShutDown)
def wrap(source, args):
    #token = yield SFAManageToken().get_token(self.interface)

    # "name" is the method name captured from the enclosing scope
    args = (name,) + args

    # Build a loggable version of the arguments, masking bulky/sensitive ones
    printable_args = []
    for arg in args:
        if is_rspec(arg):
            printable_args.append('<rspec>')
        elif is_credential(arg):
            printable_args.append('<credential>')
        elif is_credential_list(arg):
            printable_args.append('<credentials>')
        elif is_user_list(arg):
            printable_args.append('<user list>')
        else:
            printable_args.append(str(arg))

    self.started = time.time()
    self.arg0 = printable_args[0]
    self.arg1 = printable_args[1:]
    Log.debug("SFA CALL %s(%s) - interface = %s" % (self.arg0, self.arg1, self.interface))

    self.proxy.callRemote(*args).addCallbacks(proxy_success_cb, proxy_error_cb)
def proxy_error_cb(error):
    #SFAManageToken().put_token(self.interface)
    diff = time.time() - self.started
    Log.debug("SFA CALL ERROR %s(%s) - interface = %s - execution time = %s sec." % (self.arg0, self.arg1, self.interface, round(diff, 2)))
    d.errback(ValueError("Error in SFA Proxy %s" % error))
def proxy_success_cb(result):
    #SFAManageToken().put_token(self.interface)
    diff = time.time() - self.started
    Log.debug("SFA CALL SUCCESS %s(%s) - interface = %s - execution time = %s sec." % (self.arg0, self.arg1, self.interface, round(diff, 2)))
    d.callback(result)
def stop(self):
    Log.debug("Stopping '%s'" % self.daemon_name)
def build_pruned_tree(metadata, needed_fields, map_vertex_pred):
    """
    \brief Compute the pruned 3nf tree included in a 3nf graph,
        according to a predecessor map modeling a 3nf tree and a set
        of needed fields.
    \param metadata A DBGraph instance wrapping the 3nf graph
    \param needed_fields A set of Field instances, queried by the user
    \param map_vertex_pred The predecessor map related to the tree we are pruning
        \sa manifold.util.dfs.py
    \return An instance of networkx.DiGraph representing the pruned 3nf tree.
        Data related to this graph are copied from the 3nf graph, so it can
        be safely modified without impacting it. Such a graph is typically
        embedded in a DBGraph instance.
        \sa manifold.core.dbgraph.py
    """
    Log.debug("-" * 100)
    Log.debug("Prune useless keys/nodes/arcs from tree")
    Log.debug("-" * 100)

    g = metadata.graph
    (_, relevant_fields, missing_fields) = prune_precedessor_map(metadata, needed_fields, map_vertex_pred)
    # XXX We don't use the predecessor map for building the subgraph, a sign we can simplify here
    tree = make_sub_graph(metadata, relevant_fields)

    # Print tree
    Log.debug("-" * 100)
    Log.debug("Minimal tree:")
    Log.debug("-" * 100)
    for table in tree.nodes():
        Log.debug("%s\n" % table)
    Log.debug("-" * 100)

    return (tree, missing_fields)
def execute(self, query, params=None, cursor_factory=None):
    """
    Execute a SQL query on PostgreSQL
    Args:
        query: a String containing a SQL query
        params: a dictionary, or None if unused
        cursor_factory: see http://initd.org/psycopg/docs/extras.html
    Returns:
        The corresponding cursor
    """
    # modified for psycopg2-2.0.7
    # executemany is undefined for SELECTs
    # see http://www.python.org/dev/peps/pep-0249/
    # accepts either None, a single dict, a tuple of a single dict - in which case
    # it execute's - or a tuple of several dicts, in which case it executemany's
    Log.debug(query)
    cursor = self.connect(cursor_factory)
    try:
        # psycopg2 requires the %()s format for all parameters,
        # regardless of type.
        # This needs to be done carefully though, as with pattern-based filters
        # we might have percents embedded in the query,
        # e.g. GetPersons({"email": "*fake*"}) was resulting in ... LIKE "%sake%"
        if psycopg2:
            query = re.sub(r"(%\([^)]*\)|%)[df]", r"\1s", query)

        # Rewrite wildcards set by Filter.py as "***" into "%"
        query = query.replace("***", "%")

        if not params:
            cursor.execute(query)
        elif isinstance(params, StringValue):
            cursor.execute(query, params)
        elif isinstance(params, dict):
            cursor.execute(query, params)
        elif isinstance(params, tuple) and len(params) == 1:
            cursor.execute(query, params[0])
        else:
            param_seq = params
            cursor.executemany(query, param_seq)
        (self.rowcount, self.description, self.lastrowid) = \
            (cursor.rowcount, cursor.description, cursor.lastrowid)
    except Exception as e:
        try:
            self.rollback()
        except:
            pass
        uuid = uuid4()  # commands.getoutput("uuidgen")
        Log.debug("Database error %s:" % uuid)
        Log.debug(e)
        Log.debug("Query:")
        Log.debug(query)
        Log.debug("Params:")
        Log.debug(pformat(params))
        msg = str(e).rstrip()  # jordan
        raise Exception(self.make_error_message(msg, uuid))
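# --- Illustration (hypothetical query, not part of the gateway API) ---
# A minimal sketch of the placeholder rewrite performed in execute() above:
# psycopg2 only understands %(name)s placeholders, so %(name)d and %(name)f
# are rewritten to %(name)s, and the "***" wildcard convention set by
# Filter.py is translated into the SQL "%" wildcard afterwards.
def _demo_placeholder_rewrite(query):
    import re
    query = re.sub(r"(%\([^)]*\)|%)[df]", r"\1s", query)
    return query.replace("***", "%")

# _demo_placeholder_rewrite("SELECT * FROM t WHERE n = %(n)d AND email LIKE '***fake***'")
# -> "SELECT * FROM t WHERE n = %(n)s AND email LIKE '%fake%'"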
def make_table(self, table_name):
    """
    Build a Table instance according to a given table/view name by
    querying the PostgreSQL schema.
    Args:
        table_name: Name of a view or a relation in PostgreSQL (String instance)
    Returns:
        The Table instance extracted from the PostgreSQL schema related
        to the queried view/relation
    """
    cursor = self.get_cursor()
    table = Table(self.get_platform(), None, table_name, None, None)
    param_execute = {"table_name": table_name}

    # FOREIGN KEYS:
    # We build a foreign_keys dictionary associating each field of
    # the table with the table it references.
    cursor.execute(PostgreSQLGateway.SQL_TABLE_FOREIGN_KEYS, param_execute)
    fks = cursor.fetchall()
    foreign_keys = {fk.column_name: fk.foreign_table_name for fk in fks}

    # COMMENTS:
    # We build a comments dictionary associating each field of the table
    # with its comment.
    comments = self.get_fields_comment(table_name)

    # FIELDS:
    fields = set()
    cursor.execute(PostgreSQLGateway.SQL_TABLE_FIELDS, param_execute)
    for field in cursor.fetchall():
        # PostgreSQL types vs base types
        table.insert_field(Field(
            qualifiers  = [] if field.is_updatable == "YES" else ["const"],
            type        = foreign_keys[field.column_name] if field.column_name in foreign_keys else PostgreSQLGateway.to_manifold_type(field.data_type),
            name        = field.column_name,
            is_array    = (field.data_type == "ARRAY"),
            description = comments[field.column_name] if field.column_name in comments else "(null)"
        ))

    # PRIMARY KEYS: XXX simple key?
    # We build a key dictionary associating each table with its primary key
    cursor.execute(PostgreSQLGateway.SQL_TABLE_KEYS, param_execute)
    fks = cursor.fetchall()

    primary_keys = dict()
    for fk in fks:
        foreign_key = tuple(fk.column_names)
        if table_name not in primary_keys:
            primary_keys[table_name] = set()
        primary_keys[table_name].add(foreign_key)

    if table_name in primary_keys:
        for k in primary_keys[table_name]:
            table.insert_key(k)

    # PARTITIONS:
    # TODO
    #mc = MetadataClass('class', table_name)
    #mc.fields = fields
    #mc.keys.append(primary_keys[table_name])

    table.capabilities.retrieve   = True
    table.capabilities.join       = True
    table.capabilities.selection  = True
    table.capabilities.projection = True
    Log.debug("Adding table: %s" % table)
    return table
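# --- Illustration (hypothetical columns, not the psycopg2 row API) ---
# A minimal sketch of the typing rule used in make_table() above: a column
# that appears in foreign_keys is typed with the name of the table it
# references, otherwise its SQL type is mapped to a Manifold base type
# (the mapping below is a toy stand-in for PostgreSQLGateway.to_manifold_type).
def _demo_field_type(column_name, data_type, foreign_keys):
    sql_to_manifold = {"integer": "int", "text": "string", "boolean": "bool"}
    if column_name in foreign_keys:
        return foreign_keys[column_name]  # e.g. "site" for a site_id column
    return sql_to_manifold.get(data_type, data_type)

# _demo_field_type("site_id", "integer", {"site_id": "site"}) -> "site"
# _demo_field_type("hostname", "text", {"site_id": "site"})   -> "string"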
def to_3nf(metadata):
    """
    Compute a 3nf schema
    See also http://elm.eeng.dcu.ie/~ee221/EE221-DB-7.pdf p14
    Args:
        metadata: A dictionary {String => list(Announces)} which maps each
            platform name to a list containing its corresponding Announces.
    Returns:
        The corresponding 3nf graph (DbGraph instance)
    """
    # 1) Compute functional dependencies
    tables = []
    map_method_capabilities = {}
    for platform, announces in metadata.items():
        for announce in announces:
            tables.append(announce.table)
            map_method_capabilities[(platform, announce.table.get_name())] = announce.table.get_capabilities()
    fds = make_fd_set(tables)

    # 2) Find a minimal cover
    (fds_min_cover, fds_removed) = fd_minimal_cover(fds)

    # 3) Reinject the fds removed during normalization
    reinject_fds(fds_min_cover, fds_removed)

    # 4) Group fds by method
    #OBSOLETE| fdss = fds_min_cover.group_by_method()           # Mando
    fdss = fds_min_cover.group_by_tablename_method()            # Jordan

    # 5) Make 3nf tables
    tables_3nf = list()
    #DEPRECATED|LOIC| map_tablename_methods = dict() # map table_name with methods to demux

    for table_name, map_platform_fds in fdss.items():
        # For the potential parent table:
        # the number of distinct platforms...
        num_platforms = 0
        # ... and the set of platforms
        all_platforms = set()
        common_fields = Fields()
        common_key_names = set()

        # Annotations needed for the query plan
        child_tables = list()

        for platform, fds in map_platform_fds.items():
            platforms = set()
            fields = set()
            keys = Keys()

            # Annotations needed for the query plan
            map_method_keys = dict()
            map_method_fields = dict()

            for fd in fds:
                key = fd.get_determinant().get_key()
                keys.add(key)
                fields |= fd.get_fields()

                # We also need to add the fields from the key
                for key_field in key:
                    fields.add(key_field)
                # XXX

                for field, methods in fd.get_map_field_methods().items():
                    for method in methods:
                        # key annotation
                        if method not in map_method_keys:
                            map_method_keys[method] = set()
                        map_method_keys[method].add(key)

                        # field annotations
                        if method not in map_method_fields:
                            map_method_fields[method] = set()
                        map_method_fields[method].add(field.get_name())
                        map_method_fields[method].add(key_field.get_name())

                        #DEPRECATED|LOIC| # demux annotation
                        #DEPRECATED|LOIC| method_name = method.get_name()
                        #DEPRECATED|LOIC| if method_name != table_name :
                        #DEPRECATED|LOIC|     if method_name not in map_tablename_methods.keys():
                        #DEPRECATED|LOIC|         map_tablename_methods[method_name] = set()
                        #DEPRECATED|LOIC|     map_tablename_methods[method_name].add(method)

                        platforms.add(method.get_platform())

            table = Table(platforms, None, table_name, fields, keys)

            # Inject field and key annotations into the Table object
            table.map_method_keys   = map_method_keys
            table.map_method_fields = map_method_fields
            tables_3nf.append(table)
            child_tables.append(table)
            Log.debug("TABLE 3nf:", table, table.keys)
            #print "     method fields", map_method_fields

            num_platforms += 1
            all_platforms |= platforms
            if common_fields.is_empty():
                common_fields = Fields(fields)
            else:
                common_fields &= Fields(fields)

            # Collect the names of the fields of each key
            keys_names = frozenset([field.get_name() for key in keys for field in key])
            common_key_names.add(keys_names)

        # Convert common_key_names into Keys() according to common_fields
        common_keys = set()
        map_name_fields = dict()
        for field in common_fields:
            map_name_fields[field.get_name()] = field
        for key_names in common_key_names:
            common_keys.add(Key(frozenset([map_name_fields[field_name] for field_name in key_names])))

        # Several platforms provide the same object, so we have to build a parent table
        if num_platforms > 1:
            parent_table = Table(all_platforms, None, table_name, common_fields, common_keys)

            # Migrate common fields from children to the parent, except keys
            parent_map_method_fields = dict()
            # XXX "key" here is the last Key seen in the loop above
            names_in_common_keys = key.get_field_names()

            for field in common_fields:
                methods = set()
                field_name = field.get_name()
                for child_table in child_tables:
                    # Objective: remove the field from the child table.
                    # Several methods can provide it.
                    for _method, _fields in child_table.map_method_fields.items():
                        if field_name in _fields:
                            methods.add(_method)
                            if field_name not in names_in_common_keys:
                                _fields.remove(field.get_name())
                    if field_name not in names_in_common_keys:
                        child_table.erase_field(field_name)

                # Add the field, with all its methods, to parent_table
                for method in methods:
                    if method not in parent_map_method_fields:
                        parent_map_method_fields[method] = set()
                    parent_map_method_fields[method].add(field.get_name())
                    #MANDO|parent_map_method_fields[method].add(field.get_name())

            # Inject field and key annotations into the Table object
            #MANDO|DEPRECATED| parent_table.map_method_keys = dict() #map_common_method_keys
            parent_table.map_method_fields = parent_map_method_fields
            tables_3nf.append(parent_table)
            Log.debug("Parent table TABLE 3nf:", parent_table, table.get_keys())
            #print "     method fields", parent_map_method_fields

        # XXX We already know about the links between those two platforms,
        # and we can find them easily (cf dbgraph)

    #DEPRECATED|LOIC| # inject demux annotation
    #DEPRECATED|LOIC| for table in tables_3nf:
    #DEPRECATED|LOIC|     if table.get_name() in map_tablename_methods.keys():
    #DEPRECATED|LOIC|         table.methods_demux = map_tablename_methods[table.get_name()]
    #DEPRECATED|LOIC|     else:
    #DEPRECATED|LOIC|         table.methods_demux = set()

    # 6) Inject capabilities
    # TODO: capabilities are now in tables; should they be present in tables_3nf
    # instead of relying on map_method_capabilities?
    for table in tables_3nf:
        for announces in metadata.values():
            for announce in announces:
                if announce.get_table().get_name() == table.get_name():
                    capabilities = table.get_capabilities()
                    if capabilities.is_empty():
                        table.set_capability(announce.get_table().get_capabilities())
                    elif not capabilities == announce.get_table().get_capabilities():
                        Log.warning("Conflicting capabilities for tables %r (%r) and %r (%r)" % (
                            table, capabilities,
                            announce.get_table(), announce.get_table().get_capabilities()
                        ))

    # 7) Build the DBGraph
    graph_3nf = DBGraph(tables_3nf, map_method_capabilities)

    for table in tables_3nf:
        Log.info("%s" % table)
    return graph_3nf
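# --- Illustration (toy functional dependencies, not the Manifold fd classes) ---
# A minimal sketch of steps 1-3 of to_3nf() above, with fds represented as
# (determinant, field) pairs: the minimal cover drops any fd that is
# transitively implied by two others (here hrn -> site_name follows from
# hrn -> site_id and site_id -> site_name), and returns it for reinjection.
def _demo_fd_minimal_cover(fds):
    fields = set(f for _, f in fds)
    removed = set(
        (x, a) for (x, a) in fds
        if any((x, y) in fds and (y, a) in fds and y != a and y != x for y in fields)
    )
    return (fds - removed, removed)

# _demo_fd_minimal_cover({("hrn", "site_id"), ("hrn", "site_name"), ("site_id", "site_name")})
# -> ({("hrn", "site_id"), ("site_id", "site_name")}, {("hrn", "site_name")})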
def prune_precedessor_map(metadata, queried_fields, map_vertex_pred):
    """
    \brief Prune from a predecessor map (representing a tree) the entries
        that are not needed (i.e. remove useless nodes from the tree).
        A node/table u is useful if at least one of these conditions holds:
        - u provides a field queried by the user,
        - u is involved in a join required to answer the query.
    \param metadata DBGraph instance
    \param queried_fields The fields that are queried by the user
    \param map_vertex_pred A dictionary {Table => Table} which maps a vertex
        to its predecessor in the tree we're considering
    \return A tuple made of
        - predecessors: a dictionary {Table => Table} included in
          map_vertex_pred, containing only the relevant arcs
        - relevant_fields: a dictionary {Table => set(Field)} which indicates,
          for each 3nf Table, which of its Fields are relevant
        - missing_fields: the queried fields that no kept table provides
    """
    # NOTE: The pruning step could be avoided if we integrated all these
    # conditions into the DFS procedure
    g = metadata.graph

    # Helper function to manage a dictionary of sets
    def update_map(m, k, s):
        if k not in m:
            m[k] = set()
        m[k] |= s

    # Vertices in predecessor have already been examined in a previous iteration
    predecessor = dict()

    # A map associating each table with the set of fields it uniquely provides
    relevant_fields = dict()
    missing_fields = queried_fields

    # XXX In debug comments, we should explain succinctly, for each table,
    # why it has been kept or discarded

    # Loop in arbitrary order through the 3nf tables
    for v, u in map_vertex_pred.items():
        Log.debug("Considering %r -> [[ %r ]]" % (u, v))

        # For each table, determine the set of fields it provides that are
        # necessary to answer the query...
        queried_fields_v = v.get_fields_with_name(queried_fields, metadata)
        # ... and those that are not already present in the parent (foreign keys)
        queried_fields_u = u.get_fields_with_name(queried_fields, metadata) if u else set()
        queried_fields_v_unique = queried_fields_v - queried_fields_u
        # ???
        missing_fields -= queried_fields_v

        # If v is not the root and does not provide relevant fields (i.e. fields
        # not already found in the parent), prune it by not including it in the
        # predecessor map we return. (We do not need a table if all of its
        # fields can be found in the parent.)
        if u and not queried_fields_v_unique:
            Log.debug("  [X] No interesting field")
            continue

        # Let's now consider all pairs of tables (u -> v) up to the root,
        # focusing on table v.
        #
        # All tables back to the root are necessary, at least to be able to
        # retrieve v through successive joins (and we will thus need the keys
        # of the intermediate tables).
        while True:
            # v has already been considered
            if v in predecessor:
                Log.debug("  [X] Already processed %r" % v)
                break

            # TABLE
            #
            # Don't discard table v: add it to the predecessor map
            predecessor[v] = u

            # FIELDS
            #
            # Relevant fields for table v are those contributing to the query.
            # Including fields that might be in the key is not important here,
            # since they are all added later on.
            #
            # e.g. queried_fields has slice_hrn, but resource has slice among
            # its relevant fields, hence queried_fields_v should have slice
            # XXX debug traces
            Log.debug("queried_fields == %r" % queried_fields)
            queried_fields_v = v.get_fields_with_name(queried_fields, metadata)
            missing_fields -= set(map(lambda x: x.get_name(), queried_fields_v))
            Log.debug("we got %r" % queried_fields_v)
            Log.debug("missing_fields becomes %r" % missing_fields)

            # resolve
            queried_fields_v = set(map(lambda x: x.get_name(), queried_fields_v))
            update_map(relevant_fields, v, queried_fields_v)

            # KEYS
            #
            # Key fields are necessary to perform a JOIN in at least one table
            # (otherwise we would not have distinct 3nf tables)
            if u:
                # Thus, we are not considering the root (no need for keys)

                # For u, select the first join (arbitrary) (Key or set(Field)
                # instances depending on the arc label)
                key_u = metadata.get_relation(u, v).get_predicate().get_key()
                if isinstance(key_u, StringTypes):
                    key_u = [key_u]
                key_u = set(key_u)

                # For v, arbitrarily choose the first key, assuming it is used for the join
                key_v = v.get_keys().one().get_names()

                # Adding keys...
                update_map(relevant_fields, u, key_u)
                update_map(relevant_fields, v, key_v)

                # Queried fields do not necessarily include fields from the key,
                # so add them every time, otherwise they will get pruned
                update_map(relevant_fields, v, key_v)

            Log.debug("  [V] Table %r, relevant_fields = %r" % (v, relevant_fields.get(v, None)))

            # Stopping condition:
            if not u:
                # u = None: u is the root, no need to continue
                Log.debug("<<< reached root")
                break

            # Move to the previous arc u' -> v'=u -> v
            v = u
            u = map_vertex_pred[u]

    return (predecessor, relevant_fields, missing_fields)
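# --- Illustration (hypothetical table names, not the Manifold API) ---
# A minimal sketch of the predecessor-map walk performed above, on plain
# strings: from each useful vertex we walk back to the root, keeping every
# intermediate table because its key is needed for the successive joins.
def _demo_prune_predecessor_map(map_vertex_pred, useful):
    predecessor = dict()
    for v in useful:
        u = map_vertex_pred[v]
        while v not in predecessor:
            predecessor[v] = u
            if u is None:
                break  # reached the root
            v, u = u, map_vertex_pred[u]
    return predecessor

# With tree user -> slice -> resource (root = user) and only "resource" useful:
# _demo_prune_predecessor_map({"user": None, "slice": "user", "resource": "slice"}, ["resource"])
# -> {"resource": "slice", "slice": "user", "user": None}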