def register(package):
    """
    Import every module found directly inside a package.

    The modules are imported for their side effects only: nothing is
    returned and the imported module objects are not kept. A module that
    fails to import is logged and skipped, so one broken module does not
    prevent the remaining ones from being loaded.

    Args:
        package: An already imported package; its __name__ and __path__
            attributes are used to enumerate its modules.
    """
    # Imported locally so that this function does not depend on the file header.
    import importlib

    prefix = package.__name__ + "."
    for _importer, modname, _ispkg in pkgutil.iter_modules(package.__path__, prefix):
        try:
            importlib.import_module(modname)
        except Exception as e:
            # Deliberate best effort: report the failure and keep going.
            Log.info("Could not load %s : %s" % (modname, e))
def __init__(self, allowed_capabilities=None):
    """
    Create an Interface instance: register the Gateways, load the enabled
    platforms from the Storage, then boot.

    Args:
        allowed_capabilities: A Capabilities instance or None
    """
    # Register the list of Gateways
    Log.info("Registering gateways")
    register_gateways()

    # Platforms whose "disabled" flag is False, as returned by the Storage
    # (a list of dict, each dict describing one platform; see the platform
    # table in the Storage).
    enabled_platforms = Query().get("platform").filter_by("disabled", "=", False)
    self.platforms = Storage.execute(enabled_platforms)

    # A Capabilities instance (or None)
    self.allowed_capabilities = allowed_capabilities

    # {String : list(Announce)} dictionary mapping each platform name
    # (= namespace) to its corresponding Announces.
    self.metadata = {}

    # {String : Gateway} dictionary mapping each platform name to the
    # appropriate Gateway instance.
    self.gateways = {}

    self.policy = Policy(self)
    self.boot()
def execute_query(self, namespace, query, annotations, is_deferred=False):
    """
    Build the QueryPlan answering a Query, instantiate the Gateways it
    needs, then execute it.

    Args:
        namespace: A platform name restricting the platforms allowed in the
            QueryPlan, or None to allow every platform of self.platforms.
        query: The Query to execute.
        annotations: A dictionary of annotations (may be None or empty); its
            'user' entry, if any, is passed to the QueryPlan and Gateways.
        is_deferred: Passed as is to execute_query_plan().
    Returns:
        The value returned by execute_query_plan().
    """
    user = annotations.get('user', None) if annotations else None

    # Code duplication with Interface() class
    allowed_platforms = [
        platform['platform']
        for platform in self.platforms
        if namespace is None or platform['platform'] == namespace
    ]

    query_plan = QueryPlan()
    query_plan.build(query, self.g_3nf, allowed_platforms, self.allowed_capabilities, user)
    Log.tmp("QUERY PLAN")
    query_plan.dump()

    self.instanciate_gateways(query_plan, user)
    Log.info("QUERY PLAN:\n%s" % (query_plan.dump()))

    return self.execute_query_plan(namespace, query, annotations, query_plan, is_deferred)
def xmlrpc_forward(self, request, query, annotations=None):
    """
    Authenticate an incoming XMLRPC request.

    Unless authentication is disabled in the Options, the annotations must
    carry an 'authentication' entry. The request is stored in that entry
    under the 'request' key (the caller's dictionary is updated in place)
    and the entry is then checked through Auth.

    Args:
        request: The incoming request.
        query: The incoming query (only logged here).
        annotations: A dictionary of annotations, or None.
    Returns:
        A dict built from a FORBIDDEN ResultValue error when the
        authentication token is missing or rejected, None otherwise.
    """
    Log.info("Incoming XMLRPC request, query = %r, annotations = %r" % (self.display_query(query), annotations))

    if Options().disable_auth:
        Log.info("Authentication disabled by configuration")
    else:
        # We expect to find an authentication token in the annotations.
        # A missing entry and an entry explicitly set to None are both
        # reported as a missing token instead of failing on the item
        # assignment below.
        auth = annotations.get('authentication') if annotations else None
        if auth is None:
            msg = "You need to specify an authentication token in annotations"
            return dict(ResultValue.get_error(ResultValue.FORBIDDEN, msg))

        auth['request'] = request

        # Check login password: a failed check is signalled by an exception.
        try:
            Auth(auth, self.interface).check()
        except Exception as e:
            Log.warning("XMLRPCAPI::xmlrpc_forward: Authentication failed...: %s" % str(e))
            msg = "Authentication failed: %s" % e
            return dict(ResultValue.get_error(ResultValue.FORBIDDEN, msg))
def make_metadata(self):
    """
    Prepare metadata (e.g. Tables encapsulated in Announces instances)
    related to this Gateway.

    Announces are built from the table names reported by
    get_table_names() and from the .h file related to this Gateway
    (if any); when both are available they are merged.

    Returns:
        The Announces built from the table names, merged with the ones
        read from the .h file when the latter are not empty.
    """
    # Import metadata from pgsql schema.
    # By default, we only fetch tables and we ignore views.
    announces_pgsql = self.make_metadata_from_names(self.get_table_names())
    if not announces_pgsql:
        # No exception is available at this point: only the platform is reported.
        Log.warning("Cannot find metadata for platform %s" % (self.platform,))
    else:
        Log.info("Tables imported from pgsql schema: %s" % [announce.get_table() for announce in announces_pgsql])

    # Design notes kept from the original implementation.
    #
    # Model of the metadata:
    #
    #   class table {
    #       string comment;
    #       field  fields[];
    #       key    keys[];
    #   };
    #
    #   class field {
    #       string comment;
    #       bool   is_const;
    #       bool   is_array;
    #       string type;
    #   };
    #
    #   class key {
    #       table table;    /**< BACKWARD_1N */
    #       field fields[];
    #   };
    #
    # 1) router::boot:
    #      for each gateway:
    #        if gateway.type == postgresql:
    #          router.instantiate_gateway(gateway)
    #    PostgreSQLGateway::__init__():
    #      self.router.instantiate_gateway(platform = postgresql_metadata, config = config)
    # 2) d = self.router.forward(Query.get('object').select([name, field.name, field.type, field.comment, key])
    #    see core/interface.py:180
    # 3) announces_pgsql = Announce.from_dict(d)
    #    In this gateway inject field info in table info

    # Fetch metadata from .h files (if any)
    announces_h = Announces.from_dot_h(self.get_platform(), self.get_gateway_type())
    # The return statement below accepts a falsy announces_h, so the log must too.
    Log.info("Tables imported from .h schema: %s" % [announce.get_table() for announce in (announces_h or [])])

    # Return the resulting announces
    if announces_h:
        return self.merge_announces(announces_pgsql, announces_h)
    return announces_pgsql
def get_gateway_config(self, gateway_name):
    """
    Return the default hardcoded configuration of a gateway type.

    Args:
        gateway_name: The gateway type ("postgresql" and "csv" have a
            dedicated configuration).
    Returns:
        A new configuration dictionary; it is empty for any other
        gateway type.
    """
    Log.info("Hardcoded CSV|PostgreSQL configuration")

    if gateway_name == "postgresql":
        return {
            "db_user"     : "postgres",
            "db_password" : None,
            "db_name"     : "test",
        }

    if gateway_name == "csv":
        return {"filename" : "/tmp/test.csv"}

    return {}
def child_callback(self, child_id, record):
    """
    Process a record received from a child node of this UNION operator.

    The last record marks the child as completed. Records are forwarded
    as is when the operator has no key or when they do not carry the key
    fields. Other records are stored in self.key_map by key value; a
    record whose key value is already known is merged into the stored
    one instead of being stored.

    Args:
        child_id: Identifier of the child that received the record.
        record: Dictionary representing the received record.
    """
    if record.is_last():
        # XXX SEND ALL
        self.status.completed(child_id)
        return

    key = self.key.get_field_names()

    if not key:
        # DISTINCT not implemented, just forward the record
        Log.critical("No key associated to UNION operator")
        self.send(record)
        return

    if not Record.has_fields(record, key):
        # Send records that have no key.
        # locals() is forwarded to the logger, so no other local name may
        # be bound before this call.
        Log.info("UNION::child_callback sent record without key '%(key)s': %(record)r", **locals())
        self.send(record)
        return

    key_value = Record.get_value(record, key)
    if key_value not in self.key_map:
        # First record seen with this key value
        self.key_map[key_value] = record
        return

    Log.debug("UNION::child_callback merged duplicate records: %r" % record)
    merged = self.key_map[key_value]
    for field_name, value in record.items():
        if field_name not in merged:
            merged[field_name] = value
        elif isinstance(value, list):
            if not merged[field_name]:
                # with failures it can occur that this is None
                merged[field_name] = []
            merged[field_name].extend(value)  # DUPLICATES ?
        # Non-list values already present in the stored record are left
        # untouched (conflicts are silently ignored).
def parse_dot_h(iterable, filename=None):
    """
    Import information stored in a .h file (see manifold/metadata/*.h)

    Args:
        iterable: The file descriptor of a successfully opened file.
            You may also pass iter(string) if the content of the .h
            is stored in "string"
        filename: The corresponding filename. It is only used to print
            user friendly message, so you may pass None.
    Returns:
        A tuple made of two dictionaries (tables, enums)
        tables:
            - key: String (the name of the class)
            - data: the corresponding Table instance
        enums:
            - key: String (the name of the enum)
            - data: the corresponding MetadataEnum instance
    Raises:
        ValueError: if a line is not well-formed. Line numbers reported
            in the message start at 0.
    """
    table_name = None      # Name of the class being parsed (None outside a class)
    cur_enum_name = None   # Name of the enum being parsed (None outside an enum)
    tables = {}
    enums = {}

    for no_line, line in enumerate(iterable):
        line = line.rstrip("\r\n")

        # Skip empty lines and preprocessor-like lines.
        if REGEXP_EMPTY_LINE.match(line) or line.startswith('#'):
            continue

        if table_name:
            # Current scope = class
            cur_class = tables[table_name]

            # local const MyType my_field[]; /**< Comment */
            m = REGEXP_CLASS_FIELD.match(line)
            if m:
                qualifiers = []
                if m.group(2):
                    qualifiers.append("local")
                if m.group(3):
                    qualifiers.append("const")
                # The comment group is guarded in case it did not take part
                # in the match.
                description = (m.group(7) or "").lstrip("/*< ").rstrip("*/ ")
                cur_class.insert_field(
                    Field(
                        qualifiers=qualifiers,
                        type=m.group(4),
                        name=m.group(5),
                        is_array=(m.group(6) is not None),
                        description=description,
                    )
                )
                continue

            # KEY(my_field1, my_field2);
            m = REGEXP_CLASS_KEY.match(line)
            if m:
                key = [key_elt.strip() for key_elt in m.group(1).split(',')]
                cur_class.insert_key(key)  # XXX
                continue

            # CAPABILITY(my_field1, my_field2);
            m = REGEXP_CLASS_CAP.match(line)
            if m:
                # A real list (not a lazy map object) whatever the Python version.
                capability = [name.strip() for name in m.group(1).split(',')]
                cur_class.set_capability(capability)
                continue

            # PARTITIONBY(clause_string);
            m = REGEXP_CLASS_CLAUSE.match(line)
            if m:
                cur_class.partitions.append(Clause(m.group(1)))
                continue

            # };
            if REGEXP_CLASS_END.match(line):
                if not cur_class.keys:
                    # The class declares no key: we must add an implicit one.
                    key_name = "%s_id" % table_name
                    if key_name in cur_class.get_field_names():
                        Log.error("Trying to add implicit key %s which is already in use" % key_name)
                    Log.info("Adding implicit key %s in %s" % (key_name, table_name))
                    dummy_key_field = Field(["const"], "unsigned", key_name, False, "Dummy key")
                    cur_class.insert_field(dummy_key_field)
                    cur_class.insert_key(Key([dummy_key_field]))
                table_name = None
                continue

            # Invalid line
            error_message = "In '%s', line %r: in table '%s': invalid line: [%r] %s" % (
                filename,
                no_line,
                table_name,
                line,
                ''.join([PATTERN_BEGIN, PATTERN_CLASS_FIELD, PATTERN_END])
            )

        elif cur_enum_name:
            # Current scope = enum

            # "my string value",
            # NOTE(review): enum values are recognised but not stored in the
            # MetadataEnum instance.
            if REGEXP_ENUM_FIELD.match(line):
                continue

            # };
            if REGEXP_CLASS_END.match(line):
                cur_enum_name = None
                continue

            # Invalid line
            error_message = "In '%s', line %r: in enum '%s': invalid line: [%r]" % (
                filename,
                no_line,
                cur_enum_name,
                line
            )

        else:
            # No current scope

            # class MyClass {
            m = REGEXP_CLASS_BEGIN.match(line)
            if m:
                # m.group(1) holds the class qualifier, which is not used yet.
                table_name = m.group(2)
                tables[table_name] = Table(None, None, table_name, None, Keys())
                continue

            # enum MyEnum {
            m = REGEXP_ENUM_BEGIN.match(line)
            if m:
                cur_enum_name = m.group(1)
                enums[cur_enum_name] = MetadataEnum(cur_enum_name)
                continue

            # Invalid line
            error_message = "In '%s', line %r: class declaration expected: [%r]" % (
                filename,
                no_line,
                line
            )

        # Every branch that reaches this point has found an invalid line.
        Log.error(error_message)
        raise ValueError(error_message)

    return (tables, enums)
def main(self):
    """
    Run the XMLRPC server daemon over SSL.

    The served interface depends on the command-line options:
      - platform: a Forwarder bound to the platform of that name found in
        the Storage;
      - gateway: a Forwarder bound to a dummy platform using that gateway
        type and the configuration returned by get_gateway_config();
      - none of them: a Router.

    If the SSL directory is missing, instructions are printed and the
    process exits with status 0. Any Exception raised while configuring
    SSL or running the reactor is logged and not propagated.

    Raises:
        AssertionError: if both a platform and a gateway are specified.
        IndexError: if the requested platform is not in the Storage.
    """
    Log.info("XMLRPC server daemon (%s) started." % sys.argv[0])

    # NOTE it is important to import those files only after daemonization,
    # since they open files we cannot easily preserve
    from twisted.web import xmlrpc, server

    # SSL support
    from OpenSSL import SSL
    from twisted.internet import ssl

    # This also imports manifold.util.reactor_thread that uses reactor
    from manifold.core.router import Router

    assert not (Options().platform and Options().gateway), "Both gateway and platform cannot be specified at commandline"

    # This imports twisted code so we need to import it locally
    from manifold.core.xmlrpc_api import XMLRPCAPI

    # This should be configurable
    allowed_capabilities = Capabilities()
    allowed_capabilities.selection = True
    allowed_capabilities.projection = True

    # XXX We should harmonize interfaces between Router and Forwarder
    if Options().platform:
        platforms = Storage.execute(Query().get('platform'), format='object')
        # We pass a single platform to Forwarder
        matching = [p for p in platforms if p.name == Options().platform]
        if not matching:
            # Same exception type as a failed `[...][0]`, with a usable message.
            raise IndexError("Platform '%s' not found in the storage" % Options().platform)
        self.interface = Forwarder(matching[0], allowed_capabilities)
    elif Options().gateway:
        # XXX user
        # XXX Change Forwarded initializer
        platform = Platform(
            platform     = u'dummy',
            gateway_type = Options().gateway,
            config       = self.get_gateway_config(Options().gateway),
            auth_type    = 'user'
        )
        self.interface = Forwarder(platform, allowed_capabilities)
    else:
        self.interface = Router()

    def verifyCallback(connection, x509, errnum, errdepth, ok):
        # Verification callback handed to ctx.set_verify(): it reports the
        # outcome on stdout and accepts the certificate only if `ok` is set.
        # Each message is a single formatted string so that the output is
        # the same with the print statement and the print function.
        if not ok:
            print("invalid cert from subject: %s" % (x509.get_subject(),))
            print("%s %s" % (errnum, errdepth))
            return False
        print("Certs are fine %s %s" % (x509, x509.get_subject()))
        return True

    try:
        ssl_path = Options().ssl_path
        if not ssl_path or not os.path.exists(ssl_path):
            instructions = (
                "",
                "You need to generate SSL keys and certificate in '%s' to be able to run manifold" % ssl_path,
                "",
                "mkdir -p /etc/manifold/keys",
                "openssl genrsa 1024 > /etc/manifold/keys/server.key",
                "chmod 400 /etc/manifold/keys/server.key",
                "openssl req -new -x509 -nodes -sha1 -days 365 -key /etc/manifold/keys/server.key > /etc/manifold/keys/server.cert",
                "",
            )
            print("\n".join(instructions))
            # NOTE(review): the daemon cannot start, yet the exit status is 0;
            # kept as is in case scripts rely on it -- confirm.
            sys.exit(0)

        server_key_file = "%s/server.key" % ssl_path
        server_crt_file = "%s/server.cert" % ssl_path
        Log.tmp("key, cert=", server_key_file, server_crt_file)

        myContextFactory = ssl.DefaultOpenSSLContextFactory(server_key_file, server_crt_file)
        ctx = myContextFactory.getContext()
        ctx.set_verify(
            SSL.VERIFY_PEER,  # | SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
            verifyCallback
        )

        # Since we have self-signed certs we have to explicitly
        # tell the server to trust them.
        trusted_roots_path = Options().trusted_roots_path
        if not trusted_roots_path or not os.path.exists(trusted_roots_path):
            Log.warning("No trusted root found in %s. You won't be able to login using SSL client certificates" % trusted_roots_path)

        # NOTE(review): the trusted certificates are loaded from ssl_path,
        # not from trusted_roots_path checked just above. Left unchanged
        # because it decides which client certificates are trusted --
        # confirm which directory is intended.
        ctx.load_verify_locations(None, ssl_path)

        ReactorThread().listenSSL(Options().xmlrpc_port, server.Site(XMLRPCAPI(self.interface, allowNone=True)), myContextFactory)
        ReactorThread().start_reactor()
    except Exception as e:
        # TODO If database gets disconnected, we can sleep/attempt reconnection
        Log.error("Error in XMLRPC API: %s" % str(e))
def forward(self, query, annotations = None, is_deferred = False, execute = True):
    """
    Answer the Queries that can be handled without a Gateway, i.e. the
    ones whose object is prefixed by a namespace ("namespace:table"):
      - in the local namespace, 'object' and 'gateway' are answered from
        the metadata and the list of Gateways, any other table is
        fetched from the Storage;
      - in a platform namespace, 'object' is answered from the Announces
        of this platform.

    Args:
        query: The user's Query.
        annotations: A dictionary of annotations or None; its 'user'
            entry, if any, is passed to the Storage.
        is_deferred: (bool) Passed as is to execute_query_plan() / send().
        execute: Not used by this method.
    Returns:
        The value returned by execute_query_plan() or send() when the
        Query is handled here; None otherwise, to inform child classes
        they are in charge of the answer.
    Raises:
        ValueError: if the namespace is neither the local one nor a known
            platform.
    """
    Log.info("Incoming query: %r" % query)
    user = annotations['user'] if annotations and 'user' in annotations else None

    # Implements common functionalities = local queries, etc.
    object_name = query.get_from()
    if ":" not in object_name:
        # No namespace: nothing to handle here
        return None

    # Split on the last ':' only, so that exactly two parts are returned
    # whatever the number of ':' in the object name.
    namespace, table_name = object_name.rsplit(":", 1)

    # Handling internal queries
    if namespace == self.LOCAL_NAMESPACE:
        if table_name == 'object':
            output = self.get_metadata_objects()
        elif table_name == 'gateway':
            output = [{'name': name} for name in Gateway.list().keys()]
        else:
            q = query.copy()
            q.object = table_name
            records = Storage.execute(q, user = user)
            return self.send(query, records, annotations, is_deferred)
    elif namespace:
        if namespace not in self.metadata:
            raise ValueError("Unsupported namespace '%s' (valid values are: %s and local)" % (namespace, ", ".join(self.metadata.keys())))
        if table_name != "object":
            return None
        # Announces of this platform transposed as a list of dictionaries
        output = [announce.get_table().to_dict() for announce in self.metadata[namespace]]
    else:
        # Empty namespace: the child classes are in charge of the answer
        return None

    # Source = list of dict: a simple query plan applying the selection and
    # the projection of the Query is enough.
    qp = QueryPlan()
    qp.ast.from_table(query, output, key = None).selection(query.get_where()).projection(query.get_select())
    return self.execute_query_plan(namespace, query, annotations, qp, is_deferred)
def forward(self, query, annotations=None, is_deferred=False, execute=True):
    """
    Forwards an incoming Query to the appropriate Gateways managed by this
    Router.

    Args:
        query: The user's Query.
        annotations: A dictionary of annotations or None; its 'user'
            entry, if any, identifies the user issuing the Query.
        is_deferred: (bool) Passed as is to the execution methods.
        execute: Set to true if the QueryPlan must be executed. Otherwise
            the QueryPlan is only built and logged.
    Returns:
        The value returned by the parent forward() if it handled the
        Query, by send_result_value() if the policy denies the Query or
        reports an error, by execute_query_plan() / execute_query()
        otherwise. A success ResultValue wrapping None if execute is
        false, an error ResultValue if the execution raises.
    Raises:
        Exception: if the policy engine returns an unknown decision.
    """
    Log.info("Router::forward: %s" % query)

    # INVALIDATE CACHE - should be per object?
    if query.get_action() != 'get':
        self.delete_cache(annotations)

    user = annotations['user'] if annotations and 'user' in annotations else None
    if annotations is None:
        annotations = {}

    ret = super(Router, self).forward(query, annotations, is_deferred, execute)
    if ret:
        # Note: we do not run hooks at the moment for local queries
        return ret

    # Previously, cache etc had nothing to do. We now enforce policy, and
    # eventually this will give us a new query plan
    query_plan = None

    # Enforcing policy
    # XXX This issues after a check (in interface) that we are not
    # considering local or object tables... and we need to do the proper
    # check when records are received, otherwise the query is absent from
    # the cache. This is sources of errors, and should be fixed.
    #
    # Possible results and related actions:
    # - ACCEPT : the query passes and will trigger a new query plan
    # - CACHED : ill named, the cache is taking care of everything, no new
    #   query plan to do. This will handle multiple scenarios such as
    #   buffered (a mix of cached and real time records), and multicast
    #   (real time records).
    # - DENIED
    # - ERROR
    (decision, data) = self.policy.filter(query, None, annotations)

    if decision == Policy.ACCEPT:
        pass
    elif decision == Policy.REWRITE:
        _query, _annotations = data
        if _query:
            query = _query
        if _annotations:
            annotations = _annotations
    elif decision == Policy.CACHE_HIT:
        query_plan = data
    elif decision in [Policy.DENIED, Policy.ERROR]:
        if decision == Policy.DENIED:
            data = ResultValue.get_error(ResultValue.FORBIDDEN)
        return self.send_result_value(query, data, annotations, is_deferred)
    else:
        raise Exception("Unknown QUERY decision from policy engine: %s" % Policy.map_decision[decision])

    # We suppose we have no namespace from here
    if not execute:
        if not query_plan:
            # No namespace, so every known platform is allowed.
            allowed_platforms = [p['platform'] for p in self.platforms]
            query_plan = QueryPlan()
            query_plan.build(query, self.g_3nf, allowed_platforms, self.allowed_capabilities, user)
        Log.info(query_plan.dump())
        # Note: no hook either for queries that are not executed
        return ResultValue.get_success(None)

    # The query plan will be the same whatever the action: it represents
    # the easier way to reach the destination = routing
    # We do not need the full query for the query plan, in fact just the
    # destination, which is a subpart of the query = (fact, filters, fields)
    # action = what to do on this QP
    # ts = how it behaves
    #
    # XXX disabled: for "update" queries, check that the primary key is
    # present in the filters before executing the query plan.

    # Execute query plan
    # the deferred object is sent to execute function of the query_plan
    # This might be a deferred, we cannot put any hook here...
    try:
        # Namespace and table are split if a prefix is used in the query:
        # the namespace is passed as a parameter, then namespace and table
        # are put back together in process_qp_results in order to send the
        # results in the right Cache entry.
        object_name = query.get_from()
        if ":" in object_name:
            # Split on the last ':' only, so that exactly two parts are
            # returned whatever the number of ':' in the object name.
            namespace, table_name = object_name.rsplit(":", 1)
            query.object = table_name
        else:
            namespace = None

        if query_plan:
            return self.execute_query_plan(namespace, query, annotations, query_plan, is_deferred, policy=False)
        return self.execute_query(namespace, query, annotations, is_deferred)
    except Exception as e:
        return ResultValue.get_error(e, traceback.format_exc())
def to_3nf(metadata):
    """
    Compute a 3nf schema
    See also http://elm.eeng.dcu.ie/~ee221/EE221-DB-7.pdf p14

    Args:
        metadata: A dictionary {String => list(Announces)} which maps each
            platform name to the list of its corresponding Announces.
    Returns:
        The corresponding 3nf graph (DBGraph instance)
    """
    # 1) Compute functional dependencies
    tables = []
    map_method_capabilities = {}
    for platform, announces in metadata.items():
        for announce in announces:
            tables.append(announce.table)
            map_method_capabilities[(platform, announce.table.get_name())] = announce.table.get_capabilities()
    fds = make_fd_set(tables)

    # 2) Find a minimal cover
    (fds_min_cover, fds_removed) = fd_minimal_cover(fds)

    # 3) Reinjecting fds removed during normalization
    reinject_fds(fds_min_cover, fds_removed)

    # 4) Grouping fds by table name, then by platform
    fdss = fds_min_cover.group_by_tablename_method()

    # 5) Making 3-nf tables
    tables_3nf = []
    for table_name, map_platform_fds in fdss.items():
        # For the potential parent table
        num_platforms = 0          # Number of per-platform tables built for this table name
        all_platforms = set()      # Platforms providing this table
        common_fields = Fields()   # Fields shared by every per-platform table
        common_key_names = set()   # Set of frozenset of key field names

        # Annotations needed for the query plan
        child_tables = []

        for platform, fds in map_platform_fds.items():
            platforms = set()
            fields = set()
            keys = Keys()

            # Annotations needed for the query plan
            map_method_keys = {}
            map_method_fields = {}

            for fd in fds:
                key = fd.get_determinant().get_key()
                keys.add(key)
                fields |= fd.get_fields()

                # We need to add fields from the key
                for key_field in key:
                    fields.add(key_field)  # XXX

                for field, methods in fd.get_map_field_methods().items():
                    for method in methods:
                        # key annotation
                        map_method_keys.setdefault(method, set()).add(key)

                        # field annotations
                        method_fields = map_method_fields.setdefault(method, set())
                        method_fields.add(field.get_name())
                        # NOTE(review): key_field is the last field visited by
                        # the loop over `key` above, so only that key field is
                        # annotated -- confirm this is intended.
                        method_fields.add(key_field.get_name())

                        platforms.add(method.get_platform())

            table = Table(platforms, None, table_name, fields, keys)

            # inject field and key annotation in the Table object
            table.map_method_keys = map_method_keys
            table.map_method_fields = map_method_fields
            tables_3nf.append(table)
            child_tables.append(table)
            Log.debug("TABLE 3nf:", table, table.keys)

            num_platforms += 1
            all_platforms |= platforms
            if common_fields.is_empty():
                common_fields = Fields(fields)
            else:
                common_fields &= Fields(fields)

            # NOTE(review): these loops spell out the original list
            # comprehension `[field.get_name() for field in key for key in keys]`
            # as Python 2 evaluates it: the fields come from the key of the
            # last fd processed above, and `key` is left bound to the last
            # element of `keys`, which names_in_common_keys relies on below.
            # `for key in keys for field in key` was probably intended --
            # confirm before changing the resulting schema.
            key_field_names = []
            for field in key:
                for key in keys:
                    key_field_names.append(field.get_name())
            common_key_names.add(frozenset(key_field_names))

        # Convert common_key_names into Keys() according to common_fields
        common_keys = set()
        map_name_fields = {}
        for field in common_fields:
            map_name_fields[field.get_name()] = field
        for key_names in common_key_names:
            common_keys.add(Key(frozenset([map_name_fields[field_name] for field_name in key_names])))

        # Several platforms provide the same object, so we've to build a parent table
        if num_platforms > 1:
            parent_table = Table(all_platforms, None, table_name, common_fields, common_keys)

            # Migrate common fields from children to parents, except keys
            parent_map_method_fields = {}
            names_in_common_keys = key.get_field_names()

            for field in common_fields:
                methods = set()
                field_name = field.get_name()
                is_key_field = field_name in names_in_common_keys

                for child_table in child_tables:
                    # Objective = remove the field from child table
                    # Several methods can have it
                    for _method, _fields in child_table.map_method_fields.items():
                        if field_name in _fields:
                            methods.add(_method)
                            if not is_key_field:
                                _fields.remove(field_name)
                    if not is_key_field:
                        child_table.erase_field(field_name)

                # Add the field with all methods to parent_table
                for method in methods:
                    parent_map_method_fields.setdefault(method, set()).add(field_name)

            # inject field annotation in the Table object
            parent_table.map_method_fields = parent_map_method_fields
            tables_3nf.append(parent_table)
            # Log the keys of the parent table itself (the last child table
            # was logged here before).
            Log.debug("Parent table TABLE 3nf:", parent_table, parent_table.get_keys())

            # XXX we already know about the links between those two platforms
            # but we can find them easily (cf dbgraph)

    # 6) Inject capabilities
    # TODO: capabilities are now in tables, shall they be present in tables_3nf
    # instead of relying on map_method_capabilities ?
    for table in tables_3nf:
        for announces in metadata.values():
            for announce in announces:
                announced_table = announce.get_table()
                if announced_table.get_name() != table.get_name():
                    continue
                capabilities = table.get_capabilities()
                if capabilities.is_empty():
                    table.set_capability(announced_table.get_capabilities())
                elif not capabilities == announced_table.get_capabilities():
                    Log.warning("Conflicting capabilities for tables %r (%r) and %r (%r)" % (
                        table,
                        capabilities,
                        announced_table,
                        announced_table.get_capabilities()
                    ))

    # 7) Building DBgraph
    graph_3nf = DBGraph(tables_3nf, map_method_capabilities)

    for table in tables_3nf:
        Log.info("%s" % table)
    return graph_3nf