def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
    """Create a wrapper that talks to the database from a separate thread.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. It is only used when the paramstyle is not 'pyformat'.
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, the paramstyle attribute of the underlying
      module is used (an AttributeError is raised if no underlying
      module was found).

    Raises InterfaceError if there is no '_translate2<paramstyle>' method.
    """
    self.__connection = connection
    # Exactly one cursor is created. The second connection.cursor() call
    # that used to follow under "Thread-stuff" replaced this one without
    # closing it.
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    self.__underlyingmodule = None  # will be updated next
    self.getunderlyingmodule()  # expected to update self.__underlyingmodule
    if paramstyle is None:
        paramstyle = self.__underlyingmodule.paramstyle

    if paramstyle != 'pyformat':
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, '_translate2' + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" %
                                 paramstyle)
    else:
        # Statements are already written in 'pyformat'
        self.__translate = None

    # Thread-stuff: the statements are handed to a daemon thread through
    # a bounded queue
    self.__queue = Queue(5000)
    t = Thread(target=self.__worker)
    t.daemon = True
    t.start()
def __init__(self, jdbcconn, stmtcachesize=20):
    """Wrap the given JDBC connection in a new ConnectionWrapper.

    The new ConnectionWrapper becomes the default ConnectionWrapper if
    no default ConnectionWrapper exists yet.

    Arguments:
    - jdbcconn: An open JDBC Connection (not a PEP249 Connection)
    - stmtcachesize: The maximum number of PreparedStatements kept
      open. Default: 20.

    Raises TypeError if jdbcconn is not a jdbc.Connection and ValueError
    if it is closed.
    """
    if not isinstance(jdbcconn, jdbc.Connection):
        raise TypeError('1st argument must implement java.sql.Connection')
    if jdbcconn.isClosed():
        raise ValueError('1st argument must be an open Connection')

    def closestatement(k, v):
        # Finalizer for the statement cache: close the PreparedStatement
        # of an entry that is pushed out
        return v[0].close()

    self.__jdbcconn = jdbcconn
    self.__prepstmts = FIFODict(stmtcachesize, closestatement)
    self.__resultmeta = FIFODict(stmtcachesize)
    # No result is available until a statement has been executed
    self.__resultset = self.__resultnames = self.__resulttypes = None
    self.nametranslator = lambda s: s
    jdbcconn.setAutoCommit(False)

    if pygrametl._defaulttargetconnection is None:
        pygrametl._defaulttargetconnection = self
def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
    """Wrap the given PEP 249 connection in a new ConnectionWrapper.

    The new ConnectionWrapper becomes the default ConnectionWrapper if
    no default ConnectionWrapper exists yet.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. A statement needs to be translated when the connection does
      not use 'pyformat' to specify parameters. When 'pyformat' is used,
      stmtcachesize is ignored as no statements need to be translated.
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, pygrametl will try to find the paramstyle
      automatically (an AttributeError can be raised if that fails).
    """
    global _defaulttargetconnection

    self.__connection = connection
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    if paramstyle is None:
        modname = self.__connection.__class__.__module__
        # Look in the module defining the connection class first and then
        # in its top-level package. The top-level lookup is only a
        # fallback to avoid breaking anything that worked before.
        for candidate in (modname, modname.split('.')[0]):
            try:
                paramstyle = modules[candidate].paramstyle
            except AttributeError:
                continue
            break
        else:
            # To support, e.g., mysql.connector connections
            paramstyle = modules[modname.rsplit('.', 1)[0]].paramstyle

    if paramstyle == 'pyformat':
        # Nothing to translate
        self.__translate = None
    else:
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, '_translate2' + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" %
                                 paramstyle)

    if _defaulttargetconnection is None:
        _defaulttargetconnection = self
def __init__(self, connection, stmtcachesize=1000, paramstyle=None,
             copyintonew=False):
    """Wrap the given PEP 249 connection in a new ConnectionWrapper.

    The new ConnectionWrapper becomes the default ConnectionWrapper if
    no default ConnectionWrapper exists yet.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. A statement needs to be translated when the connection does
      not use 'pyformat' to specify parameters. When 'pyformat' is used
      and copyintonew == False, stmtcachesize is ignored as no statements
      need to be translated.
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, pygrametl will try to find the paramstyle
      automatically (an AttributeError can be raised if that fails).
    - copyintonew: A boolean deciding if a new mapping only holding the
      needed arguments should be created when a statement is executed.
      Some drivers require this.
    """
    global _defaulttargetconnection

    self.__connection = connection
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    self.__underlyingmodule = None  # will be updated next
    self.getunderlyingmodule()  # updates self.__underlyingmodule
    if paramstyle is None:
        paramstyle = self.__underlyingmodule.paramstyle

    needstranslation = copyintonew or paramstyle != 'pyformat'
    if not needstranslation:
        # paramstyle == 'pyformat' and no copying was asked for, so the
        # statements can be used as they are
        self.__translate = None
    else:
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, '_translate2' + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" %
                                 paramstyle)

    self.__paramstyle = paramstyle
    self.__copyintonew = copyintonew

    if _defaulttargetconnection is None:
        _defaulttargetconnection = self
def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
    """Create a wrapper that talks to the database from a separate thread.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. It is only used when the paramstyle is not "pyformat".
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, it is looked up in the module of the
      connection's class or in one of its parent packages (an
      AttributeError is raised if none of them has a paramstyle).

    Raises InterfaceError if there is no "_translate2<paramstyle>" method.
    """
    self.__connection = connection
    # Exactly one cursor is created. The second connection.cursor() call
    # that used to follow under "Thread-stuff" replaced this one without
    # closing it.
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    if paramstyle is None:
        modname = self.__connection.__class__.__module__
        try:
            paramstyle = modules[modname].paramstyle
        except AttributeError:
            # Note: This is probably a better way to do this, but to avoid
            # to break anything that worked before this fix, we only do it
            # this way if the first approach didn't work
            try:
                paramstyle = modules[modname.split(".")[0]].paramstyle
            except AttributeError:
                # To support, e.g., mysql.connector connections
                paramstyle = modules[modname.rsplit(".", 1)[0]].paramstyle

    if paramstyle != "pyformat":
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, "_translate2" + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" % paramstyle)
    else:
        # Statements are already written in "pyformat"
        self.__translate = None

    # Thread-stuff: the statements are handed to a daemon thread through
    # a bounded queue
    self.__queue = Queue(5000)
    t = Thread(target=self.__worker)
    t.daemon = True
    t.start()
def __init__(self, jdbcconn, stmtcachesize=20):
    """Wrap the given JDBC connection and start the background worker.

    Arguments:
    - jdbcconn: the JDBC connection to wrap. Auto commit is turned off
      for it.
    - stmtcachesize: the size given to the caches for prepared statements
      and for result metadata. Default: 20.
    """
    def closestatement(k, v):
        # Finalizer for the statement cache: close the PreparedStatement
        # of an entry that is pushed out
        return v[0].close()

    self.__jdbcconn = jdbcconn
    self.__prepstmts = FIFODict(stmtcachesize, closestatement)
    self.__resultmeta = FIFODict(stmtcachesize)
    # No result is available until a statement has been executed
    self.__resultset = self.__resultnames = self.__resulttypes = None
    self.nametranslator = lambda s: s
    jdbcconn.setAutoCommit(False)

    # Work is handed to a daemon thread through a bounded queue
    self.__queue = Queue(5000)
    worker = Thread(target=self.__worker)
    worker.setDaemon(True)  # NB: "worker.daemon = True" does NOT work...
    worker.start()
def __init__(self, jdbcconn, stmtcachesize=20):
    """Wrap the given JDBC connection in a new ConnectionWrapper.

    The new ConnectionWrapper becomes the default ConnectionWrapper if
    no default ConnectionWrapper exists yet.

    Arguments:
    - jdbcconn: the JDBC connection to wrap. Auto commit is turned off
      for it.
    - stmtcachesize: the size given to the caches for prepared statements
      and for result metadata. Default: 20.
    """
    def closestatement(k, v):
        # Finalizer for the statement cache: close the PreparedStatement
        # of an entry that is pushed out
        return v[0].close()

    self.__jdbcconn = jdbcconn
    self.__prepstmts = FIFODict(stmtcachesize, closestatement)
    self.__resultmeta = FIFODict(stmtcachesize)
    # No result is available until a statement has been executed
    self.__resultset = self.__resultnames = self.__resulttypes = None
    self.nametranslator = lambda s: s
    jdbcconn.setAutoCommit(False)

    if pygrametl._defaulttargetconnection is None:
        pygrametl._defaulttargetconnection = self
def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
    """Create a wrapper that talks to the database from a separate thread.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. It is only used when the paramstyle is not 'pyformat'.
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, it is looked up in the module of the
      connection's class or in one of its parent packages (an
      AttributeError is raised if none of them has a paramstyle).

    Raises InterfaceError if there is no '_translate2<paramstyle>' method.
    """
    self.__connection = connection
    # Exactly one cursor is created. The second connection.cursor() call
    # that used to follow under "Thread-stuff" replaced this one without
    # closing it.
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    if paramstyle is None:
        modname = self.__connection.__class__.__module__
        try:
            paramstyle = modules[modname].paramstyle
        except AttributeError:
            # Note: This is probably a better way to do this, but to avoid
            # to break anything that worked before this fix, we only do it
            # this way if the first approach didn't work
            try:
                paramstyle = modules[modname.split('.')[0]].paramstyle
            except AttributeError:
                # To support, e.g., mysql.connector connections
                paramstyle = modules[modname.rsplit('.', 1)[0]].paramstyle

    if paramstyle != 'pyformat':
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, '_translate2' + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" %
                                 paramstyle)
    else:
        # Statements are already written in 'pyformat'
        self.__translate = None

    # Thread-stuff: the statements are handed to a daemon thread through
    # a bounded queue
    self.__queue = Queue(5000)
    t = Thread(target=self.__worker)
    t.daemon = True
    t.start()
def __init__(self, jdbcconn, stmtcachesize=20):
    """Wrap the given JDBC connection and start the background worker.

    Arguments:
    - jdbcconn: An open JDBC Connection (not a PEP249 Connection)
    - stmtcachesize: The maximum number of PreparedStatements kept
      open. Default: 20.
    """
    def closestatement(k, v):
        # Finalizer for the statement cache: close the PreparedStatement
        # of an entry that is pushed out
        return v[0].close()

    self.__jdbcconn = jdbcconn
    self.__prepstmts = FIFODict(stmtcachesize, closestatement)
    self.__resultmeta = FIFODict(stmtcachesize)
    # No result is available until a statement has been executed
    self.__resultset = self.__resultnames = self.__resulttypes = None
    self.nametranslator = lambda s: s
    jdbcconn.setAutoCommit(False)

    # Work is handed to a named daemon thread through a bounded queue
    self.__queue = Queue(5000)
    worker = Thread(target=self.__worker)
    worker.setDaemon(True)  # NB: "worker.daemon = True" does NOT work...
    worker.setName('BackgroundJDBCConnectionWrapper')
    worker.start()
def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
    """Wrap the given PEP 249 connection in a new ConnectionWrapper.

    The new ConnectionWrapper becomes the default ConnectionWrapper if
    no default ConnectionWrapper exists yet.

    Arguments:
    - connection: An open PEP 249 connection to the database
    - stmtcachesize: A number deciding how many translated statements to
      cache. A statement needs to be translated when the connection does
      not use 'pyformat' to specify parameters. When 'pyformat' is used,
      stmtcachesize is ignored as no statements need to be translated.
    - paramstyle: A string holding the name of the PEP 249 connection's
      paramstyle. If None, pygrametl will try to find the paramstyle
      automatically (an AttributeError can be raised if that fails).
    """
    global _defaulttargetconnection

    self.__connection = connection
    self.__cursor = connection.cursor()
    self.nametranslator = lambda s: s

    self.__underlyingmodule = None  # will be updated next
    self.getunderlyingmodule()  # updates self.__underlyingmodule
    if paramstyle is None:
        paramstyle = self.__underlyingmodule.paramstyle

    if paramstyle == 'pyformat':
        # Nothing to translate
        self.__translate = None
    else:
        self.__translations = FIFODict(stmtcachesize)
        try:
            self.__translate = getattr(self, '_translate2' + paramstyle)
        except AttributeError:
            raise InterfaceError("The paramstyle '%s' is not supported" %
                                 paramstyle)

    if _defaulttargetconnection is None:
        _defaulttargetconnection = self
class BackgroundConnectionWrapper(object):
    """An alternative implementation of the ConnectionWrapper for experiments.

    This implementation communicates with the database by using a
    separate thread.

    It is likely better to use ConnectionWrapper or a shared
    ConnectionWrapper (see pygrametl.parallel).

    This class offers the same methods as ConnectionWrapper. The
    documentation is not repeated here.

    Statements are executed asynchronously. If a statement fails in the
    worker thread, the first such exception is kept and raised by the next
    method that waits for the queued statements (the fetch methods,
    rowfactory, rowcount, commit, rollback, close, cursor, and
    resultnames).
    """
    _SINGLE = 1
    _MANY = 2

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        self.__connection = connection
        # Exactly one cursor is created (a second, redundant call used to
        # replace this cursor without closing it)
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to
                # avoid to break anything that worked before this fix, we
                # only do it this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split('.')[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = \
                        modules[modname.rsplit('.', 1)[0]].paramstyle

        if paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        # Thread-stuff
        self.__startworker()

    def __startworker(self):
        # Create the bounded work queue and start the daemon worker thread
        self.__workererror = None
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def __join(self):
        # Wait for all queued statements and re-raise the first exception
        # (if any) that occurred in the worker thread since the last wait
        self.__queue.join()
        error = self.__workererror
        if error is not None:
            self.__workererror = None
            raise error

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        if self.__translate and translate:
            if not params:
                # There is no parameter set to translate the statement
                # for and thus nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            cached = self.__translations[stmt]
            if not isinstance(cached, tuple):
                # Only _translate2named caches the bare statement. The
                # paramstyle is thus 'named' and we don't have to put
                # parameters into sequences
                self.__queue.put((self._MANY, self.__cursor, newstmt, params))
            else:
                # We need to extract attributes and put them into sequences
                names = cached[1]  # The attributes to extract
                newparams = [[p[n] for n in names] for p in params]
                self.__queue.put(
                    (self._MANY, self.__cursor, newstmt, newparams))
        else:
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                # An unterminated placeholder; leave the rest untouched
                break
            name = res[start + 2:end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # An unterminated placeholder; leave the rest untouched
                break
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '?', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes
        # to extract from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # An unterminated placeholder; leave the rest untouched
                break
            name = newstmt[start + 2:end]
            names.append(name)
            # All occurrences of the same name get the same number
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # An unterminated placeholder; leave the rest untouched
                break
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '%s', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        self.__join()
        rows = self.__cursor
        # The result is read from the old cursor; new statements get a
        # fresh cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        self.__join()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        self.__join()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        self.__join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        # This is a generator; the wait below happens when the first tuple
        # is requested
        self.__join()
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        self.__join()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        # No need to join the queue here
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        endload()
        # Nothing is committed if a queued statement failed
        self.__join()
        self.__connection.commit()

    def close(self):
        try:
            self.__join()
        finally:
            # Close the connection even if a queued statement failed
            self.__connection.close()

    def rollback(self):
        try:
            self.__join()
        finally:
            # Roll back even if a queued statement failed
            self.__connection.rollback()

    def setasdefault(self):
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        self.__join()
        return self.__connection.cursor()

    def resultnames(self):
        self.__join()
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the wrapper is pickled (to be sent to another process),
        # the cursor, the queue, and the pending worker error are left out.
        # They are created anew when the wrapper is unpickled. The names
        # are spelled out with this class' name-mangling prefix.
        res = self.__dict__.copy()
        for name in ('__cursor', '__queue', '__workererror'):
            res.pop('_BackgroundConnectionWrapper' + name, None)
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
        self.__startworker()

    def __worker(self):
        # Runs in the daemon thread: execute the queued statements one by one
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            try:
                if op == self._SINGLE:
                    curs.execute(stmt, args)
                elif op == self._MANY:
                    curs.executemany(stmt, args)
            except Exception as e:
                # Keep the first error for the thread that waits for the
                # queue; the worker must stay alive
                if self.__workererror is None:
                    self.__workererror = e
            finally:
                # Always mark the task as done. Otherwise a failing
                # statement makes every later queue.join() block forever.
                self.__queue.task_done()
class ConnectionWrapper(object):
    """Provide a uniform representation of different database connection types.

    All Dimensions and FactTables communicate with the data warehouse
    using a ConnectionWrapper. In this way, the code for loading the DW
    does not have to care about which parameter format is used.

    pygrametl's code uses the 'pyformat' but the ConnectionWrapper
    performs translations of the SQL to use 'named', 'qmark', 'format',
    or 'numeric' if the user's database connection needs this. Note that
    the translations are simple and naive. Escaping as in %%(name)s is
    not taken into consideration. These simple translations are enough
    for pygrametl's code which is the important thing here; we're not
    trying to make a generic, all-purpose tool to get rid of the
    problems with different parameter formats. It is, however, possible
    to disable the translation of a statement to execute such that
    'problematic' statements can be executed anyway.
    """

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None,
                 copyintonew=False):
        """Create a ConnectionWrapper around the given PEP 249 connection

        If no default ConnectionWrapper already exists, the new
        ConnectionWrapper is set as the default.

        Arguments:
        - connection: An open PEP 249 connection to the database
        - stmtcachesize: A number deciding how many translated statements
          to cache. A statement needs to be translated when the connection
          does not use 'pyformat' to specify parameters. When 'pyformat'
          is used and copyintonew == False, stmtcachesize is ignored as no
          statements need to be translated.
        - paramstyle: A string holding the name of the PEP 249 connection's
          paramstyle. If None, pygrametl will try to find the paramstyle
          automatically (an AttributeError can be raised if that fails).
        - copyintonew: A boolean deciding if a new mapping only holding the
          needed arguments should be created when a statement is executed.
          Some drivers require this.

        Raises InterfaceError if the paramstyle is not supported.
        """
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule
        if paramstyle is None:
            # Fails with an AttributeError if no module was found
            paramstyle = self.__underlyingmodule.paramstyle

        if copyintonew or paramstyle != 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            # Since paramstyle == 'pyformat' and copyintonew == False,
            # no translations are needed
            self.__translate = None
        self.__paramstyle = paramstyle
        self.__copyintonew = copyintonew

        global _defaulttargetconnection
        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

        Arguments:
        - stmt: the statement to execute
        - arguments: a mapping with the arguments (default: None)
        - namemapping: a mapping of names such that if stmt uses %(arg)s
          and namemapping[arg]=arg2, the value arguments[arg2] is used
          instead of arguments[arg]
        - translate: decides if translation from 'pyformat' to the
          underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a sequence of statements.

        Arguments:
        - stmt: the statement to execute
        - params: a sequence of mappings with the arguments. The statement
          is executed for each of them. If a translation takes place and
          params is empty, nothing is executed.
        - translate: decides if translation from 'pyformat' to the
          underlying connection's format should take place. Default: True
        """
        if self.__translate and translate:
            if not params:
                # There is no parameter set to translate the statement
                # for and thus nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            (newstmt, _) = self.__translate(stmt, params[0])
            names = self.__translations[stmt][1]
            if self.__paramstyle in ('pyformat', 'named'):
                if self.__copyintonew:
                    # we need to copy attributes from params into new dicts
                    newparams = [{n: p[n] for n in names} for p in params]
                else:
                    newparams = params
            else:
                # We need to extract attributes and put them into sequences
                newparams = [[p[n] for n in names] for p in params]
        else:
            # nothing to do for pyformat when no translation is necessary
            newstmt = stmt
            newparams = params
        self.__cursor.executemany(newstmt, newparams)

    def _translate2pyformat(self, stmt, row=None):
        # No translation of stmt needed. This method is only used if a new
        # row only containing the required attributes must be made.
        (_, names) = self.__translations.get(stmt, (None, None))
        if names:
            return (stmt, {n: row[n] for n in names})
        elif names == []:
            # there are no arguments to copy
            return (stmt, None)
        # Not cached yet: find the names used in stmt
        names = []
        end = 0
        while True:
            start = stmt.find('%(', end)
            if start == -1:
                break
            end = stmt.find(')s', start)
            if end == -1:
                break
            names.append(stmt[start + 2: end])
        self.__translations[stmt] = (stmt, names)
        # The cache now holds the names, so this call returns immediately
        return self._translate2pyformat(stmt, row)

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. Make new row if self.__copyintonew.
        # Cache the translated SQL and a list of attributes to extract.
        (res, names) = self.__translations.get(stmt, (None, None))
        if res:
            if not self.__copyintonew:
                return (res, row)
            elif names:
                return (res, {n: row[n] for n in names})
            else:
                return (res, None)
        names = []
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break
            name = res[start + 2: end]
            # All occurrences of the name are replaced at once
            res = res.replace(res[start:end + 2], ':' + name)
            names.append(name)
        self.__translations[stmt] = (res, names)
        # The cache now holds the translation, so use the code path above
        return self._translate2named(stmt, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '?', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes
        # to extract from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break
            name = newstmt[start + 2: end]
            names.append(name)
            # All occurrences of the same name get the same number
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(
                newstmt[start:end + 2], '%s', 1)  # Replace once!
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts)."""
        rows = self.__cursor
        # The result is read from the old cursor; new statements get a
        # fresh cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict)."""
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple."""
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples"""
        if self.__cursor.description is not None:
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

        This is done by considering the prefixes of the connection's
        __class__'s __module__ string, from the shortest one to the
        full name (e.g., 'a', 'a.b', 'a.b.c'), and returning the first
        imported module that has both of the attributes 'paramstyle' and
        'connect'. The result is cached. None is returned if no such
        module is found.
        """
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule
        else:
            fullmodname = self.__connection.__class__.__module__
            for i in reversed(range(fullmodname.count('.') + 1)):
                # Splitting at the i rightmost dots leaves a prefix that
                # grows as i decreases
                modname = fullmodname.rsplit('.', i)[0]
                try:
                    modref = modules[modname]
                    if hasattr(modref, 'paramstyle') and \
                            hasattr(modref, 'connect'):
                        self.__underlyingmodule = modref
                        return modref
                except KeyError:
                    pass
        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database.

        If this ConnectionWrapper is the default, the default is unset.
        """
        global _defaulttargetconnection
        if _defaulttargetconnection is self:
            _defaulttargetconnection = None
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return the column names of the current result as a tuple.

        None is returned if the cursor has no description.
        """
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
class ConnectionWrapper(object):

    """Provide a uniform representation of different database connection types.

       All Dimensions and FactTables communicate with the data warehouse using
       a ConnectionWrapper. In this way, the code for loading the DW does not
       have to care about which parameter format is used.

       pygrametl's code uses the 'pyformat' but the ConnectionWrapper performs
       translations of the SQL to use 'named', 'qmark', 'format', or 'numeric'
       if the user's database connection needs this. Note that the
       translations are simple and naive. Escaping as in %%(name)s is not
       taken into consideration. These simple translations are enough for
       pygrametl's code which is the important thing here; we're not trying to
       make a generic, all-purpose tool to get rid of the problems with
       different parameter formats. It is, however, possible to disable the
       translation of a statement to execute such that 'problematic'
       statements can be executed anyway.
    """

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Create a ConnectionWrapper around the given PEP 249 connection

           If no default ConnectionWrapper already exists, the new
           ConnectionWrapper is set as the default.

           Arguments:

           - connection: An open PEP 249 connection to the database
           - stmtcachesize: A number deciding how many translated statements to
             cache. A statement needs to be translated when the connection
             does not use 'pyformat' to specify parameters. When 'pyformat' is
             used, stmtcachesize is ignored as no statements need to be
             translated.
           - paramstyle: A string holding the name of the PEP 249 connection's
             paramstyle. If None, pygrametl will try to find the paramstyle
             automatically (an AttributeError can be raised if that fails).

           Raises InterfaceError if the paramstyle is not one this class can
           translate to.
        """
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split('.')[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = \
                        modules[modname.rsplit('.', 1)[0]].paramstyle

        if not paramstyle == 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                # The translator is looked up by name, so every method called
                # '_translate2<paramstyle>' defines a supported paramstyle
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        global _defaulttargetconnection
        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

           Arguments:

           - stmt: the statement to execute
           - arguments: a mapping with the arguments (default: None)
           - namemapping: a mapping of names such that if stmt uses %(arg)s
             and namemapping[arg]=arg2, the value arguments[arg2] is used
             instead of arguments[arg]
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a sequence of statements.

           Arguments:

           - stmt: the statement to execute
           - params: a sequence of mappings, one for each execution
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if self.__translate and translate:
            if len(params) == 0:
                # The first parameter set is needed for the translation, so
                # without any parameter sets there is nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            if isinstance(self.__translations[stmt], str):
                # The paramstyle is 'named' in this case and we don't have to
                # put parameters into sequences
                self.__cursor.executemany(newstmt, params)
            else:
                # We need to extract attributes and put them into sequences
                names = self.__translations[stmt][1]  # attributes to extract
                newparams = [[p[n] for n in names] for p in params]
                self.__cursor.executemany(newstmt, newparams)
        else:
            # for pyformat when no translation is necessary
            self.__cursor.executemany(stmt, params)

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = res[start + 2:end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2:end]
            names.append(name)
            # Replace once such that names gets one entry for each ?
            newstmt = newstmt.replace(newstmt[start:end + 2], '?', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both. All occurrences of the same
        # %(name)s get the same number as they are replaced at once.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2:end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2:end]
            names.append(name)
            # Replace once such that names gets one entry for each %s
            newstmt = newstmt.replace(newstmt[start:end + 2], '%s', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts)."""
        # The current cursor is handed over to the generator and a new
        # cursor is used for the following statements
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict).

           If no query was executed, an empty dict is returned. If there are
           no more rows, a dict mapping each name to None is returned.
        """
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple.

           If no query was executed, an empty tuple is returned. If there
           are no more rows, a tuple holding a None for each column is
           returned.
        """
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples"""
        if self.__cursor.description is not None:
            # Fetch in batches of 200 rows instead of all rows at once
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module."""
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database,"""
        # Don't leave a closed ConnectionWrapper as the default connection
        global _defaulttargetconnection
        if _defaulttargetconnection is self:
            _defaulttargetconnection = None
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return a tuple with the (untranslated) names of the result
           columns or None if no query was executed.
        """
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
class BackgroundConnectionWrapper(object):

    """An alternative implementation of the ConnectionWrapper for experiments.

    This implementation communicates with the database by using a
    separate thread.

    It is likely better to use ConnectionWrapper or a shared
    ConnectionWrapper (see pygrametl.parallel).

    This class offers the same methods as ConnectionWrapper. The
    documentation is not repeated here.
    """

    # Operation codes put on the queue to tell the worker what to call
    _SINGLE = 1  # cursor.execute
    _MANY = 2  # cursor.executemany

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s

        if paramstyle is None:
            modname = self.__connection.__class__.__module__
            try:
                paramstyle = modules[modname].paramstyle
            except AttributeError:
                # Note: This is probably a better way to do this, but to avoid
                # to break anything that worked before this fix, we only do it
                # this way if the first approach didn't work
                try:
                    paramstyle = modules[modname.split(".")[0]].paramstyle
                except AttributeError:
                    # To support, e.g., mysql.connector connections
                    paramstyle = modules[modname.rsplit(".", 1)[0]].paramstyle

        if not paramstyle == "pyformat":
            self.__translations = FIFODict(stmtcachesize)
            try:
                # The translator is looked up by name, so every method called
                # '_translate2<paramstyle>' defines a supported paramstyle
                self.__translate = getattr(self, "_translate2" + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" % paramstyle)
        else:
            self.__translate = None

        # Thread-stuff
        self.__startworker()

    def __startworker(self):
        # Create the queue of pending statements and start the daemon thread
        # that executes them. Used by both __init__ and __setstate__.
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        # The statement is only queued here; the worker thread executes it
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        if self.__translate and translate:
            if len(params) == 0:
                # The first parameter set is needed for the translation, so
                # without any parameter sets there is nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if needed)
            # for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            if isinstance(self.__translations[stmt], str):
                # The paramstyle is 'named' in this case and we don't have to
                # put parameters into sequences
                self.__queue.put((self._MANY, self.__cursor, newstmt, params))
            else:
                # We need to extract attributes and put them into sequences
                names = self.__translations[stmt][1]  # attributes to extract
                newparams = [[p[n] for n in names] for p in params]
                self.__queue.put((self._MANY, self.__cursor, newstmt, newparams))
        else:
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find("%(")
            if start == -1:
                break
            end = res.find(")s", start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = res[start + 2 : end]
            res = res.replace(res[start : end + 2], ":" + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find("%(")
            if start == -1:
                break
            end = newstmt.find(")s", start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2 : end]
            names.append(name)
            # Replace once such that names gets one entry for each ?
            newstmt = newstmt.replace(newstmt[start : end + 2], "?", 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both. All occurrences of the same
        # %(name)s get the same number as they are replaced at once.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find("%(")
            if start == -1:
                break
            end = newstmt.find(")s", start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2 : end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start : end + 2], ":" + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find("%(")
            if start == -1:
                break
            end = newstmt.find(")s", start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2 : end]
            names.append(name)
            # Replace once such that names gets one entry for each %s
            newstmt = newstmt.replace(newstmt[start : end + 2], "%s", 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        # Wait until the worker has executed all queued statements
        self.__queue.join()
        # The current cursor is handed over to the generator and a new
        # cursor is used for the following statements
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        self.__queue.join()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0]) for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None,) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        self.__queue.join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchall()

    def rowcount(self):
        self.__queue.join()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        # No need to join the queue here
        return modules[self.__connection.__class__.__module__]

    def commit(self):
        endload()
        self.__queue.join()
        self.__connection.commit()

    def close(self):
        self.__queue.join()
        self.__connection.close()

    def rollback(self):
        self.__queue.join()
        self.__connection.rollback()

    def setasdefault(self):
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        self.__queue.join()
        return self.__connection.cursor()

    def resultnames(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the BackgroundConnectionWrapper is pickled (to be sent to
        # another process), we need to create a new cursor, a new queue, and
        # a new worker thread when it is unpickled. Note that the private
        # attributes are name-mangled with this class' name (and not with
        # ConnectionWrapper's name).
        res = self.__dict__.copy()
        res.pop("_BackgroundConnectionWrapper__cursor", None)
        res.pop("_BackgroundConnectionWrapper__queue", None)
        return res

    def __setstate__(self, dict):
        self.__dict__.update(dict)
        self.__cursor = self.__connection.cursor()
        self.__startworker()

    def __worker(self):
        # Executed by the background thread: Take the queued statements one
        # at a time and execute them on the cursor they were queued with.
        # NOTE(review): if a statement raises an exception, the thread dies
        # before task_done is called and later calls of join will block.
        # Decide how errors should be reported to the caller before
        # changing this.
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            if op == self._SINGLE:
                if args is None:
                    # Some drivers don't accept None for the arguments
                    curs.execute(stmt)
                else:
                    curs.execute(stmt, args)
            elif op == self._MANY:
                curs.executemany(stmt, args)
            self.__queue.task_done()
class BackgroundConnectionWrapper(object):

    """An alternative implementation of the ConnectionWrapper for experiments.

       This implementation communicates with the database by using a
       separate thread.

       It is likely better to use ConnectionWrapper or a shared
       ConnectionWrapper (see pygrametl.parallel).

       This class offers the same methods as ConnectionWrapper. The
       documentation is not repeated here.
    """

    # Operation codes put on the queue to tell the worker what to call
    _SINGLE = 1  # cursor.execute
    _MANY = 2  # cursor.executemany

    # Most of this class' code was just copied from ConnectionWrapper
    # as we just want to do experiments with this class.

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s
        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            if self.__underlyingmodule is None:
                raise AttributeError(
                    "Could not find the paramstyle of the connection's "
                    "module. Please give the paramstyle argument")
            paramstyle = self.__underlyingmodule.paramstyle

        if not paramstyle == 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                # The translator is looked up by name, so every method called
                # '_translate2<paramstyle>' defines a supported paramstyle
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        # Thread-stuff
        self.__startworker()

    def __startworker(self):
        # Create the queue of pending statements and start the daemon thread
        # that executes them. Used by both __init__ and __setstate__.
        self.__queue = Queue(5000)
        t = Thread(target=self.__worker)
        t.daemon = True
        t.start()

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        # The statement is only queued here; the worker thread executes it
        self.__queue.put((self._SINGLE, self.__cursor, stmt, arguments))

    def executemany(self, stmt, params, translate=True):
        if self.__translate and translate:
            if len(params) == 0:
                # The first parameter set is needed for the translation, so
                # without any parameter sets there is nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            if isinstance(self.__translations[stmt], str):
                # The paramstyle is 'named' in this case and we don't have to
                # put parameters into sequences
                self.__queue.put((self._MANY, self.__cursor, newstmt, params))
            else:
                # We need to extract attributes and put them into sequences
                names = self.__translations[stmt][1]  # attributes to extract
                newparams = [[p[n] for n in names] for p in params]
                self.__queue.put(
                    (self._MANY, self.__cursor, newstmt, newparams))
        else:
            # for pyformat when no translation is necessary
            self.__queue.put((self._MANY, self.__cursor, stmt, params))

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = res[start + 2: end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2: end]
            names.append(name)
            # Replace once such that names gets one entry for each ?
            newstmt = newstmt.replace(newstmt[start:end + 2], '?', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both. All occurrences of the same
        # %(name)s get the same number as they are replaced at once.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                # Unterminated placeholder. Without this check, the loop
                # would never terminate.
                break
            name = newstmt[start + 2: end]
            names.append(name)
            # Replace once such that names gets one entry for each %s
            newstmt = newstmt.replace(newstmt[start:end + 2], '%s', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        # Wait until the worker has executed all queued statements
        self.__queue.join()
        # The current cursor is handed over to the generator and a new
        # cursor is used for the following statements
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        self.__queue.join()
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        self.__queue.join()
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        # This is a generator, so the queue is first joined when the first
        # tuple is requested
        self.__queue.join()
        if self.__cursor.description is not None:
            # Fetch in batches of 200 rows instead of all rows at once
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        self.__queue.join()
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

           This is done by considering the prefixes of the connection's
           __class__'s __module__ string, shortest first (e.g., 'a', 'a.b',
           'a.b.c'), and returning the first loaded module that has both of
           the attributes 'paramstyle' and 'connect'. The result is cached.
           None is returned if no such module is found.
        """
        # No need to join the queue here
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule
        fullmodname = self.__connection.__class__.__module__
        for i in reversed(range(fullmodname.count('.') + 1)):
            # Splitting i times from the right leaves a shorter prefix for a
            # larger i, so the shortest prefix is considered first
            modname = fullmodname.rsplit('.', i)[0]
            modref = modules.get(modname)
            if modref is None:
                continue  # no module with this name has been loaded
            if hasattr(modref, 'paramstyle') and hasattr(modref, 'connect'):
                self.__underlyingmodule = modref
                return modref
        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        endload()
        self.__queue.join()
        self.__connection.commit()

    def close(self):
        self.__queue.join()
        self.__connection.close()

    def rollback(self):
        self.__queue.join()
        self.__connection.rollback()

    def setasdefault(self):
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        self.__queue.join()
        return self.__connection.cursor()

    def resultnames(self):
        self.__queue.join()
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the BackgroundConnectionWrapper is pickled (to be sent to
        # another process), we need to create a new cursor, a new queue, and
        # a new worker thread when it is unpickled. Note that the private
        # attributes are name-mangled with this class' name (and not with
        # ConnectionWrapper's name).
        res = self.__dict__.copy()
        res.pop('_BackgroundConnectionWrapper__cursor', None)
        res.pop('_BackgroundConnectionWrapper__queue', None)
        # Module objects cannot be pickled. The reference is only a cache
        # which getunderlyingmodule fills in again when needed.
        res['_BackgroundConnectionWrapper__underlyingmodule'] = None
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()
        self.__startworker()

    def __worker(self):
        # Executed by the background thread: Take the queued statements one
        # at a time and execute them on the cursor they were queued with.
        # NOTE(review): if a statement raises an exception, the thread dies
        # before task_done is called and later calls of join will block.
        # Decide how errors should be reported to the caller before
        # changing this.
        while True:
            (op, curs, stmt, args) = self.__queue.get()
            if op == self._SINGLE:
                if args is None:
                    # Some drivers don't accept None for the arguments
                    curs.execute(stmt)
                else:
                    curs.execute(stmt, args)
            elif op == self._MANY:
                curs.executemany(stmt, args)
            self.__queue.task_done()
class ConnectionWrapper(object):

    """Provide a uniform representation of different database connection types.

       All Dimensions and FactTables communicate with the data warehouse using
       a ConnectionWrapper. In this way, the code for loading the DW does not
       have to care about which parameter format is used.

       pygrametl's code uses the 'pyformat' but the ConnectionWrapper performs
       translations of the SQL to use 'named', 'qmark', 'format', or 'numeric'
       if the user's database connection needs this. Note that the
       translations are simple and naive. Escaping as in %%(name)s is not
       taken into consideration. These simple translations are enough for
       pygrametl's code which is the important thing here; we're not trying to
       make a generic, all-purpose tool to get rid of the problems with
       different parameter formats. It is, however, possible to disable the
       translation of a statement to execute such that 'problematic'
       statements can be executed anyway.
    """

    def __init__(self, connection, stmtcachesize=1000, paramstyle=None):
        """Create a ConnectionWrapper around the given PEP 249 connection

           If no default ConnectionWrapper already exists, the new
           ConnectionWrapper is set as the default.

           Arguments:

           - connection: An open PEP 249 connection to the database
           - stmtcachesize: A number deciding how many translated statements to
             cache. A statement needs to be translated when the connection
             does not use 'pyformat' to specify parameters. When 'pyformat' is
             used, stmtcachesize is ignored as no statements need to be
             translated.
           - paramstyle: A string holding the name of the PEP 249 connection's
             paramstyle. If None, pygrametl will try to find the paramstyle
             automatically (an AttributeError can be raised if that fails).

           Raises InterfaceError if the paramstyle is not one this class can
           translate to.
        """
        self.__connection = connection
        self.__cursor = connection.cursor()
        self.nametranslator = lambda s: s
        self.__underlyingmodule = None  # will be updated next
        self.getunderlyingmodule()  # updates self.__underlyingmodule

        if paramstyle is None:
            if self.__underlyingmodule is None:
                raise AttributeError(
                    "Could not find the paramstyle of the connection's "
                    "module. Please give the paramstyle argument")
            paramstyle = self.__underlyingmodule.paramstyle

        if not paramstyle == 'pyformat':
            self.__translations = FIFODict(stmtcachesize)
            try:
                # The translator is looked up by name, so every method called
                # '_translate2<paramstyle>' defines a supported paramstyle
                self.__translate = getattr(self, '_translate2' + paramstyle)
            except AttributeError:
                raise InterfaceError("The paramstyle '%s' is not supported" %
                                     paramstyle)
        else:
            self.__translate = None

        global _defaulttargetconnection
        if _defaulttargetconnection is None:
            _defaulttargetconnection = self

    def execute(self, stmt, arguments=None, namemapping=None, translate=True):
        """Execute a statement.

           Arguments:

           - stmt: the statement to execute
           - arguments: a mapping with the arguments (default: None)
           - namemapping: a mapping of names such that if stmt uses %(arg)s
             and namemapping[arg]=arg2, the value arguments[arg2] is used
             instead of arguments[arg]
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if namemapping and arguments:
            arguments = copy(arguments, **namemapping)
        if self.__translate and translate:
            (stmt, arguments) = self.__translate(stmt, arguments)
        if arguments is None:
            # Some drivers don't accept None for 'arguments'
            self.__cursor.execute(stmt)
        else:
            self.__cursor.execute(stmt, arguments)

    def executemany(self, stmt, params, translate=True):
        """Execute a sequence of statements.

           Arguments:

           - stmt: the statement to execute
           - params: a sequence of mappings, one for each execution
           - translate: decides if translation from 'pyformat' to the
             underlying connection's format should take place. Default: True
        """
        if self.__translate and translate:
            if len(params) == 0:
                # The first parameter set is needed for the translation, so
                # without any parameter sets there is nothing to execute
                return
            # Idea: Translate the statement for the first parameter set. Then
            # reuse the statement (but create new attribute sequences if
            # needed) for the remaining parameter sets
            newstmt = self.__translate(stmt, params[0])[0]
            if isinstance(self.__translations[stmt], str):
                # The paramstyle is 'named' in this case and we don't have to
                # put parameters into sequences
                self.__cursor.executemany(newstmt, params)
            else:
                # We need to extract attributes and put them into sequences
                names = self.__translations[stmt][1]  # attributes to extract
                newparams = [[p[n] for n in names] for p in params]
                self.__cursor.executemany(newstmt, newparams)
        else:
            # for pyformat when no translation is necessary
            self.__cursor.executemany(stmt, params)

    def _translate2named(self, stmt, row=None):
        # Translate %(name)s to :name. No need to change row.
        # Cache only the translated SQL.
        res = self.__translations.get(stmt, None)
        if res:
            return (res, row)
        res = stmt
        while True:
            start = res.find('%(')
            if start == -1:
                break
            end = res.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = res[start + 2: end]
            res = res.replace(res[start:end + 2], ':' + name)
        self.__translations[stmt] = res
        return (res, row)

    def _translate2qmark(self, stmt, row=None):
        # Translate %(name)s to ? and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            # Replace once such that names gets one entry for each ?
            newstmt = newstmt.replace(newstmt[start:end + 2], '?', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2numeric(self, stmt, row=None):
        # Translate %(name)s to :0, :1, ... and build a list of attributes to
        # extract from row. Cache both. All occurrences of the same
        # %(name)s get the same number as they are replaced at once.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        cnt = 0
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            newstmt = newstmt.replace(newstmt[start:end + 2], ':' + str(cnt))
            cnt += 1
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def _translate2format(self, stmt, row=None):
        # Translate %(name)s to %s and build a list of attributes to extract
        # from row. Cache both.
        (newstmt, names) = self.__translations.get(stmt, (None, None))
        if newstmt:
            return (newstmt, [row[n] for n in names])
        names = []
        newstmt = stmt
        while True:
            start = newstmt.find('%(')
            if start == -1:
                break
            end = newstmt.find(')s', start)
            if end == -1:
                break  # unterminated placeholder; leave the rest untouched
            name = newstmt[start + 2: end]
            names.append(name)
            # Replace once such that names gets one entry for each %s
            newstmt = newstmt.replace(newstmt[start:end + 2], '%s', 1)
        self.__translations[stmt] = (newstmt, names)
        return (newstmt, [row[n] for n in names])

    def rowfactory(self, names=None):
        """Return a generator object returning result rows (i.e. dicts)."""
        # The current cursor is handed over to the generator and a new
        # cursor is used for the following statements
        rows = self.__cursor
        self.__cursor = self.__connection.cursor()
        if names is None:
            if rows.description is None:  # no query was executed ...
                return (nothing for nothing in [])  # a generator with no rows
            else:
                names = [self.nametranslator(t[0]) for t in rows.description]
        return rowfactory(rows, names, True)

    def fetchone(self, names=None):
        """Return one result row (i.e. dict).

           If no query was executed, an empty dict is returned. If there are
           no more rows, a dict mapping each name to None is returned.
        """
        if self.__cursor.description is None:
            return {}
        if names is None:
            names = [self.nametranslator(t[0])
                     for t in self.__cursor.description]
        values = self.__cursor.fetchone()
        if values is None:
            # A row with each att = None
            return dict.fromkeys(names)
        else:
            return dict(zip(names, values))

    def fetchonetuple(self):
        """Return one result tuple.

           If no query was executed, an empty tuple is returned. If there
           are no more rows, a tuple holding a None for each column is
           returned.
        """
        if self.__cursor.description is None:
            return ()
        values = self.__cursor.fetchone()
        if values is None:
            return (None, ) * len(self.__cursor.description)
        else:
            return values

    def fetchmanytuples(self, cnt):
        """Return cnt result tuples."""
        if self.__cursor.description is None:
            return []
        return self.__cursor.fetchmany(cnt)

    def fetchalltuples(self):
        """Return all result tuples"""
        if self.__cursor.description is not None:
            # Fetch in batches of 200 rows instead of all rows at once
            while True:
                results = self.__cursor.fetchmany(200)
                if not results:
                    break
                for row in results:
                    yield row

    def rowcount(self):
        """Return the size of the result."""
        return self.__cursor.rowcount

    def getunderlyingmodule(self):
        """Return a reference to the underlying connection's module.

           This is done by considering the prefixes of the connection's
           __class__'s __module__ string, shortest first (e.g., 'a', 'a.b',
           'a.b.c'), and returning the first loaded module that has both of
           the attributes 'paramstyle' and 'connect'. The result is cached.
           None is returned if no such module is found.
        """
        if self.__underlyingmodule is not None:
            return self.__underlyingmodule
        fullmodname = self.__connection.__class__.__module__
        for i in reversed(range(fullmodname.count('.') + 1)):
            # Splitting i times from the right leaves a shorter prefix for a
            # larger i, so the shortest prefix is considered first
            modname = fullmodname.rsplit('.', i)[0]
            modref = modules.get(modname)
            if modref is None:
                continue  # no module with this name has been loaded
            if hasattr(modref, 'paramstyle') and hasattr(modref, 'connect'):
                self.__underlyingmodule = modref
                return modref
        return None  # We could not find the module. Raise an Exception instead?

    def commit(self):
        """Commit the transaction."""
        endload()
        self.__connection.commit()

    def close(self):
        """Close the connection to the database,"""
        # Don't leave a closed ConnectionWrapper as the default connection
        global _defaulttargetconnection
        if _defaulttargetconnection is self:
            _defaulttargetconnection = None
        self.__connection.close()

    def rollback(self):
        """Rollback the transaction."""
        self.__connection.rollback()

    def setasdefault(self):
        """Set this ConnectionWrapper as the default connection."""
        global _defaulttargetconnection
        _defaulttargetconnection = self

    def cursor(self):
        """Return a cursor object. Optional method."""
        return self.__connection.cursor()

    def resultnames(self):
        """Return a tuple with the (untranslated) names of the result
           columns or None if no query was executed.
        """
        if self.__cursor.description is None:
            return None
        else:
            return tuple([t[0] for t in self.__cursor.description])

    def __getstate__(self):
        # In case the ConnectionWrapper is pickled (to be sent to another
        # process), we need to create a new cursor when it is unpickled.
        res = self.__dict__.copy()
        del res['_ConnectionWrapper__cursor']  # a dirty trick, but...
        # Module objects cannot be pickled. The reference is only a cache
        # which getunderlyingmodule fills in again when needed.
        res['_ConnectionWrapper__underlyingmodule'] = None
        return res

    def __setstate__(self, dictdata):
        self.__dict__.update(dictdata)
        self.__cursor = self.__connection.cursor()