class ConnectionPool(object):
    """Expose twisted.enterprise.adbapi.ConnectionPool through tornado futures.

    Each ``run_*`` method forwards to the corresponding Twisted pool method
    and converts the returned Deferred into a tornado ``TracebackFuture`` so
    callers can ``yield`` the result from a tornado coroutine.
    """

    def __init__(self, *args, **kwargs):
        # All constructor arguments are forwarded untouched to the
        # underlying Twisted connection pool.
        self._pool = TxConnectionPool(*args, **kwargs)

    def run_query(self, *args, **kwargs):
        """Run a SELECT-style statement; the future resolves to the rows."""
        deferred = self._pool.runQuery(*args, **kwargs)
        return self._defer_to_future(deferred)

    def run_operation(self, *args, **kwargs):
        """Run a statement with no result set (INSERT/UPDATE/DDL)."""
        deferred = self._pool.runOperation(*args, **kwargs)
        return self._defer_to_future(deferred)

    def run_interaction(self, *args, **kwargs):
        """Run a callable inside a transaction via ``runInteraction``."""
        deferred = self._pool.runInteraction(*args, **kwargs)
        return self._defer_to_future(deferred)

    def close(self):
        """Shut down the underlying pool and its worker threads."""
        self._pool.close()

    @staticmethod
    def _defer_to_future(defer):
        """Adapt a Twisted Deferred into a tornado TracebackFuture."""
        future = TracebackFuture()

        def _on_failure(failure):
            # Hand the (type, value, traceback) triple over so tornado
            # re-raises the original exception with its traceback intact.
            future.set_exc_info((failure.type, failure.value, failure.tb))

        defer.addCallbacks(future.set_result, _on_failure)
        return future
def test_startedClose(self):
    """
    If L{ConnectionPool.close} is called after it has been started, but
    not by its shutdown trigger, the shutdown trigger is cancelled.
    """
    reactor = EventReactor(True)
    pool = ConnectionPool('twisted.test.test_adbapi', cp_reactor=reactor)
    # There should be a shutdown trigger waiting.
    # assertEqual: assertEquals is a deprecated alias (removed in Py 3.12).
    self.assertEqual(reactor.triggers,
                     [('during', 'shutdown', pool.finalClose)])
    pool.close()
    # But not anymore.
    self.assertFalse(reactor.triggers)
def test_unstartedClose(self):
    """
    If L{ConnectionPool.close} is called without L{ConnectionPool.start}
    having been called, the pool's startup event is cancelled.
    """
    reactor = EventReactor(False)
    pool = ConnectionPool('twisted.test.test_adbapi', cp_reactor=reactor)
    # There should be a startup trigger waiting.
    # assertEqual: assertEquals is a deprecated alias (removed in Py 3.12).
    self.assertEqual(reactor.triggers,
                     [('after', 'startup', pool._start)])
    pool.close()
    # But not anymore.
    self.assertFalse(reactor.triggers)
def test_unstartedClose(self):
    """
    If L{ConnectionPool.close} is called without L{ConnectionPool.start}
    having been called, the pool's startup event is cancelled.
    """
    fake_reactor = EventReactor(False)
    pool = ConnectionPool("twisted.test.test_adbapi", cp_reactor=fake_reactor)
    # Creating the pool registers exactly one startup trigger.
    expected = [("after", "startup", pool._start)]
    self.assertEqual(fake_reactor.triggers, expected)
    # Closing before start must cancel that pending trigger.
    pool.close()
    self.assertFalse(fake_reactor.triggers)
def test_startedClose(self):
    """
    If L{ConnectionPool.close} is called after it has been started, but
    not by its shutdown trigger, the shutdown trigger is cancelled.
    """
    fake_reactor = EventReactor(True)
    pool = ConnectionPool('twisted.test.test_adbapi', cp_reactor=fake_reactor)
    # A started pool holds exactly one shutdown trigger.
    expected = [('during', 'shutdown', pool.finalClose)]
    self.assertEqual(fake_reactor.triggers, expected)
    # An explicit close() must cancel the pending trigger.
    pool.close()
    self.assertFalse(fake_reactor.triggers)
class MySQLPipeline(object):
    """Scrapy pipeline that stores scraped result items in MySQL.

    Writes go through a Twisted adbapi ConnectionPool so the crawl is not
    blocked on database round trips.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy factory; wires in the crawler's stats collector."""
        return cls(crawler.stats)

    def __init__(self, stats):
        # Instantiate the DB connection pool from project-level SETTINGS.
        self.dbpool = ConnectionPool('MySQLdb',
                                     host=SETTINGS['DB_HOST'],
                                     user=SETTINGS['DB_USER'],
                                     passwd=SETTINGS['DB_PASSWD'],
                                     port=SETTINGS['DB_PORT'],
                                     db=SETTINGS['DB_DB'],
                                     charset='utf8',
                                     use_unicode=True,
                                     cursorclass=MySQLdb.cursors.DictCursor)
        self.stats = stats
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        """
        Cleanup function, called after crawling has finished to close open
        objects.
        Close ConnectionPool.
        """
        self.dbpool.close()

    def process_item(self, item, spider):
        """Queue an asynchronous insert; always return the item unchanged."""
        query = self.dbpool.runInteraction(self._insert_record, item)
        query.addErrback(self._handle_error)
        return item

    def _insert_record(self, tx, item):
        # `rank` is a reserved word since MySQL 8.0.2, so quote it with
        # backticks just like `date` already is.
        result = tx.execute(
            """INSERT INTO agregator_results
               (task_id, direct_link, source_link, `rank`, site, `date`)
               VALUES (%s, %s, %s, %s, %s, %s)""",
            (
                item["django_task_id"],
                item["direct_link"],
                item["source_link"],
                item["rank"],
                item["site"],
                item["date"],
            ))
        if result > 0:
            self.stats.inc_value('database/items_added')

    def _handle_error(self, e):
        """Log failed inserts instead of crashing the crawl."""
        log.err(e)
class MDatabase:
    """
    Sqlite database for Marnatarlo.

    All queries run on a twisted adbapi ConnectionPool and therefore return
    Deferreds; the pool must be shut down explicitly via shutdown().
    """

    def __init__(self, dbname):
        self.dbname = dbname
        # Probe for the file with open(); create the schema on first use.
        # The handle is closed immediately (the original leaked it).
        try:
            open(dbname).close()
        except IOError:
            conn = sqlite3.connect(dbname)
            curs = conn.cursor()
            curs.execute("Create table users (name text unique, password text)")
            curs.execute("Create table stats(name text, played INTEGER, won INTEGER, FOREIGN KEY(name) REFERENCES users(name))")
            conn.commit()
            curs.close()
            conn.close()
        self.__dbpool = ConnectionPool('sqlite3', self.dbname)

    def shutdown(self):
        """
        Shutdown function
        It's a required task to shutdown the database connection pool:
        garbage collector doesn't shutdown associated thread
        """
        self.__dbpool.close()

    def returnOk(self, o):
        # Success marker for callbacks that only care about completion.
        return True

    def returnFailure(self, o):
        # Error marker: swallow the Failure and report False instead.
        return False

    def returnResult(self, result):
        # Identity callback, kept for readability of callback chains.
        return result

    def execSql(self, sql, params=None):
        """
        Exec an SQL command, return True or False
        @type sql C{str}
        @param sql SQL command
        """
        # The old callback chain went through _returnResult, which called the
        # nonexistent pool.fetchall() and made every call resolve to False.
        # Per this docstring, success now yields True and failure False.
        # (params default changed from a shared mutable {} to None.)
        d = self.__dbpool.runQuery(sql, params if params is not None else {})
        d.addCallbacks(self.returnOk, self.returnFailure)
        return d

    def fetch(self, sql, params=None):
        """
        Exec an SQL command, fetching the rows resulting
        @type sql C{str}
        @param sql SQL command
        """
        d = self.__dbpool.runQuery(sql, params if params is not None else {})
        d.addCallback(self.returnResult)
        d.addErrback(self.returnFailure)
        return d

    def get_stats(self, user):
        """Return the stats row(s) for the given user."""
        query = "SELECT * FROM stats WHERE name=?"
        return self.fetch(query, (user,))

    def user_won(self, user):
        """Increment the win counter for the given user."""
        query = "UPDATE stats SET won=won+1 WHERE name=?"
        return self.execSql(query, (user,))

    def user_play(self, user):
        """Increment the played-games counter for the given user."""
        query = "UPDATE stats SET played=played+1 WHERE name=?"
        return self.execSql(query, (user,))

    def save_user(self, user, passwd):
        """
        Save user / password into db
        @type user C{str}
        @type password C{str}
        """
        def insert_user(users, user, passwd):
            # Refuse to overwrite an existing account.
            if len(users) > 0:
                return self.returnFailure(users)
            query = "INSERT INTO users(name, password) VALUES (?, ?)"
            self.execSql(query, (user, passwd,))
            query = "INSERT INTO stats(name, played, won) VALUES (?, 0,0)"
            return self.execSql(query, (user,))
        return self.get_user_login_info(user).addCallback(insert_user, user, passwd)

    def get_user_login_info(self, user):
        """
        Get a tuple, user / password
        @type user C{str}
        """
        query = "SELECT * FROM users WHERE name=?"
        return self.fetch(query, (user,))

    def get_all_users(self):
        """
        Get all users from db
        """
        query = "SELECT u.name, s.played, s.won FROM users AS u, stats AS s WHERE u.name = s.name"
        return self.fetch(query)
class AbstractADBAPIDatabase(object):
    """
    A generic SQL database accessed through a Twisted adbapi ConnectionPool.

    Subclasses supply the schema bookkeeping (_db_version, _db_type, table
    setup) and the DB-specific dialect hooks (_create_table, _test_table,
    _prepare_statement, ...).
    """

    def __init__(self, dbID, dbapiName, dbapiArgs, persistent, **kwargs):
        """
        @param persistent: C{True} if the data in the DB must be preserved
            during upgrades, C{False} if the DB data can be re-created from
            an external source.
        @type persistent: bool
        """
        self.dbID = dbID
        self.dbapiName = dbapiName
        self.dbapiArgs = dbapiArgs
        self.dbapikwargs = kwargs

        self.persistent = persistent

        self.initialized = False

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.pool)

    @inlineCallbacks
    def open(self):
        """
        Access the underlying database.
        @return: a db2 connection object for this index's underlying data store.
        """
        if not self.initialized:

            self.pool = ConnectionPool(self.dbapiName, *self.dbapiArgs, **self.dbapikwargs)

            # sqlite3 is not thread safe which means we have to close the
            # sqlite3 connections in the same thread that opened them. We
            # need a special thread pool class that has a thread worker
            # function that does a close when a thread is closed.
            if self.dbapiName == "sqlite3":
                self.pool.threadpool.stop()
                self.pool.threadpool = ConnectionClosingThreadPool(1, 1)
                self.pool.threadpool.start()
                self.pool.threadpool.pool = self.pool

            try:
                #
                # Set up the schema
                #
                # Create CALDAV table if needed
                test = (yield self._test_schema_table())
                if test:
                    version = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'SCHEMA_VERSION'"))
                    dbtype = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'TYPE'"))

                    if (version != self._db_version()) or (dbtype != self._db_type()):

                        if dbtype != self._db_type():
                            log.err("Database %s has different type (%s vs. %s)"
                                    % (self.dbID, dbtype, self._db_type()))

                            # Delete this index and start over
                            yield self._db_remove()
                            yield self._db_init()

                        elif version != self._db_version():
                            log.err("Database %s has different schema (v.%s vs. v.%s)"
                                    % (self.dbID, version, self._db_version()))

                            # Upgrade the DB
                            yield self._db_upgrade(version)

                else:
                    yield self._db_init()

                self.initialized = True
            except:
                # Clean up upon error so we don't end up leaking the pool's
                # threads; always re-raise so callers see the failure.
                self.pool.close()
                self.pool = None
                raise

    def close(self):
        """Close the pool if open() completed; safe to call repeatedly."""
        if self.initialized:
            self.pool.close()
            self.pool = None
            self.initialized = False

    @inlineCallbacks
    def clean(self):
        """Empty all data tables, opening the database first if needed."""
        if not self.initialized:
            yield self.open()

        yield self._db_empty_data_tables()

    @inlineCallbacks
    def execute(self, sql, *query_params):
        """Run a statement that produces no result rows."""
        if not self.initialized:
            yield self.open()

        yield self._db_execute(sql, *query_params)

    @inlineCallbacks
    def executescript(self, script):
        """Run a multi-statement SQL script."""
        if not self.initialized:
            yield self.open()

        # NOTE(review): _db_execute_script is not defined in this class;
        # presumably supplied by a subclass — confirm before relying on it.
        yield self._db_execute_script(script)

    @inlineCallbacks
    def query(self, sql, *query_params):
        """Run a query and return all rows as a tuple of row tuples."""
        if not self.initialized:
            yield self.open()

        result = (yield self._db_all_values_for_sql(sql, *query_params))
        returnValue(result)

    @inlineCallbacks
    def queryList(self, sql, *query_params):
        """Run a query and return the first column of every row."""
        if not self.initialized:
            yield self.open()

        result = (yield self._db_values_for_sql(sql, *query_params))
        returnValue(result)

    @inlineCallbacks
    def queryOne(self, sql, *query_params):
        """Run a query expected to produce at most a single value."""
        if not self.initialized:
            yield self.open()

        result = (yield self._db_value_for_sql(sql, *query_params))
        returnValue(result)

    def _db_version(self):
        """
        @return: the schema version assigned to this DB.
        """
        raise NotImplementedError

    def _db_type(self):
        """
        @return: the collection type assigned to this DB.
        """
        raise NotImplementedError

    def _test_schema_table(self):
        # The CALDAV table holds the schema version/type bookkeeping.
        return self._test_table("CALDAV")

    @inlineCallbacks
    def _db_init(self):
        """
        Initialise the underlying database tables.
        """
        log.msg("Initializing database %s" % (self.dbID,))

        # TODO we need an exclusive lock of some kind here to prevent a race
        # condition in which multiple processes try to create the tables.

        yield self._db_init_schema_table()
        yield self._db_init_data_tables()
        yield self._db_recreate()

    @inlineCallbacks
    def _db_init_schema_table(self):
        """
        Initialise the CALDAV schema bookkeeping table, recording the
        current schema version and collection type.
        """

        #
        # CALDAV table keeps track of our schema version and type
        #
        yield self._create_table("CALDAV", (
            ("KEY", "text unique"),
            ("VALUE", "text unique"),
        ), True)

        yield self._db_execute(
            """
            insert or ignore into CALDAV (KEY, VALUE)
            values ('SCHEMA_VERSION', :1)
            """, (self._db_version(),)
        )
        yield self._db_execute(
            """
            insert or ignore into CALDAV (KEY, VALUE)
            values ('TYPE', :1)
            """, (self._db_type(),)
        )

    def _db_init_data_tables(self):
        """
        Initialise the underlying database tables.
        """
        raise NotImplementedError

    def _db_empty_data_tables(self):
        """
        Delete the database tables.
        """
        # Implementations can override this to re-create data
        pass

    def _db_recreate(self):
        """
        Recreate the database tables.
        """
        # Implementations can override this to re-create data
        pass

    @inlineCallbacks
    def _db_upgrade(self, old_version):
        """
        Upgrade the database tables.
        """
        if self.persistent:
            yield self._db_upgrade_data_tables(old_version)
            yield self._db_upgrade_schema()
        else:
            # Non-persistent DB's by default can be removed and re-created.
            # However, for simple DB upgrades they SHOULD override this
            # method and handle those for better performance.
            yield self._db_remove()
            yield self._db_init()

    def _db_upgrade_data_tables(self, old_version):
        """
        Upgrade the data from an older version of the DB.
        """
        # Persistent DB's MUST override this method and do a proper upgrade.
        # Their data cannot be thrown away.
        raise NotImplementedError("Persistent databases MUST support an upgrade method.")

    @inlineCallbacks
    def _db_upgrade_schema(self):
        """
        Upgrade the stored schema version to the current one.
        """
        yield self._db_execute("insert or replace into CALDAV (KEY, VALUE) values ('SCHEMA_VERSION', :1)", (self._db_version(),))

    @inlineCallbacks
    def _db_remove(self):
        """
        Remove all database information (all the tables)
        """
        yield self._db_remove_data_tables()
        yield self._db_remove_schema()

    def _db_remove_data_tables(self):
        """
        Remove all the data from an older version of the DB.
        """
        raise NotImplementedError("Each database must remove its own tables.")

    @inlineCallbacks
    def _db_remove_schema(self):
        """
        Remove the stored schema version table.
        """
        yield self._db_execute("drop table if exists CALDAV")

    @inlineCallbacks
    def _db_all_values_for_sql(self, sql, *query_params):
        """
        Execute an SQL query and obtain the resulting values.

        @param sql: the SQL query to execute.
        @param query_params: parameters to C{sql}.
        @return: a tuple of the rows resulting from executing C{sql} with
            C{query_params}.
        """
        sql = self._prepare_statement(sql)
        results = (yield self.pool.runQuery(sql, *query_params))
        returnValue(tuple(results))

    @inlineCallbacks
    def _db_values_for_sql(self, sql, *query_params):
        """
        Execute an SQL query and obtain the resulting values.

        @param sql: the SQL query to execute.
        @param query_params: parameters to C{sql}.
        @return: an iterable of values in the first column of each row
            resulting from executing C{sql} with C{query_params}.
        """
        sql = self._prepare_statement(sql)
        results = (yield self.pool.runQuery(sql, *query_params))
        returnValue(tuple([row[0] for row in results]))

    @inlineCallbacks
    def _db_value_for_sql(self, sql, *query_params):
        """
        Execute an SQL query and obtain a single value.

        @param sql: the SQL query to execute.
        @param query_params: parameters to C{sql}.
        @return: the value resulting from the executing C{sql} with
            C{query_params}.
        @raise AssertionError: if the query yields multiple rows or columns.
        """
        value = None
        for row in (yield self._db_values_for_sql(sql, *query_params)):
            assert value is None, "Multiple values in DB for %s %s" % (sql, query_params)
            value = row
        returnValue(value)

    def _db_execute(self, sql, *query_params):
        """
        Execute an SQL operation that returns None.

        @param sql: the SQL query to execute.
        @param query_params: parameters to C{sql}.
        @return: a Deferred that fires when the operation completes.
        """
        sql = self._prepare_statement(sql)
        return self.pool.runOperation(sql, *query_params)

    """
    Since different databases support different types of columns and
    modifiers on those we need to have an "abstract" way of specifying
    columns in our code and then map the abstract specifiers to the
    underlying DB's allowed types.

    Types we can use are:

        integer
        text
        text(n)
        date
        serial

    The " unique" modifier can be appended to any of those.
    """
    def _map_column_types(self, type):
        raise NotImplementedError

    def _create_table(self, name, columns, ifnotexists=False):
        raise NotImplementedError

    def _test_table(self, name):
        raise NotImplementedError

    def _create_index(self, name, ontable, columns, ifnotexists=False):
        raise NotImplementedError

    def _prepare_statement(self, sql):
        raise NotImplementedError
class AbstractADBAPIDatabase(object):
    """
    A generic SQL database.
    """

    def __init__(self, dbID, dbapiName, dbapiArgs, persistent, **kwargs):
        """
        @param persistent: C{True} if the data in the DB must be preserved
            during upgrades, C{False} if the DB data can be re-created from
            an external source.
        @type persistent: bool
        """
        self.dbID = dbID
        self.dbapiName = dbapiName
        self.dbapiArgs = dbapiArgs
        self.dbapikwargs = kwargs

        self.persistent = persistent

        self.initialized = False

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.pool)

    @inlineCallbacks
    def open(self):
        """
        Access the underlying database.
        @return: a db2 connection object for this index's underlying data store.
        """
        if not self.initialized:

            self.pool = ConnectionPool(self.dbapiName, *self.dbapiArgs, **self.dbapikwargs)

            # sqlite3 is not thread safe which means we have to close the
            # sqlite3 connections in the same thread that opened them. We
            # need a special thread pool class that has a thread worker
            # function that does a close when a thread is closed.
            if self.dbapiName == "sqlite3":
                self.pool.threadpool.stop()
                self.pool.threadpool = ConnectionClosingThreadPool(1, 1)
                self.pool.threadpool.start()
                self.pool.threadpool.pool = self.pool

            #
            # Set up the schema
            #
            # Create CALDAV table if needed
            try:
                test = (yield self._test_schema_table())
                if test:
                    version = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'SCHEMA_VERSION'"))
                    dbtype = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'TYPE'"))

                    if (version != self._db_version()) or (dbtype != self._db_type()):

                        if dbtype != self._db_type():
                            log.error(
                                "Database {db} has different type ({t1} vs. {t2})",
                                db=self.dbID, t1=dbtype, t2=self._db_type()
                            )

                            # Delete this index and start over
                            yield self._db_remove()
                            yield self._db_init()

                        elif version != self._db_version():
                            log.error(
                                "Database {db} has different schema (v.{v1} vs. v.{v2})",
                                db=self.dbID, v1=version, v2=self._db_version()
                            )

                            # Upgrade the DB
                            yield self._db_upgrade(version)

                else:
                    yield self._db_init()
                self.initialized = True
            except:
                # Clean up upon error so we don't end up leaking threads
                self.pool.close()
                self.pool = None
                raise

    def close(self):
        """Close the pool, logging (not raising) any shutdown error."""
        if self.initialized:
            try:
                self.pool.close()
            except Exception as e:
                # `except Exception as e` replaces the Python-2-only
                # `except Exception, e` comma syntax.
                log.error("Error whilst closing connection pool: {ex}", ex=e)
            self.pool = None
            self.initialized = False
class AbstractADBAPIDatabase(object):
    """
    A generic SQL database.
    """

    def __init__(self, dbID, dbapiName, dbapiArgs, persistent, **kwargs):
        """
        @param persistent: C{True} if the data in the DB must be preserved
            during upgrades, C{False} if the DB data can be re-created from
            an external source.
        @type persistent: bool
        """
        self.dbID = dbID
        self.dbapiName = dbapiName
        self.dbapiArgs = dbapiArgs
        self.dbapikwargs = kwargs

        self.persistent = persistent

        self.initialized = False

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.pool)

    @inlineCallbacks
    def open(self):
        """
        Access the underlying database.
        @return: a db2 connection object for this index's underlying data store.
        """
        if not self.initialized:

            self.pool = ConnectionPool(self.dbapiName, *self.dbapiArgs, **self.dbapikwargs)

            # sqlite3 is not thread safe which means we have to close the
            # sqlite3 connections in the same thread that opened them. We
            # need a special thread pool class that has a thread worker
            # function that does a close when a thread is closed.
            if self.dbapiName == "sqlite3":
                self.pool.threadpool.stop()
                self.pool.threadpool = ConnectionClosingThreadPool(1, 1)
                self.pool.threadpool.start()
                self.pool.threadpool.pool = self.pool

            #
            # Set up the schema
            #
            # Create CALDAV table if needed
            try:
                test = (yield self._test_schema_table())
                if test:
                    version = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'SCHEMA_VERSION'"))
                    dbtype = (yield self._db_value_for_sql("select VALUE from CALDAV where KEY = 'TYPE'"))

                    if (version != self._db_version()) or (dbtype != self._db_type()):

                        if dbtype != self._db_type():
                            log.error("Database %s has different type (%s vs. %s)"
                                      % (self.dbID, dbtype, self._db_type()))

                            # Delete this index and start over
                            yield self._db_remove()
                            yield self._db_init()

                        elif version != self._db_version():
                            log.error("Database %s has different schema (v.%s vs. v.%s)"
                                      % (self.dbID, version, self._db_version()))

                            # Upgrade the DB
                            yield self._db_upgrade(version)

                else:
                    yield self._db_init()
                self.initialized = True
            except:
                # Clean up upon error so we don't end up leaking threads
                self.pool.close()
                self.pool = None
                raise

    def close(self):
        """Close the pool, logging (not raising) any shutdown error."""
        if self.initialized:
            try:
                self.pool.close()
            except Exception as e:
                # `except Exception as e` replaces the Python-2-only
                # `except Exception, e` comma syntax.
                log.error("Error whilst closing connection pool: %s" % (e,))
            self.pool = None
            self.initialized = False
class Database(object):
    """Async facade over a Twisted adbapi ConnectionPool for the tracker DB."""

    def __init__(self, config: Dict) -> None:
        self.db_engine = config.get('engine', 'MySQLdb')
        self.database = config.get('database', 'tracker')
        self.user = config.get('user', 'root')
        self.db_pool = None
        self.connect()

    def connect(self):
        """(Re)create the connection pool from the stored settings."""
        self.db_pool = ConnectionPool(self.db_engine, database=self.database, user=self.user)

    @inlineCallbacks
    def callback(self, message: str) -> DataBaseResponse:
        """Handle one serialized DataBasePackage and return a DataBaseResponse.

        On a lost connection the pool is rebuilt and the request retried
        once after a one second delay.
        """
        db_package = DataBasePackage.deserialize(message)
        result = None
        try:
            if db_package.method == Method.insert:
                if isinstance(db_package.target, GeoPoint):
                    # Parameterized statement: values are no longer spliced
                    # into the SQL text (fixes quoting bugs and injection).
                    # TODO: Fix hardcode tracker id
                    yield self.db_pool.runQuery(
                        'INSERT INTO geodata'
                        ' (tracker_id, lat, lon, speed, altitude, stamp)'
                        ' VALUES (1, %s, %s, %s, %s, FROM_UNIXTIME(%s))',
                        (
                            db_package.target.latitude,
                            db_package.target.longitude,
                            db_package.target.speed,
                            db_package.target.altitude,
                            db_package.target.timestamp,
                        ))
            if db_package.method == Method.select:
                query = ''
                selectors = []
                args = []
                if db_package.selector.target == Target.geo:
                    query = 'SELECT * FROM geodata'
                elif db_package.selector.target == Target.user:
                    query = 'SELECT * FROM users'
                    # Filters are bound as query parameters, never
                    # interpolated into the SQL string.
                    if db_package.selector.selector.get('login'):
                        selectors.append('login = %s')
                        args.append(db_package.selector.selector.get('login'))
                    if db_package.selector.selector.get('password_hash'):
                        selectors.append('password_hash = %s')
                        args.append(db_package.selector.selector.get('password_hash'))
                    if db_package.selector.selector.get('user_id'):
                        selectors.append('id = %s')
                        args.append(db_package.selector.selector.get('user_id'))
                if selectors:
                    query += ' WHERE {}'.format(' AND '.join(selectors))
                # MySQL requires LIMIT to precede OFFSET; %d coerces to int.
                if db_package.selector.limit:
                    query += ' LIMIT %d' % db_package.selector.limit
                if db_package.selector.offset:
                    query += ' OFFSET %d' % db_package.selector.offset
                rows = yield self.db_pool.runQuery(query, tuple(args))
                if db_package.selector.target == Target.geo:
                    response = [
                        GeoPoint(latitude=row[2], longitude=row[3], altitude=row[5],
                                 timestamp=row[6], speed=row[4])
                        for row in rows
                    ]
                elif db_package.selector.target == Target.user:
                    response = [
                        User(user_id=row[0], login=row[1], password_hash=row[2], stamp=row[3])
                        for row in rows
                    ]
                else:
                    response = []
                # Built inside the select branch so insert-only requests
                # cannot hit an unbound `response`.
                result = DataBaseResponse(response)
        except OperationalError as e:
            print(' [!] Connection failed: {}'.format(e))
            self.db_pool.close()
            self.connect()
            # reactor.callLater returns an IDelayedCall, not the retry's
            # result; deferLater yields the actual retried response.
            from twisted.internet import task
            result = yield task.deferLater(reactor, 1, self.callback, message)
        return result
class SQLMagicPipeline(object):
    """Scrapy pipeline that persists SQLItem items through a Twisted
    adbapi connection pool, with per-backend SQL templates.

    Supported drivers: sqlite, pgsql, mysql, firebird (untested). Each
    branch sets the pool, the parameter placeholder style and the
    identifier quoting character, then installs backend-specific query
    templates.
    """

    def __init__(self, settings, **kwargs):
        """Connect to database in the pool."""
        if not isinstance(settings, dict):
            raise NotConfigured('No database connection settings found.')

        self.settings = settings
        self.stats = kwargs.get('stats')
        self.debug = kwargs.get('debug', False)
        self.paramstyle = ':'
        self.identifier = '"'  # default to ANSI quoting
        self.queries = {
            'select': "SELECT $fields FROM $table:esc WHERE $indices:and",  # select on UniqueFields
            'selectall': "SELECT $fields FROM $table:esc",
            'selectone': "SELECT $fields FROM $table:esc WHERE $indices:and LIMIT 1",  # if backend supports LIMIT
            # 'delete' : "DELETE FROM $table:esc WHERE $indices:and", # match on UniqueFields
            'deleteme': "DELETE FROM $table:esc WHERE $fields_values:and",  # exact item match
        }
        self.dbapi = None

        if self.settings.get('drivername') == 'sqlite':
            self.dbapi = __import__('sqlite3', fromlist=[''])
            self.__dbpool = ConnectionPool('sqlite3',
                self.settings.get('database', ':memory:'),
                # apparently the connection pool / thread pool does not do the
                # teardown in the same thread
                # https://twistedmatrix.com/trac/ticket/3629
                # therefore throwing errors on finalClose at reactor shutdown
                # TODO: should be able to work around that?
                check_same_thread=False,  # SQLite must be compiled threadsafe to use this
                # limit connection pool to one thread to avoid "database is locked" errors
                #cp_max=1,
                # - or raise the database timeout sufficiently
                timeout=300,
            )
            # alternative escaping parameter
            #self.paramstyle = '?'
            #self.paramstyle = ':'
            #self.paramstyle = '$'
            # default statements for sqlite
            self.queries.update({
                'insert': "INSERT INTO $table:esc SET $fields_values",
                'upsert': "INSERT OR REPLACE INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'pgsql':
            self.dbapi = __import__('psycopg2', fromlist=[''])
            #from psycopg2.extras import DictCursor
            self.__dbpool = ConnectionPool('psycopg2',
                database=self.settings.get('database'),
                user=self.settings.get('username'),
                password=self.settings.get('password', None),
                host=self.settings.get('host', None),  # default to unix socket
                port=self.settings.get('port', '5432'),
                # cursor_factory = DictCursor,
            )
            self.paramstyle = '%s'
            # default statements for postgres
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'mysql':
            self.dbapi = __import__('MySQLdb', fromlist=[''])
            from MySQLdb import cursors
            self.__dbpool = ReconnectingConnectionPool('MySQLdb',
                db=self.settings.get('database'),
                user=self.settings.get('username'),
                passwd=self.settings.get('password', None),
                host=self.settings.get('host', 'localhost'),  # should default to unix socket
                port=self.settings.get('port', 3306),
                cursorclass=cursors.DictCursor,
                charset='utf8',
                use_unicode=True,
                # connpool settings
                cp_reconnect=True,
                #cp_noisy = True,
                #cp_min = 1,
                #cp_max = 1,
            )
            self.paramstyle = '%s'
            self.identifier = '`'  # MySQL quoting
            # default statements for mysql
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                # 'upsert': "REPLACE INTO $table ($fields) VALUES ($values)",
                'upsert': "INSERT INTO $table:esc SET $fields_values ON DUPLICATE KEY UPDATE $fields_values",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'firebird':  # untested
            self.dbapi = __import__('fdb', fromlist=[''])
            self.__dbpool = ConnectionPool('fdb',
                database=self.settings.get('database'),
                user=self.settings.get('username'),
                password=self.settings.get('password', None),
                host=self.settings.get('host', None),  # default to unix socket
                port=self.settings.get('port', 3050),
                #dialect = 1, # necessary for all dialect 1 databases
                charset='UTF8',  # specify a character set for the connection
            )
            self.paramstyle = '?'
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })

        self.queries.update(kwargs.get('queries', {}))

    @classmethod
    def from_crawler(cls, crawler):
        """Scrapy factory: build the pipeline from crawler settings."""
        if not crawler.settings.get('SQLMAGIC_DATABASE'):
            raise NotConfigured('No database connection settings found.')
        o = cls(
            settings=crawler.settings.get('SQLMAGIC_DATABASE'),
            stats=crawler.stats,
            queries=crawler.settings.get('SQLMAGIC_QUERIES', {}),
            debug=crawler.settings.getbool('SQLMAGIC_DEBUG')
        )
        return o

    def open_spider(self, spider):
        self.on_connect()

    def on_connect(self):
        ## override this to run some queries after connecting
        # e.g. create tables for an in-memory SQLite database
        pass

    def close_spider(self, spider):
        self.shutdown()

    def shutdown(self):
        """Shutdown connection pool, kill associated threads"""
        self.__dbpool.close()

    def process_item(self, item, spider):
        """Process the item."""
        # Only handle items inheriting SQLItem
        if not isinstance(item, SQLItem):
            return item

        self.stats.inc_value('sqlmagic/total_items_caught')

        # always return original item
        deferred = self.operation(item, spider)
        deferred.addBoth(lambda _: item)
        return deferred

    def operation(self, item, spider):
        """Insert the item; on a duplicate-key IntegrityError fall back to
        an UPDATE. Returns the Deferred for the whole chain."""

        def on_insert(result, query, params):
            self.stats.inc_value('sqlmagic/sqlop_success_insert')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG, spider=spider)
            return result

        def on_update(result, query, params):
            self.stats.inc_value('sqlmagic/sqlop_success_update')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG, spider=spider)
            return result

        def on_integrityerror(error, query, params):
            error.trap(self.dbapi.IntegrityError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_integrity')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s failed executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.INFO, spider=spider)
            # error.raiseException() # keep bubbling

        def on_operationalerror(error, query, params):
            error.trap(self.dbapi.OperationalError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_operational')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s failed executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.WARNING, spider=spider)
            # error.raiseException() # keep bubbling

        def on_seriouserror(error, query, params):
            error.trap(self.dbapi.ProgrammingError, self.dbapi.InterfaceError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_connection')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s FAILED executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.WARNING, spider=spider)
            error.raiseException()  # keep bubbling
            return error

        def update(error, query, params):
            # Only a duplicate-key violation (MySQL errno 1062) triggers the
            # UPDATE fallback; everything else keeps bubbling.
            error.trap(self.dbapi.IntegrityError)
            if error.value[0] != 1062:  # Duplicate key
                error.raiseException()  # keep bubbling
            self.stats.inc_value('sqlmagic/sqlop_update_after_insert_tries')
            d = self.__dbpool.runInteraction(self.transaction, query, params, item, spider)
            d.addCallback(on_update, query, params)
            return d

        # try insert
        query, params = _sql_format(self.queries['insert'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        #query, params = _sql_format(self.queries['upsert'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        deferred = self.__dbpool.runInteraction(self.transaction, query, params, item, spider)
        deferred.addCallback(on_insert, query, params)
        deferred.addErrback(on_seriouserror, query, params)
        deferred.addErrback(on_operationalerror, query, params)
        #deferred.addErrback(on_integrityerror, query, params) # ignore failing inserts before update
        # on failure, update
        query, params = _sql_format(self.queries['update'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        deferred.addErrback(update, query, params)
        deferred.addErrback(on_seriouserror, query, params)
        deferred.addErrback(on_operationalerror, query, params)
        deferred.addErrback(on_integrityerror, query, params)
        deferred.addErrback(self._database_error, item, spider)

        self.stats.inc_value('sqlmagic/total_items_returned')
        return deferred

    def transaction(self, txn, query, params, item, spider):
        # Stat key is derived from the statement verb (insert/update/...).
        self.stats.inc_value('sqlmagic/sqlop_transact_%s' % query[:6].lower())
        txn.execute(query, params)

    def _log_preparedsql(self, query, params):
        """Simulate escaped query for log"""
        for p in params:
            query = re.sub('(\\' + self.paramstyle + r'\d?)', '"%s"' % p, query, count=1)
        return query

    def _database_error(self, e, item, spider=None):
        """Log exceptions."""
        if spider:
            log.err(e, spider=spider)
        else:
            log.err(e)

    def query(self, sql):
        # run a query in the connection pool
        # parameters for prepared statements must be passed as 'sql=(query, params)'
        # (possible use-case from inside spider code)
        #
        # FIX: the original referenced an undefined name `query` here, and
        # its second bare `if/else` always overwrote the SELECT branch's
        # deferred with runOperation; both corrected with an elif chain.
        head = sql[:6].lower()
        if head.startswith('select'):
            deferred = self.__dbpool.runQuery(sql)
        elif head.startswith('call'):
            # potential fail: procedure must run a SELECT for this,
            # otherwise it should do runOperation
            deferred = self.__dbpool.runQuery(sql)
        else:
            deferred = self.__dbpool.runOperation(sql)
        return deferred
class SQLMagicPipeline(object):
    """Scrapy item pipeline persisting SQLItems through a Twisted adbapi
    connection pool.

    Supports sqlite/pgsql/mysql/firebird backends; items are inserted and,
    on duplicate-key IntegrityError, updated instead. Query templates live
    in ``self.queries`` and are rendered by ``_sql_format`` (defined
    elsewhere in this module).
    """

    def __init__(self, settings, **kwargs):
        """Connect to database in the pool.

        :param settings: dict of connection settings (drivername, database,
            username, password, host, port); raises NotConfigured otherwise.
        :param kwargs: optional ``stats`` (Scrapy stats collector),
            ``debug`` (bool), ``queries`` (template overrides).
        """
        if not isinstance(settings, dict):
            raise NotConfigured('No database connection settings found.')
        self.settings = settings
        # NOTE(review): stats defaults to None; process_item/operation call
        # self.stats.inc_value unconditionally — confirm direct (non-crawler)
        # construction always passes stats.
        self.stats = kwargs.get('stats')
        self.debug = kwargs.get('debug', False)
        self.paramstyle = ':'
        self.identifier = '"'  # default to ANSI quoting
        self.queries = {
            'select': "SELECT $fields FROM $table:esc WHERE $indices:and",  # select on UniqueFields
            'selectall': "SELECT $fields FROM $table:esc",
            'selectone': "SELECT $fields FROM $table:esc WHERE $indices:and LIMIT 1",  # if backend supports LIMIT
            # 'delete': "DELETE FROM $table:esc WHERE $indices:and", # match on UniqueFields
            'deleteme': "DELETE FROM $table:esc WHERE $fields_values:and",  # exact item match
        }
        self.dbapi = None
        if self.settings.get('drivername') == 'sqlite':
            self.dbapi = __import__('sqlite3', fromlist=[''])
            self.__dbpool = ConnectionPool(
                'sqlite3',
                self.settings.get('database', ':memory:'),
                # apparently the connection pool / thread pool does not do the teardown in the same thread
                # https://twistedmatrix.com/trac/ticket/3629
                # therefore throwing errors on finalClose at reactor shutdown
                # TODO: should be able to work around that?
                check_same_thread=False,  # SQLite must be compiled threadsafe to use this
                # limit connection pool to one thread to avoid "database is locked" errors
                #cp_max=1,
                # - or raise the database timeout sufficiently
                timeout=300,
            )
            # alternative escaping parameter
            #self.paramstyle = '?'
            #self.paramstyle = ':'
            #self.paramstyle = '$'
            # default statements for sqlite
            # NOTE(review): "INSERT INTO ... SET ..." is MySQL syntax and looks
            # invalid for SQLite — confirm this template is ever exercised.
            self.queries.update({
                'insert': "INSERT INTO $table:esc SET $fields_values",
                'upsert': "INSERT OR REPLACE INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'pgsql':
            self.dbapi = __import__('psycopg2', fromlist=[''])
            #from psycopg2.extras import DictCursor
            self.__dbpool = ConnectionPool(
                'psycopg2',
                database=self.settings.get('database'),
                user=self.settings.get('username'),
                password=self.settings.get('password', None),
                host=self.settings.get('host', None),  # default to unix socket
                port=self.settings.get('port', '5432'),
                # cursor_factory = DictCursor,
            )
            self.paramstyle = '%s'
            # default statements for postgres
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'mysql':
            self.dbapi = __import__('MySQLdb', fromlist=[''])
            from MySQLdb import cursors
            self.__dbpool = ReconnectingConnectionPool(
                'MySQLdb',
                db=self.settings.get('database'),
                user=self.settings.get('username'),
                passwd=self.settings.get('password', None),
                host=self.settings.get('host', 'localhost'),  # should default to unix socket
                port=self.settings.get('port', 3306),
                cursorclass=cursors.DictCursor,
                charset='utf8',
                use_unicode=True,
                # connpool settings
                cp_reconnect=True,
                #cp_noisy = True,
                #cp_min = 1,
                #cp_max = 1,
            )
            self.paramstyle = '%s'
            self.identifier = '`'  # MySQL quoting
            # default statements for mysql
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                # 'upsert': "REPLACE INTO $table ($fields) VALUES ($values)",
                'upsert': "INSERT INTO $table:esc SET $fields_values ON DUPLICATE KEY UPDATE $fields_values",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        elif self.settings.get('drivername') == 'firebird':
            # untested
            self.dbapi = __import__('fdb', fromlist=[''])
            self.__dbpool = ConnectionPool(
                'fdb',
                database=self.settings.get('database'),
                user=self.settings.get('username'),
                password=self.settings.get('password', None),
                host=self.settings.get('host', None),  # default to unix socket
                port=self.settings.get('port', 3050),
                #dialect = 1, # necessary for all dialect 1 databases
                charset='UTF8',  # specify a character set for the connection
            )
            self.paramstyle = '?'
            self.queries.update({
                'insert': "INSERT INTO $table:esc ($fields) VALUES ($values)",
                'update': "UPDATE $table:esc SET $fields_values WHERE $indices:and",
            })
        # user-supplied templates override the driver defaults
        self.queries.update(kwargs.get('queries', {}))

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor wiring in crawler settings and stats."""
        if not crawler.settings.get('SQLMAGIC_DATABASE'):
            raise NotConfigured('No database connection settings found.')
        o = cls(settings=crawler.settings.get('SQLMAGIC_DATABASE'),
                stats=crawler.stats,
                queries=crawler.settings.get('SQLMAGIC_QUERIES', {}),
                debug=crawler.settings.getbool('SQLMAGIC_DEBUG'))
        return o

    def open_spider(self, spider):
        self.on_connect()

    def on_connect(self):
        ## override this to run some queries after connecting
        # e.g. create tables for an in-memory SQLite database
        pass

    def close_spider(self, spider):
        self.shutdown()

    def shutdown(self):
        """Shutdown connection pool, kill associated threads"""
        self.__dbpool.close()

    def process_item(self, item, spider):
        """Process the item.

        Non-SQLItems pass through untouched; SQLItems are written via
        ``operation`` and the original item is always returned (addBoth).
        """
        # Only handle items inheriting SQLItem
        if not isinstance(item, SQLItem):
            return item
        self.stats.inc_value('sqlmagic/total_items_caught')
        # always return original item
        deferred = self.operation(item, spider)
        deferred.addBoth(lambda _: item)
        return deferred

    def operation(self, item, spider):
        """Insert the item; on duplicate-key IntegrityError retry as UPDATE.

        Returns the Deferred of the insert/update chain; errors end in
        ``_database_error`` so the chain never escapes unlogged.
        """
        def on_insert(result, query, params):
            self.stats.inc_value('sqlmagic/sqlop_success_insert')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG, spider=spider)
            return result

        def on_update(result, query, params):
            self.stats.inc_value('sqlmagic/sqlop_success_update')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG, spider=spider)
            return result

        def on_integrityerror(error, query, params):
            # swallows IntegrityError (logged + counted only)
            error.trap(self.dbapi.IntegrityError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_integrity')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s failed executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.INFO, spider=spider)
            # error.raiseException() # keep bubbling

        def on_operationalerror(error, query, params):
            # swallows OperationalError (logged + counted only)
            error.trap(self.dbapi.OperationalError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_operational')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s failed executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.WARNING, spider=spider)
            # error.raiseException() # keep bubbling

        def on_seriouserror(error, query, params):
            # programming/interface errors keep bubbling down the chain
            error.trap(self.dbapi.ProgrammingError, self.dbapi.InterfaceError)
            e = error.getErrorMessage()
            self.stats.inc_value('sqlmagic/error_connection')
            if self.debug:
                qlog = self._log_preparedsql(query, params)
                log.msg('%s FAILED executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.WARNING, spider=spider)
            error.raiseException()  # keep bubbling
            return error

        def update(error, query, params):
            # insert failed with a duplicate key -> run the update statement
            error.trap(self.dbapi.IntegrityError)
            # NOTE(review): 1062 is the MySQL duplicate-key code; other
            # drivers' IntegrityError values may not be indexable — confirm.
            if error.value[0] != 1062:  # Duplicate key
                error.raiseException()  # keep bubbling
            #e = error.getErrorMessage()
            #if self.debug:
            #    qlog = self._log_preparedsql(query, params)
            #    log.msg('%s got error %s - trying update' % (self.__class__.__name__, e), level=log.DEBUG, spider=spider)
            self.stats.inc_value('sqlmagic/sqlop_update_after_insert_tries')
            d = self.__dbpool.runInteraction(self.transaction, query, params, item, spider)
            d.addCallback(on_update, query, params)
            return d

        # try insert
        query, params = _sql_format(self.queries['insert'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        #query, params = _sql_format(self.queries['upsert'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        deferred = self.__dbpool.runInteraction(self.transaction, query, params, item, spider)
        deferred.addCallback(on_insert, query, params)
        deferred.addErrback(on_seriouserror, query, params)
        deferred.addErrback(on_operationalerror, query, params)
        #deferred.addErrback(on_integrityerror, query, params) # ignore failing inserts before update
        # on failure, update
        query, params = _sql_format(self.queries['update'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        deferred.addErrback(update, query, params)
        deferred.addErrback(on_seriouserror, query, params)
        deferred.addErrback(on_operationalerror, query, params)
        deferred.addErrback(on_integrityerror, query, params)
        deferred.addErrback(self._database_error, item, spider)
        # deferred = self.insert_or_update((query,params), (update, uparams), item, spider)
        self.stats.inc_value('sqlmagic/total_items_returned')
        return deferred

    def transaction(self, txn, query, params, item, spider):
        """Execute one statement inside a pool transaction (result ignored)."""
        self.stats.inc_value('sqlmagic/sqlop_transact_%s' % query[:6].lower())
        txn.execute(query, params)

    """
    def xtransaction(self, txn, query, params, item, spider):
        # primary key check
        query, params = _sql_format(self.queries['select'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        txn.execute(query, params)
        result = txn.fetchone()
        if result:
            log.msg("Item already in db: (id) %s item:\n%r" % (result['id'], item), level=log.WARNING)

        query, params = _sql_format(self.queries['insert'], item, paramstyle=self.paramstyle, identifier=self.identifier)
        # transaction in thread
        qlog = self._log_preparedsql(query, params)
        try:
            txn.execute(query, params)
        except self.dbapi.IntegrityError as e:
            #spider.log('%s FAILED executing: %s' % (self.__class__.__name__, qlog), level=log.DEBUG)
            query, params = _sql_format(self.queries['update'], item, paramstyle=self.paramstyle, identifier=self.identifier)
            qlog = self._log_preparedsql(query, params)
            try:
                #spider.log('%s executing: %s' % (self.__class__.__name__, qlog), level=log.DEBUG)
                txn.execute(query, params)
            except self.dbapi.OperationalError as e:
                # retrying in new transaction
                # spider.log('%s errored. Retrying.\nError: %s\nQuery: %s' % (self.__class__.__name__, e, qlog), level=log.WARNING)
                # self._spool.append((query, params, item))
            #except Exception as e:
                if self.debug:
                    spider.log('%s FAILED executing: %s\nError: %s' % (self.__class__.__name__, qlog, e), level=log.WARNING)
                raise
            finally:
                if self.debug:
                    spider.log('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG)
        except self.dbapi.OperationalError as e:
            # also try again
            if self.debug:
                spider.log('%s failed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG)
            raise
        finally:
            if self.debug:
                spider.log('%s executed: %s' % (self.__class__.__name__, qlog), level=log.DEBUG)
    """

    def _log_preparedsql(self, query, params):
        """Simulate escaped query for log.

        Substitutes each parameter (quoted) into the query's placeholders,
        one at a time, purely for readable log output.
        """
        for p in params:
            query = re.sub('(\\' + self.paramstyle + r'\d?)', '"%s"' % p, query, count=1)
        return query

    def _database_error(self, e, item, spider=None):
        """Log exceptions (terminal errback of the operation chain)."""
        if spider:
            log.err(e, spider=spider)
        else:
            log.err(e)

    def query(self, sql):
        '''Run a query in the connection pool.

        Parameters for prepared statements must be passed as
        ``sql=(query, params)`` (possible use-case from inside spider code).

        Returns a Deferred: for SELECT/CALL it fires with the fetched rows
        (via runQuery), otherwise with None (via runOperation).

        Spider Example: build start requests from database results

            from scrapy.exceptions import CloseSpider, NotConfigured
            from ..pipelines.sqlmagic import SQLMagicPipeline

            class MySpider(Spider):

                def spider_opened(self, spider):
                    try:
                        self.db = SQLMagicPipeline(self.settings.get('SQLMAGIC_DATABASE'))
                    except NotConfigured:
                        raise CloseSpider('Could not get database settings.')

                @defer.inlineCallbacks
                def db_queries(self, response):
                    query = """CALL procedure ()"""
                    result = yield self.db.query(query)
                    # build requests
                    requests = []
                    for value in result:
                        r = yield self.build_request_fromdb(response, value)
                        requests.append(r)
                    # queue them
                    defer.returnValue(requests)

                def start_requests(self):
                    yield Request(self.start_urls[0], callback=self.database_queries)

                def build_request_fromdb(self, response, db):
                    # custom logic to convert db result into a request
                    r = Request(response.url)
                    r.callback = self.parse
                    return r
        '''
        # FIX(review): the original referenced an undefined name `query`
        # (NameError on every call) and used an `if`/`if`/`else` chain, so a
        # SELECT's deferred was overwritten by the final runOperation branch.
        if isinstance(sql, (tuple, list)):
            # documented prepared-statement form: (query, params)
            statement, params = sql[0], tuple(sql[1:])
        else:
            statement, params = sql, ()
        verb = statement.lstrip().lower()
        if verb.startswith('select') or verb.startswith('call'):
            # potential fail: a procedure must run a SELECT for this,
            # otherwise it should do runOperation
            return self.__dbpool.runQuery(statement, *params)
        return self.__dbpool.runOperation(statement, *params)