示例#1
0
    def validate_conditions(self):
        """Verify the job is viable with respect to "external" conditions.

        This runs after schema validation (and is meant to run after
        print_spec), since none of these checks concern the validity of the
        schema format. Every job path is handed to
        `validate_path_conditions`; afterwards the target database, the
        target table and the configured `file_id_column` are checked through
        `db_utils`. The first failing database check is logged as an error
        and the process exits with status 1.
        """
        self.s3_conn = None
        for job_path in self.job.paths:
            self.validate_path_conditions(job_path)

        spec = self.job.spec
        target = spec.target

        # Look up whether the target database/table exist; the results are
        # only inspected once the `with` block has ended.
        with pool.get_connection(database='INFORMATION_SCHEMA', **spec.connection) as conn:
            has_database, has_table = db_utils.validate_database_table(
                conn, target.database, target.table)

        # Checked in order: a missing database is reported before a missing
        # table.
        existence_checks = (
            (has_database, "The specified database `%s` does not exist.", target.database),
            (has_table, "The specified table `%s` does not exist.", target.table),
        )
        for exists, message, name in existence_checks:
            if not exists:
                self.logger.error(message, name)
                sys.exit(1)

        file_id_col = spec.options.file_id_column
        with pool.get_connection(database='INFORMATION_SCHEMA', **spec.connection) as conn:
            column_ok = db_utils.validate_file_id_column(
                conn, target.database, target.table, file_id_col)
            if not column_ok:
                self.logger.error("The `file_id_column` specified (%s) must exist in the table and be of type BIGINT UNSIGNED", file_id_col)
                sys.exit(1)
示例#2
0
    def validate_conditions(self):
        """Check the job against "external" conditions.

        Called after schema validation (and intended to follow print_spec),
        because these checks are about things like file existence and
        database connectivity rather than the schema format.

        Steps, in order:
          1. run `validate_path_conditions` on every job path;
          2. confirm the target database and table exist;
          3. confirm the configured `file_id_column` is acceptable;
          4. when `self.options.dynamic_columns` is set, load the column
             names of the target table into the job spec options.

        A failure in steps 2-4 is logged as an error and the process exits
        with status 1.
        """
        self.s3_conn = None
        for candidate in self.job.paths:
            self.validate_path_conditions(candidate)

        job_spec = self.job.spec
        db_name = job_spec.target.database
        table_name = job_spec.target.table

        # Step 2: database/table existence.
        with pool.get_connection(database='INFORMATION_SCHEMA',
                                 **job_spec.connection) as conn:
            lookup = db_utils.validate_database_table(conn, db_name, table_name)
            has_database, has_table = lookup

        if not has_database:
            self.logger.error("The specified database `%s` does not exist.",
                              db_name)
            sys.exit(1)

        if not has_table:
            self.logger.error("The specified table `%s` does not exist.",
                              table_name)
            sys.exit(1)

        # Step 3: file id column.
        file_id_col = job_spec.options.file_id_column
        with pool.get_connection(database='INFORMATION_SCHEMA',
                                 **job_spec.connection) as conn:
            if not db_utils.validate_file_id_column(conn, db_name, table_name,
                                                    file_id_col):
                self.logger.error(
                    "The `file_id_column` specified (%s) must exist in the table and be of type BIGINT UNSIGNED",
                    file_id_col)
                sys.exit(1)

        # Step 4 only applies to jobs using dynamic columns.
        if not self.options.dynamic_columns:
            return

        # TODO: read in the first header line instead of taking the column
        # list from the target table.
        with pool.get_connection(db_name, **job_spec.connection) as conn:
            job_spec.options.columns = db_utils.get_column_names(
                conn, db_name, table_name)
            if not job_spec.options.columns:
                self.logger.error("The table specified (%s) must exist",
                                  table_name)
                sys.exit(1)
示例#3
0
 def kill_delete_query_if_exists(self, conn_args, conn_id):
     """Kill the query on `conn_id` if the processlist shows a DELETE there.

     conn_args: keyword arguments forwarded to `pool.get_connection`.
     conn_id: id of the processlist entry to look for.

     Nothing happens when no processlist row matches both the id and a
     query text containing DELETE.
     """
     lookup_sql = "SELECT id FROM processlist WHERE info LIKE '%%DELETE%%' AND id=%s"
     with pool.get_connection(database='information_schema',
                              **conn_args) as conn:
         matches = conn.query(lookup_sql, conn_id)
         if len(matches) == 0:
             return
         db_utils.try_kill_query(conn, conn_id)
示例#4
0
    def run(self):
        """Poll for tasks and process them until the worker is told to exit.

        Each iteration sleeps for a random interval below half a second and
        asks `self.tasks` for a task. `worker_working` is set to 0 when no
        task was returned and to 1 when one was. A returned task is processed
        through `_process_task` inside a fresh, non-pooled connection to the
        job's target database.

        Outcome handling per task:
          * RequeueTask / ConnectionException: the task is requeued.
          * TaskDoesNotExist: logged only.
          * WorkerException: the message is recorded via `task.error`.
          * any other exception: traceback logged at debug level, re-raised.

        Once `self.exiting()` is true the loop ends by raising
        ExitingException, which is handled at the bottom of this method.
        """
        self.jobs = Jobs()
        self.tasks = Tasks()
        task = None

        # Install no-op handlers for SIGINT/SIGQUIT; the loop below stops
        # based on self.exiting() instead.
        def ignore(*args, **kwargs):
            return None

        signal.signal(signal.SIGINT, ignore)
        signal.signal(signal.SIGQUIT, ignore)

        try:
            while not self.exiting():
                # Random delay of up to 0.5s before each poll.
                time.sleep(random.random() * 0.5)
                task = self.tasks.start()

                if task is None:
                    self.worker_working.value = 0
                else:
                    self.worker_working.value = 1

                    job_id = task.job_id
                    job = self.jobs.get(job_id)

                    # A connection id recorded on the task means an earlier
                    # attempt may still have a query running; kill it first.
                    old_conn_id = task.data.get('conn_id', None)
                    if old_conn_id is not None:
                        self.kill_query_if_exists(job.spec.connection, old_conn_id)

                    self.logger.info('Task %d: starting', task.task_id)

                    try:
                        # can't use a pooled connection due to transactions staying open in the
                        # pool on failure
                        with pool.get_connection(database=job.spec.target.database, pooled=False, **job.spec.connection) as db_connection:
                            db_connection.execute("BEGIN")
                            self._process_task(task, db_connection)
                        self.logger.info('Task %d: finished with success', task.task_id)
                    except (RequeueTask, ConnectionException):
                        self.logger.info('Task %d: download failed, requeueing', task.task_id)
                        self.logger.debug("Traceback: %s", traceback.format_exc())
                        task.requeue()
                    except TaskDoesNotExist:
                        self.logger.info('Task %d: finished with error, the task was either cancelled or deleted', task.task_id)
                        self.logger.debug("Traceback: %s", traceback.format_exc())
                    except WorkerException as e:
                        task.error(str(e))
                        self.logger.info('Task %d: finished with error', task.task_id)
                    except Exception:
                        # Unexpected failure: record the traceback, then let
                        # it propagate out of the worker loop.
                        self.logger.debug("Traceback: %s", traceback.format_exc())
                        raise

            raise ExitingException()

        except ExitingException:
            self.logger.debug('Worker exiting')
            # NOTE(review): the last task is requeued only when task.valid()
            # is false - confirm that this is the intended condition.
            if task is not None and not task.valid():
                try:
                    task.requeue()
                except APSWSQLStepQueueException:
                    # Best effort: a failed requeue during shutdown is ignored.
                    pass
示例#5
0
 def kill_query_if_exists(self, conn_args, conn_id):
     """Kill connection `conn_id` if the processlist shows a LOAD DATA on it.

     conn_args: keyword arguments forwarded to `pool.get_connection`.
     conn_id: id of the processlist entry to look for.
     """
     find_load_sql = "SELECT id FROM processlist WHERE info LIKE '%%LOAD DATA%%' AND id=%s"
     with pool.get_connection(database='information_schema', **conn_args) as conn:
         matching_rows = conn.query(find_load_sql, conn_id)
         if len(matching_rows) == 0:
             return
         # The whole connection is killed rather than just the query:
         # this is a LOAD DATA LOCAL query, and those don't end until the
         # file is fully read, even if they're killed.
         db_utils.try_kill_connection(conn, conn_id)
示例#6
0
 def kill_query_if_exists(self, conn_args, conn_id):
     """Terminate the connection `conn_id` when it is running a LOAD DATA.

     The processlist is searched for a row with the given id whose query
     text contains LOAD DATA; if one is found, the connection is killed
     through `db_utils.try_kill_connection`. Otherwise nothing is done.

     conn_args: keyword arguments forwarded to `pool.get_connection`.
     conn_id: id of the processlist entry to look for.
     """
     with pool.get_connection(database='information_schema',
                              **conn_args) as conn:
         rows = conn.query(
             "SELECT id FROM processlist WHERE info LIKE '%%LOAD DATA%%' AND id=%s",
             conn_id)
         load_in_progress = len(rows) > 0
         if load_in_progress:
             # Kill the connection, not the query: a LOAD DATA LOCAL query
             # doesn't end until the file is fully read, even after the
             # query itself has been killed.
             db_utils.try_kill_connection(conn, conn_id)
示例#7
0
    def abort(self):
        """Try to kill the currently active connection, if there is one.

        Returns True when an active connection id was set (whether or not
        the kill attempt could connect), and False when there was nothing
        to abort. The whole operation runs while holding `_conn_lock`.
        """
        with self._conn_lock:
            if self._active_conn_id is None:
                return False

            try:
                with pool.get_connection(database='', **self._job.spec.connection) as conn:
                    db_utils.try_kill_connection(conn, self._active_conn_id)
            except pool.PoolConnectionException:
                # Failing to connect most likely means the connection to the
                # database was lost, in which case the query is dead anyway.
                pass

            return True
示例#8
0
    def abort(self):
        """Kill the active connection, reporting whether there was one.

        While holding `_conn_lock`: if `_active_conn_id` is set, a new
        connection is opened and used to kill it, and True is returned. If
        no connection id is set, False is returned and nothing else happens.
        """
        with self._conn_lock:
            had_active_conn = self._active_conn_id is not None
            if had_active_conn:
                try:
                    with pool.get_connection(
                            database='',
                            **self._job.spec.connection) as killer_conn:
                        db_utils.try_kill_connection(killer_conn,
                                                     self._active_conn_id)
                except pool.PoolConnectionException:
                    # Being unable to connect likely means the database
                    # connection was lost, so the query is dead regardless.
                    pass
            return had_active_conn
示例#9
0
    def run(self):
        """Run the worker loop until `self.exiting()` reports true.

        On every pass the worker waits a random fraction of half a second,
        then asks `self.tasks` for a task. `worker_working` is set to 1 when
        a task was returned and to 0 otherwise. A task is processed by
        `_process_task` on a non-pooled connection to the target database
        of its job.

        Per-task outcomes:
          * RequeueTask / ConnectionException: the task is requeued.
          * TaskDoesNotExist: only logged.
          * WorkerException: stored on the task through `task.error`.
          * any other exception: traceback logged at debug level, re-raised.

        Leaving the loop raises ExitingException, which is caught at the end
        of this method to log the shutdown and possibly requeue the last
        task.
        """
        self.jobs = Jobs()
        self.tasks = Tasks()
        task = None

        # SIGINT and SIGQUIT get a no-op handler; the loop condition
        # (self.exiting()) decides when the worker stops.
        def ignore(*args, **kwargs):
            return None

        signal.signal(signal.SIGINT, ignore)
        signal.signal(signal.SIGQUIT, ignore)

        try:
            while not self.exiting():
                # Random delay of up to 0.5s before each poll.
                time.sleep(random.random() * 0.5)
                task = self.tasks.start()

                if task is None:
                    self.worker_working.value = 0
                else:
                    self.worker_working.value = 1

                    job_id = task.job_id
                    job = self.jobs.get(job_id)

                    # If the task carries a connection id, a query from an
                    # earlier attempt may still be running; kill it first.
                    old_conn_id = task.data.get('conn_id', None)
                    if old_conn_id is not None:
                        self.kill_query_if_exists(job.spec.connection,
                                                  old_conn_id)

                    self.logger.info('Task %d: starting', task.task_id)

                    try:
                        # can't use a pooled connection due to transactions staying open in the
                        # pool on failure
                        with pool.get_connection(
                                database=job.spec.target.database,
                                pooled=False,
                                **job.spec.connection) as db_connection:
                            db_connection.execute("BEGIN")
                            self._process_task(task, db_connection)
                        self.logger.info('Task %d: finished with success',
                                         task.task_id)
                    except (RequeueTask, ConnectionException):
                        self.logger.info(
                            'Task %d: download failed, requeueing',
                            task.task_id)
                        self.logger.debug("Traceback: %s",
                                          traceback.format_exc())
                        task.requeue()
                    except TaskDoesNotExist:
                        self.logger.info(
                            'Task %d: finished with error, the task was either cancelled or deleted',
                            task.task_id)
                        self.logger.debug("Traceback: %s",
                                          traceback.format_exc())
                    except WorkerException as e:
                        task.error(str(e))
                        self.logger.info('Task %d: finished with error',
                                         task.task_id)
                    except Exception:
                        # Unexpected failure: keep the traceback in the debug
                        # log and propagate the exception.
                        self.logger.debug("Traceback: %s",
                                          traceback.format_exc())
                        raise

            raise ExitingException()

        except ExitingException:
            self.logger.debug('Worker exiting')
            # NOTE(review): requeue happens only when task.valid() is false -
            # confirm that this is the intended condition.
            if task is not None and not task.valid():
                try:
                    task.requeue()
                except APSWSQLStepQueueException:
                    # Best effort: a requeue failure while exiting is ignored.
                    pass
示例#10
0
 def kill_delete_query_if_exists(self, conn_args, conn_id):
     """Kill the query running on `conn_id` when it is a DELETE.

     The processlist is searched for a row with the given id whose query
     text contains DELETE; only if such a row exists is
     `db_utils.try_kill_query` called.

     conn_args: keyword arguments forwarded to `pool.get_connection`.
     conn_id: id of the processlist entry to look for.
     """
     with pool.get_connection(database='information_schema', **conn_args) as conn:
         found = conn.query(
             "SELECT id FROM processlist WHERE info LIKE '%%DELETE%%' AND id=%s",
             conn_id)
         delete_running = len(found) > 0
         if delete_running:
             db_utils.try_kill_query(conn, conn_id)