Пример #1
0
    def _try_fetch_upload_task_resource_id(self, job):
        """Find the id of the resource of type ``job.resource_type`` among the
        resources of the job's upload task.

        Returns the resource id, or None when the listing request itself
        failed (that failure is only logged).

        Raises RuntimeError when the listing succeeded but yields no usable
        resource id of the requested type.
        """
        upload_task_id = job.upload_task_id

        try:
            listing = self.sandbox.list_task_resources(upload_task_id)
        except Exception as err:
            # XXX TODO: investigate. An observed failure, kept for reference:
            # 2016-04-21 19:28:15,834 27491 WARNING  resource_sharing:
            # Failed to list resources of task 56706912:
            # HTTPSConnectionPool(host='sandbox.yandex-team.ru', port=443):
            # Max retries exceeded with url: /api/v1.0//resource?task_id=56706912&limit=100
            # (Caused by <class 'httplib.BadStatusLine'>: '')
            logging.warning("Failed to list resources of task %d: %s" % (upload_task_id, err))
            return None

        matching_ids = [
            item['id']
            for item in listing['items']
            if item['type'] == job.resource_type
        ]

        # When several resources match, the last one listed is used.
        found_id = matching_ids[-1] if matching_ids else None

        if not found_id:
            raise RuntimeError(
                "Resource of type %s not found in task %d resources"
                % (job.resource_type, upload_task_id)
            )

        return found_id
Пример #2
0
    def _sandbox_wait2_loop(self):
        """Worker loop: resolve the resource id for each job in ``wait2_queue``.

        Blocks on ``wait2_queue_not_empty`` until a job is queued or
        ``should_stop`` is set, then asks
        ``_try_fetch_upload_task_resource_id`` for the job's resource id:

        * the call raised -> the job's promise is failed with that error;
        * a falsy result  -> the fetch is rescheduled via ``_schedule_retry``
          (``Action.FETCH_RESOURCE_ID``, 3 second delay);
        * otherwise       -> the job's promise is fulfilled with the id.

        Returns when ``should_stop`` becomes true.
        """
        while not self.should_stop:
            T('begin_wait2_loop')

            with self.lock:
                # Loop around wait(): a wake-up does not guarantee that the
                # queue actually has an item.
                while not(self.should_stop or self.wait2_queue):
                    with Timing('wait2_queue_wait'):
                        self.wait2_queue_not_empty.wait()

                if self.should_stop: # TODO
                    return

                job = self.wait2_queue.popleft()

            # The fetch is done outside the lock.
            try:
                with Timing('sbx_wait2_fetch_task_resource_id'):
                    resource_id = self._try_fetch_upload_task_resource_id(job)
            except Exception as e:
                logging.warning("Failed to get resource %s from task %d: %s" \
                    % (job.resource_type, job.upload_task_id, e))
                # Pass the actual failure on, like the other
                # _set_promise_error call sites do; previously the caught
                # exception was dropped here.
                self._set_promise_error(job, e)
                del job
                continue

            if not resource_id:
                self._schedule_retry(job, self.Action.FETCH_RESOURCE_ID, delay=3.0)
            else:
                logging.debug('Done with %s, resource_id = %d' % (job, resource_id))
                self._set_promise_value(job, resource_id)

            # Drop the local reference before blocking on the queue again.
            del job
Пример #3
0
        def _finished(job_id, f):
            """Handle the outcome of the sky share operation for ``job_id``.

            ``f.get()`` either yields the torrent id or raises. On success
            the job gets its ``torrent_id`` and is appended to
            ``upload_queue``. On failure the job's promise is failed with an
            ENOENT ``OSError`` if any of its files does not exist; otherwise
            the job is handed to ``schedule_retry``.
            """
            T('enter_sky_share_finished %d' % job_id)

            job = self.running[job_id]
            in_progress.remove(job)

            try:
                with Timing('sky_share_future_get %d' % job_id):
                    torrent_id = f.get()
            #except ???Error as e: # TODO
                #pass
            except Exception as e:
                logging.warning('sky share for %s faled: %s' % (job, e))

                # TODO better checks or collect STDERR
                some_file_missing = any(
                    not os.path.exists(job.directory + '/' + name)
                    for name in job.files
                )

                if some_file_missing:
                    # A missing source file cannot be fixed by retrying.
                    msg = 'Failed to share %s/%s: %s' % (job.directory, job.files, e)
                    self._set_promise_error(job, OSError(errno.ENOENT, msg))
                else:
                    schedule_retry(job)
                return

            logging.debug('sky share successfully done for %s: %s' % (job, torrent_id))

            with self.lock:
                job.torrent_id = torrent_id
                self.upload_queue.append(job)
                self.upload_queue_not_empty.notify()
Пример #4
0
    def send_update(update, is_final):
        """Send a graph update to the rem server via ``rem_proxy.update_graph``.

        Returns whatever ``update_graph`` returns.

        Raises:
            RuntimeError: when ``is_xmlrpc_exception(e, WrongTaskIdError)``
                holds for the failure; ``on_notifier_fail()`` is invoked
                first (its own errors are logged and ignored).
            RemNotifier.RetriableError: for any other failure.
        """
        try:
            return rem_proxy.update_graph(
                opts.task_id,
                rpc_server_addr,
                update,
                is_final
            )

        except Exception as e:
            # Socket errors are logged without a traceback; everything else
            # is logged with one.
            if isinstance(e, socket.error):
                logging.warning("on_notifier_fail: %s" % e)
            else:
                logging.exception("on_notifier_fail")

            if is_xmlrpc_exception(e, WrongTaskIdError):
                try:
                    on_notifier_fail()
                except Exception:
                    # Best effort: a failing callback must not mask the
                    # RuntimeError below. Narrowed from a bare ``except:`` so
                    # that KeyboardInterrupt/SystemExit are not swallowed.
                    logging.exception("on_notifier_fail")

                raise RuntimeError("Failed to send data to rem server: %s" % e.faultString)

            else:
                # FIXME Actually if isinstance(e, xmlrpclib.Fault) then not retriable
                #       but not fatal as WrongTaskIdError
                raise RemNotifier.RetriableError(str(e))
Пример #5
0
    def _try_create_upload_task(self, job):
        """Create, optionally describe, and start a Sandbox upload task for ``job``.

        Returns the started task, or None if any step raised (the failure is
        logged as a warning).
        """
        # TODO raise on non-recoverable errors

        try:
            task = self._create_resource_upload_task(job)
        except Exception as err:
            logging.warning('Failed to create upload task %s to Sandbox: %s' % (job, err))
            return None

        # Steps to run on the created task, in order, each paired with the
        # warning template to log if it fails.
        follow_ups = []
        if job.description:
            follow_ups.append((
                'Failed to update task %s: %s',
                lambda: task.update(description=job.description),
            ))
        follow_ups.append((
            'Failed to start upload task %s to Sandbox: %s',
            lambda: task.start(),
        ))

        for failure_template, run_step in follow_ups:
            try:
                run_step()
            except Exception as err:
                logging.warning(failure_template % (job, err))
                return None

        return task
Пример #6
0
    def _sandbox_wait1_loop(self):
        """Worker loop: poll Sandbox for the status of pending upload tasks.

        Jobs are taken from ``wait1_queue`` and tracked locally by their
        ``upload_task_id``. Roughly every ``poll_interval`` seconds the
        statuses of all tracked tasks are requested with
        ``self.sandbox.list_task_statuses`` and each task is handled:

        * status in ``noop_sandbox_statuses`` -> keep tracking;
        * ``SUCCESS``                         -> job moves to ``wait2_queue``;
        * ``FAILURE``/``EXCEPTION``/no status -> ``_schedule_retry`` with
          ``Action.CREATE_UPLOAD_TASK`` (5 second delay);
        * anything else                       -> the job's promise is failed.

        Returns when ``should_stop`` becomes true.
        """
        poll_interval = 3.0
        in_progress = {}  # upload_task_id -> job

        # Statuses for which the task is simply polled again later.
        noop_sandbox_statuses = {
            'DRAFT',
            'ENQUEUING', 'ENQUEUED', 'PREPARING', 'EXECUTING', 'TEMPORARY',
            'FINISHING', 'STOPPING', 'WAIT_RES', 'WAIT_TASK', 'WAIT_TIME',
        }

        next_poll_time = time.time()

        while not self.should_stop:
            T('begin_wait1_loop')

            with self.lock:
                # With nothing in progress wait indefinitely for new jobs;
                # otherwise wait no longer than until the next poll is due.
                timeout = None
                if in_progress:
                    timeout = max(0.0, next_poll_time - time.time())

                T('before_before_wait1_queue_not_empty_sleep %s' \
                    % ((timeout, self.should_stop, len(self.wait1_queue)),))

                # A timeout of 0.0 means the poll is already due: skip the wait.
                if (timeout is None or timeout) and not(self.should_stop or self.wait1_queue):
                    T('before_wait1_queue_not_empty_sleep %s' % timeout)
                    self.wait1_queue_not_empty.wait(timeout)

                if self.should_stop: # TODO
                    return

                # At most one queued job is picked up per iteration.
                job = None
                if self.wait1_queue:
                    job = self.wait1_queue.popleft()
                    in_progress[job.upload_task_id] = job
                    del job

                if time.time() < next_poll_time:
                    logging.debug('continue_wait1_sleep')
                    continue

            try:
                with Timing('sbx_list_task_statuses'):
                    # Pass a real list: on Python 3 ``dict.keys()`` is a view,
                    # not a list.
                    statuses = self.sandbox.list_task_statuses(list(in_progress))
            except Exception as e:
                logging.warning("Failed to get sandbox tasks' statuses: %s" % e)
                continue
            finally:
                # Runs on success and on failure (including the ``continue``
                # above), so a failed request is not retried immediately.
                next_poll_time = max(next_poll_time + poll_interval, time.time())
                T('wait1_next_poll_time=%s' % next_poll_time)

            logging.debug("Task statuses: %s" % statuses) # TODO Comment out

            done = []
            # Iterate over a snapshot of the keys: entries are popped inside
            # the loop, and mutating a dict while iterating over it raises
            # RuntimeError on Python 3.
            for task_id in list(in_progress):
                status = statuses.get(task_id)

                if status in noop_sandbox_statuses:
                    continue

                job = in_progress.pop(task_id)

                if status == 'SUCCESS':
                    done.append(job)
                    logging.debug('Upload task=%d in SUCCESS for %s' % (task_id, job))

                # A task missing from the answer (status is None) is handled
                # like a failed one.
                elif status in ['FAILURE', 'EXCEPTION'] or status is None:
                    logging.warning("Task %d in FAILURE. Will create new task" % task_id)
                    self._schedule_retry(job, self.Action.CREATE_UPLOAD_TASK, delay=5.0)

                else:
                    logging.error("Unknown task status %s for task=%d, %s" % (status, task_id, job))
                    self._set_promise_error(job, RuntimeError("Unknown task status %s for task_id=%d" % (status, task_id)))

            T('after_process_all_wait1_statuses')

            with self.lock:
                self.wait2_queue.extend(done)
                self.wait2_queue_not_empty.notify()
Пример #7
0
def get_token_info(token):
    """Ask blackbox about an OAuth token and return the result as ``OAuthInfo``.

    The request is retried on network errors and on 5xx answers, sleeping
    between attempts, until ``MAX_TOTAL_FETCH_TIME`` seconds have passed
    since the call started.

    Returns:
        ``OAuthInfo`` with ``token``, ``error`` and ``token_status`` set;
        ``login`` and ``client_id`` are set only when the status equals
        ``TokenStatus.VALID``.

    Raises:
        NetworkError: the deadline passed and the last attempt failed on the
            network level.
        BlackboxServerError: the deadline passed and the last attempt got a
            5xx answer.
        RuntimeError: blackbox answered with a non-200 status, or the answer
            has no "status" field.
    """
    # FIXME use https (it's slower: 20ms vs 5ms)
    url = "http://blackbox.yandex-team.ru/blackbox"

    # Let requests build the query string so the caller-supplied token is
    # escaped and cannot inject extra query parameters. A list of pairs keeps
    # the parameter order stable.
    params = [
        ("method", "oauth"),
        ("oauth_token", token),
        ("userip", "127.0.0.1"),
        ("format", "json"),
    ]

    last_error = None

    delay = MIN_FETCH_INTERVAL
    deadline = time.time() + MAX_TOTAL_FETCH_TIME

    while True:
        now = time.time()
        if now > deadline:
            if last_error is not None:
                raise last_error
            else:
                fetch_timeout = 0.0
        else:
            # Never let a single request run past the overall deadline.
            fetch_timeout = min(FETCH_TIMEOUT, deadline - now)

        ok = False

        try:
            resp = requests.get(url, params=params, timeout=fetch_timeout)
        except (exceptions.Timeout, exceptions.SSLError, exceptions.ConnectionError) as e:
            logging.warning("Failed to get answer from blackbox: %s" % e)
            last_error = NetworkError(str(e))
        else:
            # Floor division: with ``/`` on Python 3 only a status of exactly
            # 500 would be treated as a server error.
            if resp.status_code // 100 == 5:  # doc says that blackbox always returns 200...
                msg = "Got %d http status from blackbox" % resp.status_code
                last_error = BlackboxServerError(msg)
                logging.warning(msg)
            else:
                ok = True

        if ok:
            break

        now = time.time()
        if now > deadline:
            raise last_error

        # logging.debug('RETRY: %s' % last_error)
        # Double the delay, capped by the maximum interval and by the time
        # left until the deadline.
        delay = min(delay * 2, MAX_FETCH_INTERVAL, deadline - now)
        time.sleep(delay)

    if resp.status_code != 200:
        raise RuntimeError("blackbox answered with %s status code" % resp.status_code)

    resp = resp.json()

    ret = OAuthInfo()
    ret.token = token
    ret.error = resp.get("error")

    if "status" in resp:
        ret.token_status = resp["status"]["value"]

        if ret.token_status == TokenStatus.VALID:
            ret.login = resp["login"]
            oauth = resp["oauth"]
            ret.client_id = oauth["client_id"]
    else:
        raise RuntimeError(ret.error)

    return ret