Пример #1
0
 def output_UPDATE_MAYBE_WAKEUP(self, context):
     # External configuration and state has changed. Let's pretend
     # local state hasn't changed. If when we calculate changes that
     # still indicates some action should be taken that means we should
     # wake up:
     discovered = self._last_discovered_local_state
     try:
         changes = self.deployer.calculate_changes(self.configuration,
                                                   self.cluster_state,
                                                   discovered)
     except:
         # Something went wrong in calculation due to a bug in the
         # code. We should wake up just in case in order to be more
         # responsive.
         write_traceback()
         changes = None
     if not isinstance(changes, NoOp):
         self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
     else:
         # Check if the calculated NoOp suggests an earlier wakeup than
         # currently planned:
         remaining = self._sleep_timeout.getTime() - self.reactor.seconds()
         calculated = changes.sleep.total_seconds()
         if calculated < remaining:
             self._sleep_timeout.reset(calculated)
Пример #2
0
def detach_destroy_volumes(api):
    """
    Detach and destroy all volumes known to this API.
    If we failed to detach a volume for any reason,
    sleep for 1 second and retry until we hit CLEANUP_RETRY_LIMIT.
    This is to facilitate best effort cleanup of volume
    environment after each test run, so that future runs
    are not impacted.
    """
    volumes = api.list_volumes()
    retry = 0
    action_type = u"agent:blockdevice:cleanup:details"
    with start_action(action_type=action_type):
        while retry < CLEANUP_RETRY_LIMIT and len(volumes) > 0:
            for volume in volumes:
                try:
                    if volume.attached_to is not None:
                        api.detach_volume(volume.blockdevice_id)
                    api.destroy_volume(volume.blockdevice_id)
                except:
                    write_traceback(_logger)

            time.sleep(1.0)
            volumes = api.list_volumes()
            retry += 1

        if len(volumes) > 0:
            Message.new(u"agent:blockdevice:failedcleanup:volumes",
                        volumes=volumes).write()
Пример #3
0
def _retry_exception(f, steps=(0.1,) * 10, sleep=sleep):
    """
    Retry a function if it raises an exception.

    :return: Whatever the function returns.
    """
    steps = iter(steps)

    while True:
        try:
            Message.new(
                message_type=(u"flocker:provision:libcloud:retry_exception:trying"), function=fullyQualifiedName(f)
            ).write()
            return f()
        except:
            # Try to get the next sleep time from the steps iterator.  Do it
            # without raising an exception (StopIteration) to preserve the
            # current exception context.
            for step in steps:
                write_traceback()
                sleep(step)
                break
            else:
                # Didn't hit the break, so didn't iterate at all, so we're out
                # of retry steps.  Fail now.
                raise
Пример #4
0
def _retry_exception(f, steps=(0.1, ) * 10, sleep=sleep):
    """
    Retry a function if it raises an exception.

    :return: Whatever the function returns.
    """
    steps = iter(steps)

    while True:
        try:
            Message.new(
                message_type=(
                    u"flocker:provision:libcloud:retry_exception:trying"),
                function=fullyQualifiedName(f),
            ).write()
            return f()
        except:
            # Try to get the next sleep time from the steps iterator.  Do it
            # without raising an exception (StopIteration) to preserve the
            # current exception context.
            for step in steps:
                write_traceback()
                sleep(step)
                break
            else:
                # Didn't hit the break, so didn't iterate at all, so we're out
                # of retry steps.  Fail now.
                raise
Пример #5
0
def detach_destroy_volumes(api):
    """
    Detach and destroy all volumes known to this API.
    If we failed to detach a volume for any reason,
    sleep for 1 second and retry until we hit CLEANUP_RETRY_LIMIT.
    This is to facilitate best effort cleanup of volume
    environment after each test run, so that future runs
    are not impacted.
    """
    volumes = api.list_volumes()
    retry = 0
    action_type = u"agent:blockdevice:cleanup:details"
    with start_action(action_type=action_type):
        while retry < CLEANUP_RETRY_LIMIT and len(volumes) > 0:
            for volume in volumes:
                try:
                    if volume.attached_to is not None:
                        api.detach_volume(volume.blockdevice_id)
                    api.destroy_volume(volume.blockdevice_id)
                except:
                    write_traceback(_logger)

            time.sleep(1.0)
            volumes = api.list_volumes()
            retry += 1

        if len(volumes) > 0:
            Message.new(u"agent:blockdevice:failedcleanup:volumes",
                        volumes=volumes).write()
 def _get_container_details(self, container_ids):
     for identity in container_ids:
         try:
             details = deepcopy(self._client.inspect_container(identity))
             details[u"Config"]["Env"] = u"<elided>"
             yield details
         except:
             write_traceback()
Пример #7
0
 def emit(self, record):
     eliot.log_message(
         message_type=record.name,
         log_level=record.levelname,
         # logger=record.name,
         message=record.getMessage(),
     )
     if record.exc_info:
         write_traceback(exc_info=record.exc_info)
Пример #8
0
 def _loop(self):
     try:
         yield self._poll_collective()
         self._status.poll_status(
             self._config.name,
             PollerStatus(last_completed=seconds_to_ns(
                 self._clock.seconds()), ))
     except Exception:
         # in some cases, might want to surface elsewhere
         write_traceback()
Пример #9
0
    def emit(self, record):
        Message.new(message_type=u"eliot:stdlib",
                    log_level=record.levelname,
                    logger=record.name,
                    message=record.getMessage()).write(self.logger)

        if record.exc_info:
            write_traceback(
                logger=self.logger,
                exc_info=record.exc_info,
            )
 def _filter_detection(self, detection_results):
     applicable = []
     combined = zip(self._COLLECTORS, detection_results)
     for collector, (success, detection_result) in combined:
         if success:
             if detection_result:
                 applicable.append(collector)
         else:
             write_traceback(
                 system="log-agent:os-detection",
                 collector=collector.__class__.__name__,
             )
     return applicable
Пример #11
0
def _pytest_internalerror(excrepr, excinfo):
    """Log an internal error and terminate logging action."""

    # Eliot does introspection, so we have to HACK around that.
    with mock.patch(
        'eliot._traceback.sys.exc_info',
        mock.Mock(return_value=(excinfo.type, excinfo.value, excinfo.tb))
    ):
        eliot.write_traceback()
    Session.invenio_eliot_action.finish(excinfo.value)

    # Reporters
    finalize_reporters(Session.config.option.invenio_reporters)
Пример #12
0
    def _loop(self):
        """
        Called periodically to scan for new files.

        Performs a scan for files, and logs and consumes all errors.
        """
        try:
            yield self._scan()
            self._status.scan_status(
                ScannerStatus(last_completed=seconds_to_ns(
                    self._clock.seconds()), ))
        except Exception:
            write_traceback()
Пример #13
0
    def emit(self, record):
        Message.new(
            message_type=u"eliot:stdlib",
            log_level=record.levelname,
            logger=record.name,
            message=record.getMessage()
        ).write(self.logger)

        if record.exc_info:
            write_traceback(
                logger=self.logger,
                exc_info=record.exc_info,
            )
Пример #14
0
def fetch_pages():
    # fetch page contents
    with start_action(action_type="fetch_pages"):
        page_contents = {}
        for page_url in PAGE_URL_LIST:
            with start_action(action_type="dounload", url=page_url):
                try:
                    r = requests.get(page_url, timeout=30)
                    r.raise_for_status()
                except requests.exceptions.RequestException as e:
                    write_traceback()
                    continue
                page_contents[page_url] = r.text
        return page_contents
Пример #15
0
    def attach_volume(self, blockdevice_id, attach_to):
        with start_action(
            action_type=u"flocker:node:agents:gce:attach_volume",
            blockdevice_id=blockdevice_id,
            attach_to=attach_to,
        ) as action:
            config = dict(
                deviceName=blockdevice_id,
                autoDelete=False,
                boot=False,
                source=(
                    "https://www.googleapis.com/compute/v1/projects/%s/zones/"
                    "%s/disks/%s" % (self._project, self._zone, blockdevice_id)
                )
            )
            try:
                result = self._do_blocking_operation(
                    self._compute.instances().attachDisk,
                    instance=attach_to,
                    body=config,
                    timeout_sec=VOLUME_ATTACH_TIMEOUT,
                )

            except HttpError as e:
                if e.resp.status == 400:
                    write_traceback()
                    raise UnknownVolume(blockdevice_id)
                else:
                    raise
            errors = result.get('error', {}).get('errors', [])
            for e in errors:
                if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                    raise AlreadyAttachedVolume(blockdevice_id)
            disk = self._get_gce_volume(blockdevice_id)
            result = BlockDeviceVolume(
                blockdevice_id=blockdevice_id,
                size=int(GiB(int(disk['sizeGb'])).to_Byte()),
                attached_to=attach_to,
                dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
            )
            action.add_success_fields(
                final_volume={
                    'blockdevice_id': result.blockdevice_id,
                    'size': result.size,
                    'attached_to': result.attached_to,
                    'dataset_id': unicode(result.dataset_id),
                }
            )
            return result
Пример #16
0
 def _process_queue(self):
     """
     Wait for a single item from the queue and process it, forever.
     """
     while True:
         try:
             (path, d) = yield self._queue.get()
             with PROCESS_FILE_QUEUE(relpath=path.path):
                 yield self._snapshot_creator.store_local_snapshot(path)
                 d.callback(None)
         except CancelledError:
             break
         except Exception:
             # XXX Probably should fire d here, someone might be waiting.
             write_traceback()
Пример #17
0
def fetch_pages():
    """페이지의 내용을 추출합니다."""
    # 어떤 처리의 로그인지는 action_type으로 지정
    with start_action(action_type="fetch_pages"):
        page_contents = {}
        for page_url in PAGE_URL_LIST:
            # 어떤 처리의 로그인지 action_type으로 출력
            with start_action(action_type="download", url=page_url):
                try:
                    r = requests.get(page_url, timeout=30)
                    r.raise_for_status()
                except requests.exceptions.RequestException as e:
                    write_traceback()  # 예외가 발생하면 트레이스백 출력
                    continue
                page_contents[page_url] = r.text
        return page_contents
def fetch_pages():
    """ページ内容を取得する."""
    # どの処理のログかをaction_typeで指定
    with start_action(action_type="fetch_pages"):
        page_contents = {}
        for page_url in PAGE_URL_LIST:
            # どの処理のログかをaction_typeで指定
            with start_action(action_type="download", url=page_url):
                try:
                    r = requests.get(page_url, timeout=30)
                    r.raise_for_status()
                except requests.exceptions.RequestException as e:
                    write_traceback()  # 例外時はトレースバックを出力する
                    continue
                page_contents[page_url] = r.text
        return page_contents
Пример #19
0
 def output_UPDATE_MAYBE_WAKEUP(self, context):
     # External configuration and state has changed. Let's pretend
     # local state hasn't changed. If when we calculate changes that
     # still indicates some action should be taken that means we should
     # wake up:
     discovered = self._last_discovered_local_state
     try:
         changes = self.deployer.calculate_changes(
             self.configuration, self.cluster_state, discovered)
     except:
         # Something went wrong in calculation due to a bug in the
         # code. We should wake up just in case in order to be more
         # responsive.
         write_traceback()
         changes = None
     if changes != NoOp():
         self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
Пример #20
0
 def output_UPDATE_MAYBE_WAKEUP(self, context):
     # External configuration and state has changed. Let's pretend
     # local state hasn't changed. If when we calculate changes that
     # still indicates some action should be taken that means we should
     # wake up:
     discovered = self._last_discovered_local_state
     try:
         changes = self.deployer.calculate_changes(self.configuration,
                                                   self.cluster_state,
                                                   discovered)
     except:
         # Something went wrong in calculation due to a bug in the
         # code. We should wake up just in case in order to be more
         # responsive.
         write_traceback()
         changes = None
     if changes != NoOp():
         self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
Пример #21
0
    def _process_queue(self):
        """
        Wait for a single item from the queue and process it, forever.
        """
        while self.running:
            try:
                (snap, d) = yield self._queue.get()
            except CancelledError:
                return

            try:
                remote = yield self._perform_upload(snap)
                d.callback(remote)
            except CancelledError:
                d.cancel()
                return
            except Exception:
                d.errback()
                write_traceback()
Пример #22
0
    def save(self, deployment):
        """
        Save and flush new deployment to disk.

        :return Deferred: Fires when write is finished.
        """
        with _LOG_SAVE(self.logger, configuration=deployment):
            self._sync_save(deployment)
            self._deployment = deployment
            # At some future point this will likely involve talking to a
            # distributed system (e.g. ZooKeeper or etcd), so the API doesn't
            # guarantee immediate saving of the data.
            for callback in self._change_callbacks:
                try:
                    callback()
                except:
                    # Second argument will be ignored in next Eliot release, so
                    # not bothering with particular value.
                    write_traceback(self.logger, u"")
            return succeed(None)
Пример #23
0
    def save(self, deployment):
        """
        Save and flush new deployment to disk.

        :return Deferred: Fires when write is finished.
        """
        with _LOG_SAVE(self.logger, configuration=deployment):
            self._sync_save(deployment)
            self._deployment = deployment
            # At some future point this will likely involve talking to a
            # distributed system (e.g. ZooKeeper or etcd), so the API doesn't
            # guarantee immediate saving of the data.
            for callback in self._change_callbacks:
                try:
                    callback()
                except:
                    # Second argument will be ignored in next Eliot release, so
                    # not bothering with particular value.
                    write_traceback(self.logger, u"")
            return succeed(None)
Пример #24
0
    def _process_queue(self):
        """
        Wait for a single item from the queue and process it, forever.
        """
        while self.running:
            try:
                (path, d) = yield self._queue.get()
            except CancelledError:
                return

            try:
                with PROCESS_FILE_QUEUE(relpath=path.path):
                    snap = yield self._snapshot_creator.store_local_snapshot(
                        path)
                    d.callback(snap)
            except CancelledError:
                d.cancel()
                return
            except Exception:
                d.errback()
                write_traceback()
Пример #25
0
    def attach_volume(self, blockdevice_id, attach_to):
        with start_action(
            action_type=u"flocker:node:agents:gce:attach_volume",
            blockdevice_id=blockdevice_id,
            attach_to=attach_to,
        ) as action:
            try:
                result = self._operations.attach_disk(
                    disk_name=blockdevice_id,
                    instance_name=attach_to
                )

            except HttpError as e:
                if e.resp.status == 400:
                    write_traceback()
                    raise UnknownVolume(blockdevice_id)
                else:
                    raise
            errors = result.get('error', {}).get('errors', [])
            for e in errors:
                if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                    raise AlreadyAttachedVolume(blockdevice_id)
            disk = self._operations.get_disk_details(blockdevice_id)
            result = BlockDeviceVolume(
                blockdevice_id=blockdevice_id,
                size=int(GiB(int(disk['sizeGb'])).to_Byte()),
                attached_to=attach_to,
                dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
            )
            action.add_success_fields(
                final_volume={
                    'blockdevice_id': result.blockdevice_id,
                    'size': result.size,
                    'attached_to': result.attached_to,
                    'dataset_id': unicode(result.dataset_id),
                }
            )
            return result
Пример #26
0
 def output_UPDATE_MAYBE_WAKEUP(self, context):
     # External configuration and state has changed. Let's pretend
     # local state hasn't changed. If when we calculate changes that
     # still indicates some action should be taken that means we should
     # wake up:
     discovered = self._last_discovered_local_state
     try:
         changes = self.deployer.calculate_changes(
             self.configuration, self.cluster_state, discovered)
     except:
         # Something went wrong in calculation due to a bug in the
         # code. We should wake up just in case in order to be more
         # responsive.
         write_traceback()
         changes = None
     if not isinstance(changes, NoOp):
         self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
     else:
         # Check if the calculated NoOp suggests an earlier wakeup than
         # currently planned:
         remaining = self._sleep_timeout.getTime() - self.reactor.seconds()
         calculated = changes.sleep.total_seconds()
         if calculated < remaining:
             self._sleep_timeout.reset(calculated)
Пример #27
0
    def attach_volume(self, blockdevice_id, attach_to):
        with start_action(
            action_type=u"flocker:node:agents:gce:attach_volume",
            blockdevice_id=blockdevice_id,
            attach_to=attach_to,
        ) as action:
            try:
                result = self._operations.attach_disk(
                    disk_name=blockdevice_id,
                    instance_name=attach_to
                )

            except HttpError as e:
                if e.resp.status == 400:
                    write_traceback()
                    raise UnknownVolume(blockdevice_id)
                else:
                    raise
            errors = result.get('error', {}).get('errors', [])
            for e in errors:
                if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                    write_traceback()
                    raise AlreadyAttachedVolume(blockdevice_id)
                elif e.get('code') == u'RESOURCE_NOT_FOUND':
                    write_traceback()
                    raise UnknownVolume(blockdevice_id)
            disk = self._operations.get_disk_details(blockdevice_id)
            result = BlockDeviceVolume(
                blockdevice_id=blockdevice_id,
                size=int(GiB(int(disk['sizeGb'])).to_Byte()),
                attached_to=attach_to,
                dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
            )
            action.add_success_fields(
                final_volume={
                    'blockdevice_id': result.blockdevice_id,
                    'size': result.size,
                    'attached_to': result.attached_to,
                    'dataset_id': unicode(result.dataset_id),
                }
            )
            return result
Пример #28
0
def test_edmond_uploads_then_restarts(reactor, request, temp_dir,
                                      introducer_furl, flog_gatherer, edmond):
    """
    ticket 2880: if a magic-folder client uploads something, then
    re-starts a spurious .backup file should not appear
    """
    name = "edmond"
    node_dir = join(temp_dir, name)

    magic_folder = join(temp_dir, 'magic-edmond')
    mkdir(magic_folder)

    with start_action(action_type=u"integration:edmond:magic_folder:create"):
        o = MagicFolderCommand()
        o.parseOptions([
            "--node-directory",
            node_dir,
            "create",
            "--poll-interval",
            "2",
            "magik:",
            "edmond_magic",
            magic_folder,
        ])
        assert 0 == do_magic_folder(o)

        # to actually-start the magic-folder we have to re-start
        yield edmond.restart_magic_folder()

    # add a thing to the magic-folder
    with open(join(magic_folder, "its_a_file"), "w") as f:
        f.write("edmond wrote this")

    # fixme, do status-update attempts in a loop below
    time.sleep(5)

    # let it upload; poll the HTTP magic-folder status API until it is
    # uploaded
    from magic_folder.scripts.magic_folder_cli import _get_json_for_fragment

    with open(join(edmond.node_directory, u'private', u'api_auth_token'),
              'rb') as f:
        token = f.read()

    uploaded = False
    for _ in range(10):
        options = {
            "node-url": "http://127.0.0.1:19985/",
        }
        try:
            magic_data = _get_json_for_fragment(options,
                                                'api?t=json',
                                                method='POST',
                                                post_args=dict(
                                                    t='json',
                                                    name='default',
                                                    token=token,
                                                ))
            for mf in magic_data:
                if mf['status'] == u'success' and mf['path'] == u'its_a_file':
                    uploaded = True
                    break
        except Exception:
            write_traceback()
            time.sleep(1)

    assert uploaded, "expected to upload 'its_a_file'"

    # re-starting edmond right now would previously have triggered the 2880 bug
    # kill edmond
    yield edmond.restart_magic_folder()

    # XXX how can we say for sure if we've waited long enough? look at
    # tail of logs for magic-folder ... somethingsomething?
    print("waiting 20 seconds to see if a .backup appears")
    for _ in range(20):
        assert exists(join(magic_folder, "its_a_file"))
        assert not exists(join(magic_folder, "its_a_file.backup"))
        time.sleep(1)
Пример #29
0
    app = make_wsgi_app(args.config_file)

    from ekklesia_portal.datamodel import Ballot, Department, Proposition, PropositionType, \
        User, VotingPhase, Supporter, Tag
    from ekklesia_common.database import Session

    session = Session()

    sqlalchemy.orm.configure_mappers()

    failed_propositions = {}

    for fp in args.filenames:
        with start_task(log_level="INFO", action_type="import_proposition"):
            try:
                imported_data = load_proposition_json_file(fp)
                insert_proposition(args.department, args.voting_phase,
                                   **imported_data)
            except MissingFieldsException as e:
                failed_propositions[fp] = e.args[0]
            except:
                write_traceback()

    if set(args.filenames) - set(failed_propositions):
        transaction.commit()

    if failed_propositions:
        Message.log(log_level="ERROR",
                    message_type="failed_propositions",
                    exceptions=failed_propositions)
Пример #30
0
    def _perform_remote_update(self, snapshot, staged_path, local_pathstate):
        """
        Resolve a remote update locally

        :param PathState local_pathstate: the PathState of the local
            file as it existed _right_ before we concluded it was fine
            (None if there was no local file before now)
        """
        # between when we checked for a local conflict while in the
        # _download_checking_local and when we _actually_ overwrite
        # the file (inside .mark_overwrite) there is an additional
        # window for last-second changes to happen .. we do the
        # equivalent of the dance described in detail in
        # https://magic-folder.readthedocs.io/en/latest/proposed/magic-folder/remote-to-local-sync.html#earth-dragons-collisions-between-local-filesystem-operations-and-downloads
        # although that spec doesn't include when to remove the
        # ".backup" files -- we use local_pathstate to double-check
        # that.

        if snapshot.content_cap is None:
            self._factory._magic_fs.mark_delete(snapshot.relpath)
            path_state = None
        else:
            try:
                path_state = self._factory._magic_fs.mark_overwrite(
                    snapshot.relpath,
                    snapshot.metadata["modification_time"],
                    staged_path,
                    local_pathstate,
                )
            except OSError as e:
                self._factory._folder_status.error_occurred(
                    "Failed to overwrite file '{}': {}".format(snapshot.relpath, str(e))
                )
                with self._action.context():
                    write_traceback()
                self._call_later(self._fatal_error_download, snapshot)
                return
            except BackupRetainedError as e:
                # this means that the mark_overwrite() code has
                # noticed some mismatch to the replaced file or its
                # .snaptmp version -- so this is a conflict, but we
                # didn't detect it in the _download_check_local since
                # it happened in the window _after_ that check.
                self._factory._folder_status.error_occurred(
                    "Unexpected content in '{}': {}".format(snapshot.relpath, str(e))
                )
                with self._action.context():
                    write_traceback()
                # mark as a conflict -- we use the retained tmpfile as
                # the original "staged" path here, causing "our"
                # emergency data to be in the conflict file .. maybe
                # this should just be the original tmpfile and we
                # shouldn't mess with it further?
                self._call_later(self._download_mismatch, snapshot, e.path)
                return

        # Note, if we crash here (after moving the file into place but
        # before noting that in our database) then we could produce
        # LocalSnapshots referencing the wrong parent. We will no
        # longer produce snapshots with the wrong parent once we
        # re-run and get past this point.

        # remember the last remote we've downloaded
        self._factory._config.store_downloaded_snapshot(
            snapshot.relpath, snapshot, path_state
        )

        def updated_snapshot(arg):
            self._factory._config.store_currentsnapshot_state(
                snapshot.relpath,
                path_state,
            )
            self._call_later(self._personal_dmd_updated, snapshot)
            return

        retry_delay_sequence = _delay_sequence()

        # It probably makes sense to have a separate state for this
        # part ("update remote dmd"). If we crash here (e.g. Tahoe is
        # down, keep retrying, but subsequently crash) and then
        # restart, we just won't update the remote DMD. So "something"
        # should notice at startup that 'store_downloaded_snapshot'
        # has run but not this part (because the database has a
        # different entry than the remote DMD) and inject an event to
        # get us here.

        def error(f):
            # XXX really need to "more visibly" log things like syntax
            # errors etc...
            write_failure(f)
            if f.check(CancelledError):
                self._factory._folder_status.error_occurred(
                    "Cancelled: {}".format(self._relpath)
                )
                self._call_later(self._cancel, snapshot)
                return

            self._factory._folder_status.error_occurred(
                "Error updating personal DMD: {}".format(f.getErrorMessage())
            )
            with self._action.context():
                write_failure(f)
            delay_amt = next(retry_delay_sequence)
            delay = self._delay_later(delay_amt, perform_update)
            delay.addErrback(error)
            return None

        def perform_update():
            d = maybeDeferred(
                self._factory._write_participant.update_snapshot,
                snapshot.relpath,
                snapshot.capability,
            )
            d.addCallback(updated_snapshot)
            d.addErrback(error)
            return d

        d = perform_update()
        return d
        def maybe_open_then_iterate(log_stream):
            if log_stream is None:
                # Try opening the stream any time we don't already have it
                # open.  Maybe the container we're watching went away for a
                # minute and is going to come back.
                try:
                    log_stream = self._open_log_stream()
                except NotFound:
                    Message.new(
                        system="log-agent:docker-collector:open:failed", container=self.container_id, reason="not found"
                    ).write()
                    return None
                except:
                    write_traceback(system="log-agent:docker-collector:open:failed", container=self.container_id)
                    return None
                else:
                    Message.new(system="log-agent:docker-collector:open:succeeded", container=self.container_id).write()

            def get_time(logchunk):
                try:
                    # Hope we find an Eliot message
                    message = json.loads(logchunk)
                    return message["timestamp"]
                except:
                    # Probably was some other kind of message, hope the next one is better.
                    return None

            chunks = []
            start_time = None
            recent_time = time()
            for logchunk in log_stream:
                chunks.append(logchunk)
                if start_time is None:
                    start_time = get_time(logchunk)
                now = time()

                if start_time is None:
                    if len(chunks) > 25:
                        # Don't collect more than some chunk-count-bounded
                        # quantity of logs if we're failing to determine a
                        # starting timestamp.  Something may be going wrong at
                        # a layer of the system that doesn't generate
                        # recognizable timestamps.  If that's happening we want
                        # to avoid an unbounded loop.
                        break
                else:
                    time_passed = now - start_time
                    if time_passed > 15:
                        # Don't collect more than some time-bounded quantity of
                        # logs at a time.  Nothing is reported to Firehose
                        # until we get out of this loop and return a result.
                        break

                time_passed = now - recent_time
                # If a long time passed getting this new message then we're
                # probably caught up and we were blocking waiting for the
                # container to write a new log message.  If we're caught
                # up, we may as well send what we have now.
                if time_passed > 1:
                    break

                # Remember when we got this message for comparison to the next time
                recent_time = now

            # XXX Think about the case where we've reached EOF on some
            # container's logs.  This happens if the container stops.  Current
            # behavior is probably to never notice logs when the container
            # restarts which is probably bad.
            return (chunks, log_stream)
Пример #32
0
def test_edmond_uploads_then_restarts(reactor, request, temp_dir,
                                      introducer_furl, flog_gatherer, edmond):
    """
    ticket 2880: if a magic-folder client uploads something, then
    re-starts a spurious .backup file should not appear
    """
    name = "edmond"
    node_dir = join(temp_dir, name)

    magic_folder = join(temp_dir, 'magic-edmond')
    mkdir(magic_folder)

    with start_action(action_type=u"integration:edmond:magic_folder:create"):
        o = MagicFolderCommand()
        o.parseOptions([
            "--node-directory",
            node_dir,
            "create",
            "--poll-interval",
            "2",
            "magik:",
            "edmond_magic",
            magic_folder,
        ])
        rc = yield do_magic_folder(o)
        assert 0 == rc

        # to actually-start the magic-folder we have to re-start
        yield edmond.restart_magic_folder()

    # add a thing to the magic-folder
    with open(join(magic_folder, "its_a_file"), "w") as f:
        f.write("edmond wrote this")

    # fixme, do status-update attempts in a loop below
    time.sleep(5)

    # let it upload; poll the HTTP magic-folder status API until it is
    # uploaded
    uploaded = False
    for _ in range(10):
        try:
            treq = HTTPClient(Agent(reactor))
            mf = yield status(unicode('default', 'utf-8'),
                              FilePath(edmond.node_directory), treq)
            if mf.folder_status[0]['status'] == u'success' and \
               mf.local_files.get(u'its_a_file', None) is not None:
                uploaded = True
                break
        except Exception:
            write_traceback()
            time.sleep(1)

    assert uploaded, "expected to upload 'its_a_file'"

    # re-starting edmond right now would previously have triggered the 2880 bug
    # kill edmond
    yield edmond.restart_magic_folder()

    # XXX how can we say for sure if we've waited long enough? look at
    # tail of logs for magic-folder ... somethingsomething?
    print("waiting 20 seconds to see if a .backup appears")
    for _ in range(20):
        assert exists(join(magic_folder, "its_a_file"))
        assert not exists(join(magic_folder, "its_a_file.backup"))
        time.sleep(1)
Пример #33
0
def exception(message):
    write_traceback(exc_info=sys.exc_info())
 def exception(self, msg, *args, **kwargs):
     eliot.write_traceback()
     if args and msg:
         msg = self._format(msg, *args)
     self._log(exception=msg)