def output_UPDATE_MAYBE_WAKEUP(self, context):
    # External configuration and state has changed. Let's pretend
    # local state hasn't changed. If the calculated changes still
    # indicate that some action should be taken, we should wake up:
    discovered = self._last_discovered_local_state
    try:
        changes = self.deployer.calculate_changes(self.configuration,
                                                  self.cluster_state,
                                                  discovered)
    except:
        # Something went wrong in calculation due to a bug in the
        # code. We should wake up just in case in order to be more
        # responsive.
        write_traceback()
        changes = None
    if not isinstance(changes, NoOp):
        self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
    else:
        # Check if the calculated NoOp suggests an earlier wakeup than
        # currently planned:
        remaining = self._sleep_timeout.getTime() - self.reactor.seconds()
        calculated = changes.sleep.total_seconds()
        if calculated < remaining:
            self._sleep_timeout.reset(calculated)
def detach_destroy_volumes(api):
    """
    Detach and destroy all volumes known to this API.

    If we fail to detach a volume for any reason, sleep for 1 second
    and retry until we hit CLEANUP_RETRY_LIMIT. This is to facilitate
    best-effort cleanup of the volume environment after each test run,
    so that future runs are not impacted.
    """
    volumes = api.list_volumes()
    retry = 0
    action_type = u"agent:blockdevice:cleanup:details"
    with start_action(action_type=action_type):
        while retry < CLEANUP_RETRY_LIMIT and len(volumes) > 0:
            for volume in volumes:
                try:
                    if volume.attached_to is not None:
                        api.detach_volume(volume.blockdevice_id)
                    api.destroy_volume(volume.blockdevice_id)
                except:
                    write_traceback(_logger)
            time.sleep(1.0)
            volumes = api.list_volumes()
            retry += 1
        if len(volumes) > 0:
            Message.new(
                message_type=u"agent:blockdevice:failedcleanup:volumes",
                volumes=volumes,
            ).write()
def _retry_exception(f, steps=(0.1,) * 10, sleep=sleep):
    """
    Retry a function if it raises an exception.

    :return: Whatever the function returns.
    """
    steps = iter(steps)

    while True:
        try:
            Message.new(
                message_type=(
                    u"flocker:provision:libcloud:retry_exception:trying"),
                function=fullyQualifiedName(f),
            ).write()
            return f()
        except:
            # Try to get the next sleep time from the steps iterator. Do it
            # without raising an exception (StopIteration) to preserve the
            # current exception context.
            for step in steps:
                write_traceback()
                sleep(step)
                break
            else:
                # Didn't hit the break, so didn't iterate at all, so we're
                # out of retry steps. Fail now.
                raise
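# A hypothetical usage sketch for _retry_exception above (not from the
# original project): the helper retries until its `steps` run out,
# logging a traceback before each sleep, then re-raises. Assumes the
# names the helper needs (Message, fullyQualifiedName, sleep) are in
# scope; flaky_call is illustrative.
attempts = []

def flaky_call():
    attempts.append(1)
    if len(attempts) < 3:
        raise RuntimeError("transient failure")
    return "ok"

assert _retry_exception(flaky_call) == "ok"  # succeeds on the third try
assert len(attempts) == 3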
def _get_container_details(self, container_ids):
    for identity in container_ids:
        try:
            details = deepcopy(self._client.inspect_container(identity))
            details[u"Config"]["Env"] = u"<elided>"
            yield details
        except:
            write_traceback()
def emit(self, record):
    eliot.log_message(
        message_type=record.name,
        log_level=record.levelname,
        # logger=record.name,
        message=record.getMessage(),
    )
    if record.exc_info:
        write_traceback(exc_info=record.exc_info)
def _loop(self):
    try:
        yield self._poll_collective()
        self._status.poll_status(
            self._config.name,
            PollerStatus(
                last_completed=seconds_to_ns(self._clock.seconds()),
            ),
        )
    except Exception:
        # in some cases, might want to surface elsewhere
        write_traceback()
def emit(self, record):
    Message.new(
        message_type=u"eliot:stdlib",
        log_level=record.levelname,
        logger=record.name,
        message=record.getMessage(),
    ).write(self.logger)
    if record.exc_info:
        write_traceback(
            logger=self.logger,
            exc_info=record.exc_info,
        )
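# A self-contained sketch of the stdlib-to-Eliot bridge that the
# emit() methods above implement. The EliotHandler class name is
# illustrative; log_message and write_traceback(exc_info=...) are the
# same public eliot APIs the snippets themselves use.
import logging
from eliot import log_message, write_traceback

class EliotHandler(logging.Handler):
    """Forward stdlib logging records into Eliot's log stream."""

    def emit(self, record):
        log_message(
            message_type=u"eliot:stdlib",
            log_level=record.levelname,
            logger=record.name,
            message=record.getMessage(),
        )
        if record.exc_info:
            # Route the record's exception info through Eliot as a
            # traceback message.
            write_traceback(exc_info=record.exc_info)

logging.getLogger().addHandler(EliotHandler())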
def _filter_detection(self, detection_results):
    applicable = []
    combined = zip(self._COLLECTORS, detection_results)
    for collector, (success, detection_result) in combined:
        if success:
            if detection_result:
                applicable.append(collector)
        else:
            write_traceback(
                system="log-agent:os-detection",
                collector=collector.__class__.__name__,
            )
    return applicable
def _pytest_internalerror(excrepr, excinfo):
    """Log an internal error and terminate logging action."""
    # Eliot does introspection, so we have to HACK around that.
    with mock.patch(
        'eliot._traceback.sys.exc_info',
        mock.Mock(return_value=(excinfo.type, excinfo.value, excinfo.tb)),
    ):
        eliot.write_traceback()
    Session.invenio_eliot_action.finish(excinfo.value)
    # Reporters
    finalize_reporters(Session.config.option.invenio_reporters)
def _loop(self):
    """
    Called periodically to scan for new files. Performs a scan for
    files, and logs and consumes all errors.
    """
    try:
        yield self._scan()
        self._status.scan_status(
            ScannerStatus(
                last_completed=seconds_to_ns(self._clock.seconds()),
            ),
        )
    except Exception:
        write_traceback()
def fetch_pages():
    """Fetch the contents of each page."""
    # Tag the log with an action_type naming this operation
    with start_action(action_type="fetch_pages"):
        page_contents = {}
        for page_url in PAGE_URL_LIST:
            with start_action(action_type="download", url=page_url):
                try:
                    r = requests.get(page_url, timeout=30)
                    r.raise_for_status()
                except requests.exceptions.RequestException as e:
                    # On an exception, log the traceback and move on
                    write_traceback()
                    continue
                page_contents[page_url] = r.text
        return page_contents
def attach_volume(self, blockdevice_id, attach_to):
    with start_action(
        action_type=u"flocker:node:agents:gce:attach_volume",
        blockdevice_id=blockdevice_id,
        attach_to=attach_to,
    ) as action:
        config = dict(
            deviceName=blockdevice_id,
            autoDelete=False,
            boot=False,
            source=(
                "https://www.googleapis.com/compute/v1/projects/%s/zones/"
                "%s/disks/%s" % (self._project, self._zone, blockdevice_id)
            )
        )
        try:
            result = self._do_blocking_operation(
                self._compute.instances().attachDisk,
                instance=attach_to,
                body=config,
                timeout_sec=VOLUME_ATTACH_TIMEOUT,
            )
        except HttpError as e:
            if e.resp.status == 400:
                write_traceback()
                raise UnknownVolume(blockdevice_id)
            else:
                raise
        errors = result.get('error', {}).get('errors', [])
        for e in errors:
            if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                raise AlreadyAttachedVolume(blockdevice_id)
        disk = self._get_gce_volume(blockdevice_id)
        result = BlockDeviceVolume(
            blockdevice_id=blockdevice_id,
            size=int(GiB(int(disk['sizeGb'])).to_Byte()),
            attached_to=attach_to,
            dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
        )
        action.add_success_fields(
            final_volume={
                'blockdevice_id': result.blockdevice_id,
                'size': result.size,
                'attached_to': result.attached_to,
                'dataset_id': unicode(result.dataset_id),
            }
        )
        return result
def _process_queue(self):
    """
    Wait for a single item from the queue and process it, forever.
    """
    while True:
        try:
            (path, d) = yield self._queue.get()
            with PROCESS_FILE_QUEUE(relpath=path.path):
                yield self._snapshot_creator.store_local_snapshot(path)
                d.callback(None)
        except CancelledError:
            break
        except Exception:
            # XXX Probably should fire d here, someone might be waiting.
            write_traceback()
def fetch_pages(): """페이지의 내용을 추출합니다.""" # 어떤 처리의 로그인지는 action_type으로 지정 with start_action(action_type="fetch_pages"): page_contents = {} for page_url in PAGE_URL_LIST: # 어떤 처리의 로그인지 action_type으로 출력 with start_action(action_type="download", url=page_url): try: r = requests.get(page_url, timeout=30) r.raise_for_status() except requests.exceptions.RequestException as e: write_traceback() # 예외가 발생하면 트레이스백 출력 continue page_contents[page_url] = r.text return page_contents
def fetch_pages(): """ページ内容を取得する.""" # どの処理のログかをaction_typeで指定 with start_action(action_type="fetch_pages"): page_contents = {} for page_url in PAGE_URL_LIST: # どの処理のログかをaction_typeで指定 with start_action(action_type="download", url=page_url): try: r = requests.get(page_url, timeout=30) r.raise_for_status() except requests.exceptions.RequestException as e: write_traceback() # 例外時はトレースバックを出力する continue page_contents[page_url] = r.text return page_contents
def output_UPDATE_MAYBE_WAKEUP(self, context):
    # External configuration and state has changed. Let's pretend
    # local state hasn't changed. If the calculated changes still
    # indicate that some action should be taken, we should wake up:
    discovered = self._last_discovered_local_state
    try:
        changes = self.deployer.calculate_changes(
            self.configuration, self.cluster_state, discovered)
    except:
        # Something went wrong in calculation due to a bug in the
        # code. We should wake up just in case in order to be more
        # responsive.
        write_traceback()
        changes = None
    if changes != NoOp():
        self.fsm.receive(ConvergenceLoopInputs.WAKEUP)
def _process_queue(self):
    """
    Wait for a single item from the queue and process it, forever.
    """
    while self.running:
        try:
            (snap, d) = yield self._queue.get()
        except CancelledError:
            return
        try:
            remote = yield self._perform_upload(snap)
            d.callback(remote)
        except CancelledError:
            d.cancel()
            return
        except Exception:
            d.errback()
            write_traceback()
def save(self, deployment):
    """
    Save and flush new deployment to disk.

    :return Deferred: Fires when write is finished.
    """
    with _LOG_SAVE(self.logger, configuration=deployment):
        self._sync_save(deployment)
        self._deployment = deployment
        # At some future point this will likely involve talking to a
        # distributed system (e.g. ZooKeeper or etcd), so the API doesn't
        # guarantee immediate saving of the data.
        for callback in self._change_callbacks:
            try:
                callback()
            except:
                # Second argument will be ignored in next Eliot release,
                # so not bothering with particular value.
                write_traceback(self.logger, u"")
        return succeed(None)
def _process_queue(self):
    """
    Wait for a single item from the queue and process it, forever.
    """
    while self.running:
        try:
            (path, d) = yield self._queue.get()
        except CancelledError:
            return
        try:
            with PROCESS_FILE_QUEUE(relpath=path.path):
                snap = yield self._snapshot_creator.store_local_snapshot(
                    path)
                d.callback(snap)
        except CancelledError:
            d.cancel()
            return
        except Exception:
            d.errback()
            write_traceback()
def attach_volume(self, blockdevice_id, attach_to):
    with start_action(
        action_type=u"flocker:node:agents:gce:attach_volume",
        blockdevice_id=blockdevice_id,
        attach_to=attach_to,
    ) as action:
        try:
            result = self._operations.attach_disk(
                disk_name=blockdevice_id,
                instance_name=attach_to
            )
        except HttpError as e:
            if e.resp.status == 400:
                write_traceback()
                raise UnknownVolume(blockdevice_id)
            else:
                raise
        errors = result.get('error', {}).get('errors', [])
        for e in errors:
            if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                raise AlreadyAttachedVolume(blockdevice_id)
        disk = self._operations.get_disk_details(blockdevice_id)
        result = BlockDeviceVolume(
            blockdevice_id=blockdevice_id,
            size=int(GiB(int(disk['sizeGb'])).to_Byte()),
            attached_to=attach_to,
            dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
        )
        action.add_success_fields(
            final_volume={
                'blockdevice_id': result.blockdevice_id,
                'size': result.size,
                'attached_to': result.attached_to,
                'dataset_id': unicode(result.dataset_id),
            }
        )
        return result
def attach_volume(self, blockdevice_id, attach_to):
    with start_action(
        action_type=u"flocker:node:agents:gce:attach_volume",
        blockdevice_id=blockdevice_id,
        attach_to=attach_to,
    ) as action:
        try:
            result = self._operations.attach_disk(
                disk_name=blockdevice_id,
                instance_name=attach_to
            )
        except HttpError as e:
            if e.resp.status == 400:
                write_traceback()
                raise UnknownVolume(blockdevice_id)
            else:
                raise
        errors = result.get('error', {}).get('errors', [])
        for e in errors:
            if e.get('code') == u"RESOURCE_IN_USE_BY_ANOTHER_RESOURCE":
                write_traceback()
                raise AlreadyAttachedVolume(blockdevice_id)
            elif e.get('code') == u'RESOURCE_NOT_FOUND':
                write_traceback()
                raise UnknownVolume(blockdevice_id)
        disk = self._operations.get_disk_details(blockdevice_id)
        result = BlockDeviceVolume(
            blockdevice_id=blockdevice_id,
            size=int(GiB(int(disk['sizeGb'])).to_Byte()),
            attached_to=attach_to,
            dataset_id=_blockdevice_id_to_dataset_id(blockdevice_id),
        )
        action.add_success_fields(
            final_volume={
                'blockdevice_id': result.blockdevice_id,
                'size': result.size,
                'attached_to': result.attached_to,
                'dataset_id': unicode(result.dataset_id),
            }
        )
        return result
def test_edmond_uploads_then_restarts(reactor, request, temp_dir,
                                      introducer_furl, flog_gatherer,
                                      edmond):
    """
    ticket 2880: if a magic-folder client uploads something, then
    re-starts, a spurious .backup file should not appear
    """
    name = "edmond"
    node_dir = join(temp_dir, name)

    magic_folder = join(temp_dir, 'magic-edmond')
    mkdir(magic_folder)

    with start_action(action_type=u"integration:edmond:magic_folder:create"):
        o = MagicFolderCommand()
        o.parseOptions([
            "--node-directory", node_dir,
            "create",
            "--poll-interval", "2",
            "magik:", "edmond_magic", magic_folder,
        ])
        assert 0 == do_magic_folder(o)

    # to actually-start the magic-folder we have to re-start
    yield edmond.restart_magic_folder()

    # add a thing to the magic-folder
    with open(join(magic_folder, "its_a_file"), "w") as f:
        f.write("edmond wrote this")

    # fixme, do status-update attempts in a loop below
    time.sleep(5)

    # let it upload; poll the HTTP magic-folder status API until it is
    # uploaded
    from magic_folder.scripts.magic_folder_cli import _get_json_for_fragment

    with open(join(edmond.node_directory, u'private', u'api_auth_token'),
              'rb') as f:
        token = f.read()

    uploaded = False
    for _ in range(10):
        options = {
            "node-url": "http://127.0.0.1:19985/",
        }
        try:
            magic_data = _get_json_for_fragment(
                options,
                'api?t=json',
                method='POST',
                post_args=dict(
                    t='json',
                    name='default',
                    token=token,
                ),
            )
            for mf in magic_data:
                if mf['status'] == u'success' and \
                   mf['path'] == u'its_a_file':
                    uploaded = True
                    break
        except Exception:
            write_traceback()
        time.sleep(1)

    assert uploaded, "expected to upload 'its_a_file'"

    # re-starting edmond right now would previously have triggered the
    # 2880 bug

    # kill edmond
    yield edmond.restart_magic_folder()

    # XXX how can we say for sure if we've waited long enough? look at
    # tail of logs for magic-folder ... somethingsomething?
    print("waiting 20 seconds to see if a .backup appears")
    for _ in range(20):
        assert exists(join(magic_folder, "its_a_file"))
        assert not exists(join(magic_folder, "its_a_file.backup"))
        time.sleep(1)
app = make_wsgi_app(args.config_file)

from ekklesia_portal.datamodel import Ballot, Department, Proposition, \
    PropositionType, User, VotingPhase, Supporter, Tag
from ekklesia_common.database import Session

session = Session()
sqlalchemy.orm.configure_mappers()

failed_propositions = {}

for fp in args.filenames:
    with start_task(log_level="INFO", action_type="import_proposition"):
        try:
            imported_data = load_proposition_json_file(fp)
            insert_proposition(args.department, args.voting_phase,
                               **imported_data)
        except MissingFieldsException as e:
            failed_propositions[fp] = e.args[0]
        except:
            write_traceback()

if set(args.filenames) - set(failed_propositions):
    transaction.commit()

if failed_propositions:
    Message.log(log_level="ERROR", message_type="failed_propositions",
                exceptions=failed_propositions)
def _perform_remote_update(self, snapshot, staged_path, local_pathstate):
    """
    Resolve a remote update locally.

    :param PathState local_pathstate: the PathState of the local file
        as it existed _right_ before we concluded it was fine (None if
        there was no local file before now)
    """
    # between when we checked for a local conflict while in the
    # _download_checking_local and when we _actually_ overwrite
    # the file (inside .mark_overwrite) there is an additional
    # window for last-second changes to happen .. we do the
    # equivalent of the dance described in detail in
    # https://magic-folder.readthedocs.io/en/latest/proposed/magic-folder/remote-to-local-sync.html#earth-dragons-collisions-between-local-filesystem-operations-and-downloads
    # although that spec doesn't include when to remove the
    # ".backup" files -- we use local_pathstate to double-check
    # that.

    if snapshot.content_cap is None:
        self._factory._magic_fs.mark_delete(snapshot.relpath)
        path_state = None
    else:
        try:
            path_state = self._factory._magic_fs.mark_overwrite(
                snapshot.relpath,
                snapshot.metadata["modification_time"],
                staged_path,
                local_pathstate,
            )
        except OSError as e:
            self._factory._folder_status.error_occurred(
                "Failed to overwrite file '{}': {}".format(
                    snapshot.relpath, str(e))
            )
            with self._action.context():
                write_traceback()
            self._call_later(self._fatal_error_download, snapshot)
            return
        except BackupRetainedError as e:
            # this means that the mark_overwrite() code has
            # noticed some mismatch to the replaced file or its
            # .snaptmp version -- so this is a conflict, but we
            # didn't detect it in the _download_check_local since
            # it happened in the window _after_ that check.
            self._factory._folder_status.error_occurred(
                "Unexpected content in '{}': {}".format(
                    snapshot.relpath, str(e))
            )
            with self._action.context():
                write_traceback()
            # mark as a conflict -- we use the retained tmpfile as
            # the original "staged" path here, causing "our"
            # emergency data to be in the conflict file .. maybe
            # this should just be the original tmpfile and we
            # shouldn't mess with it further?
            self._call_later(self._download_mismatch, snapshot, e.path)
            return

    # Note, if we crash here (after moving the file into place but
    # before noting that in our database) then we could produce
    # LocalSnapshots referencing the wrong parent. We will no
    # longer produce snapshots with the wrong parent once we
    # re-run and get past this point.

    # remember the last remote we've downloaded
    self._factory._config.store_downloaded_snapshot(
        snapshot.relpath, snapshot, path_state
    )

    def updated_snapshot(arg):
        self._factory._config.store_currentsnapshot_state(
            snapshot.relpath,
            path_state,
        )
        self._call_later(self._personal_dmd_updated, snapshot)

    retry_delay_sequence = _delay_sequence()

    # It probably makes sense to have a separate state for this
    # part ("update remote dmd"). If we crash here (e.g. Tahoe is
    # down, keep retrying, but subsequently crash) and then
    # restart, we just won't update the remote DMD. So "something"
    # should notice at startup that 'store_downloaded_snapshot'
    # has run but not this part (because the database has a
    # different entry than the remote DMD) and inject an event to
    # get us here.

    def error(f):
        # XXX really need to "more visibly" log things like syntax
        # errors etc...
        write_failure(f)
        if f.check(CancelledError):
            self._factory._folder_status.error_occurred(
                "Cancelled: {}".format(self._relpath)
            )
            self._call_later(self._cancel, snapshot)
            return

        self._factory._folder_status.error_occurred(
            "Error updating personal DMD: {}".format(f.getErrorMessage())
        )
        with self._action.context():
            write_failure(f)
        delay_amt = next(retry_delay_sequence)
        delay = self._delay_later(delay_amt, perform_update)
        delay.addErrback(error)
        return None

    def perform_update():
        d = maybeDeferred(
            self._factory._write_participant.update_snapshot,
            snapshot.relpath,
            snapshot.capability,
        )
        d.addCallback(updated_snapshot)
        d.addErrback(error)
        return d

    d = perform_update()
    return d
def maybe_open_then_iterate(log_stream):
    if log_stream is None:
        # Try opening the stream any time we don't already have it
        # open. Maybe the container we're watching went away for a
        # minute and is going to come back.
        try:
            log_stream = self._open_log_stream()
        except NotFound:
            Message.new(
                system="log-agent:docker-collector:open:failed",
                container=self.container_id,
                reason="not found",
            ).write()
            return None
        except:
            write_traceback(
                system="log-agent:docker-collector:open:failed",
                container=self.container_id,
            )
            return None
        else:
            Message.new(
                system="log-agent:docker-collector:open:succeeded",
                container=self.container_id,
            ).write()

    def get_time(logchunk):
        try:
            # Hope we find an Eliot message
            message = json.loads(logchunk)
            return message["timestamp"]
        except:
            # Probably it was some other kind of message; hope the
            # next one is better.
            return None

    chunks = []
    start_time = None
    recent_time = time()
    for logchunk in log_stream:
        chunks.append(logchunk)
        if start_time is None:
            start_time = get_time(logchunk)

        now = time()
        if start_time is None:
            if len(chunks) > 25:
                # Don't collect more than some chunk-count-bounded
                # quantity of logs if we're failing to determine a
                # starting timestamp. Something may be going wrong at
                # a layer of the system that doesn't generate
                # recognizable timestamps. If that's happening we want
                # to avoid an unbounded loop.
                break
        else:
            time_passed = now - start_time
            if time_passed > 15:
                # Don't collect more than some time-bounded quantity of
                # logs at a time. Nothing is reported to Firehose
                # until we get out of this loop and return a result.
                break

        time_passed = now - recent_time
        # If a long time passed getting this new message then we're
        # probably caught up and we were blocking waiting for the
        # container to write a new log message. If we're caught
        # up, we may as well send what we have now.
        if time_passed > 1:
            break

        # Remember when we got this message for comparison to the
        # next time.
        recent_time = now

    # XXX Think about the case where we've reached EOF on some
    # container's logs. This happens if the container stops. Current
    # behavior is probably to never notice logs when the container
    # restarts which is probably bad.
    return (chunks, log_stream)
def test_edmond_uploads_then_restarts(reactor, request, temp_dir,
                                      introducer_furl, flog_gatherer,
                                      edmond):
    """
    ticket 2880: if a magic-folder client uploads something, then
    re-starts, a spurious .backup file should not appear
    """
    name = "edmond"
    node_dir = join(temp_dir, name)

    magic_folder = join(temp_dir, 'magic-edmond')
    mkdir(magic_folder)

    with start_action(action_type=u"integration:edmond:magic_folder:create"):
        o = MagicFolderCommand()
        o.parseOptions([
            "--node-directory", node_dir,
            "create",
            "--poll-interval", "2",
            "magik:", "edmond_magic", magic_folder,
        ])
        rc = yield do_magic_folder(o)
        assert 0 == rc

    # to actually-start the magic-folder we have to re-start
    yield edmond.restart_magic_folder()

    # add a thing to the magic-folder
    with open(join(magic_folder, "its_a_file"), "w") as f:
        f.write("edmond wrote this")

    # fixme, do status-update attempts in a loop below
    time.sleep(5)

    # let it upload; poll the HTTP magic-folder status API until it is
    # uploaded
    uploaded = False
    for _ in range(10):
        try:
            treq = HTTPClient(Agent(reactor))
            mf = yield status(unicode('default', 'utf-8'),
                              FilePath(edmond.node_directory),
                              treq)
            if mf.folder_status[0]['status'] == u'success' and \
               mf.local_files.get(u'its_a_file', None) is not None:
                uploaded = True
                break
        except Exception:
            write_traceback()
        time.sleep(1)

    assert uploaded, "expected to upload 'its_a_file'"

    # re-starting edmond right now would previously have triggered the
    # 2880 bug

    # kill edmond
    yield edmond.restart_magic_folder()

    # XXX how can we say for sure if we've waited long enough? look at
    # tail of logs for magic-folder ... somethingsomething?
    print("waiting 20 seconds to see if a .backup appears")
    for _ in range(20):
        assert exists(join(magic_folder, "its_a_file"))
        assert not exists(join(magic_folder, "its_a_file.backup"))
        time.sleep(1)
def exception(message):
    write_traceback(exc_info=sys.exc_info())
def exception(self, msg, *args, **kwargs):
    eliot.write_traceback()
    if args and msg:
        msg = self._format(msg, *args)
    self._log(exception=msg)
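# Every example above shares one shape: catch an exception, call
# write_traceback() (optionally with a logger or exc_info), and carry
# on or re-raise. A minimal runnable sketch of that pattern, using
# only the public eliot API; parse_ints and the action_type are
# illustrative, not from any project above.
import sys
from eliot import start_action, to_file, write_traceback

to_file(sys.stdout)  # emit Eliot's JSON messages to stdout

def parse_ints(raw_values):
    parsed = []
    for raw in raw_values:
        with start_action(action_type=u"example:parse", raw=raw):
            try:
                parsed.append(int(raw))
            except ValueError:
                # Log the active exception as an Eliot traceback
                # message, then continue with the next item.
                write_traceback()
    return parsed

parse_ints(["1", "oops", "3"])  # logs one traceback, returns [1, 3]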