def _update(self, provider):
    """Walk the provider's configured directory and yield parsed DPA items.

    This is a generator. For every regular file that ``is_latest_content``
    accepts, it yields a one-element list ``[item]`` holding the result of
    ``self.parser.parse_file`` after it has been passed through
    ``dpa_derive_dateline``.

    Files are visited in the order produced by
    ``get_sorted_files(..., sort_by=FileSortAttributes.created)``. Entries
    that are not regular files are left untouched. Every regular file is
    handed to ``self.move_file`` with ``success=True``, whether or not it
    was parsed.

    :param provider: mapping describing the ingest provider; the directory
        is read from ``provider['config']['path']`` and the freshness
        reference from ``provider['last_updated']``.
    :raises: the error built by ``ParserError.parseFileError('DPA', ...)``
        when anything inside the per-file processing raises. The file is
        first handed to ``self.move_file`` with ``success=False``. Raising
        ends the generator, so later files are not visited in that run.
    """
    self.provider = provider
    self.path = provider.get('config', {}).get('path', None)

    if not self.path:
        logger.info('No path')
        # This function is a generator, so the list below is carried on
        # StopIteration instead of being produced as a batch; a caller that
        # simply iterates sees no batches at all.
        return []

    for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
        try:
            filepath = os.path.join(self.path, filename)
            if not os.path.isfile(filepath):
                # Sub-directories and other non-file entries are ignored.
                continue

            # NOTE(review): isfile() follows symlinks while lstat() does not,
            # so for a symlink this is the link's own mtime - confirm intent.
            file_stat = os.lstat(filepath)
            modified_at = datetime.fromtimestamp(file_stat.st_mtime, tz=utc)

            if not self.is_latest_content(modified_at, provider.get('last_updated')):
                # Not accepted as latest content: move it without parsing.
                self.move_file(self.path, filename, provider=provider, success=True)
                continue

            item = self.parser.parse_file(filepath, provider)
            dpa_derive_dateline(item)

            # The file is moved before the batch is handed to the consumer.
            self.move_file(self.path, filename, provider=provider, success=True)
            yield [item]
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.parseFileError('DPA', filename, ex, provider)
def after_extracting(self, article, provider):
    """Post-extraction hook: pass ``article`` through ``dpa_derive_dateline``.

    :param article: the extracted article, forwarded unchanged to
        ``dpa_derive_dateline``.
    :param provider: accepted as part of the hook signature; this
        implementation does not use it.
    """
    dpa_derive_dateline(article)