Exemplo n.º 1
0
    def _transmit(self, queue_item, subscriber):
        """Upload a queued item, and optionally its associated media, over FTP.

        :param queue_item: publish-queue entry; ``destination.config`` supplies
            the FTP settings and ``encoded_item`` / ``formatted_item`` supply
            the payload.
        :param subscriber: not used by this method.
        :raises PublishFtpError: for any failure while connecting or uploading.
        """
        config = queue_item.get("destination", {}).get("config", {})

        try:
            with ftp_connect(config) as ftp:

                if config.get("push_associated", False):
                    associated_path = config.get("associated_path")

                    # Set the working directory for the associated files
                    if associated_path:
                        ftp.cwd("/" + associated_path.lstrip("/"))

                    item = self._get_published_item(queue_item)
                    if item:
                        self._copy_published_media_files(item, ftp)

                    # If the directory was changed to push associated files change it back
                    if associated_path:
                        ftp.cwd("/" + config.get("path").lstrip("/"))

                filename = get_publish_service().get_filename(queue_item)
                # Encode ``formatted_item`` only when no pre-encoded payload is
                # present. Passing the encoded text as the ``.get()`` default
                # evaluated it eagerly, which failed for items that carry only
                # ``encoded_item``.
                if "encoded_item" in queue_item:
                    payload = queue_item["encoded_item"]
                else:
                    payload = queue_item["formatted_item"].encode("UTF-8")
                ftp.storbinary("STOR " + filename, BytesIO(payload))
        except PublishFtpError:
            raise
        except Exception as ex:
            raise PublishFtpError.ftpError(ex, queue_item.get("destination"))
Exemplo n.º 2
0
    def _transmit(self, queue_item, subscriber):
        """Send a queued item to its FTP destination, with optional media files.

        :param queue_item: publish-queue entry; ``destination.config`` holds the
            FTP settings, ``encoded_item`` / ``formatted_item`` hold the payload.
        :param subscriber: not used by this method.
        :raises PublishFtpError: for any failure while connecting or uploading.
        """
        config = queue_item.get('destination', {}).get('config', {})

        try:
            with ftp_connect(config) as ftp:

                if config.get('push_associated', False):
                    associated_path = config.get('associated_path')

                    # Set the working directory for the associated files
                    if associated_path:
                        ftp.cwd('/' + associated_path.lstrip('/'))

                    item = self._get_published_item(queue_item)
                    if item:
                        self._copy_published_media_files(item, ftp)

                    # If the directory was changed to push associated files change it back
                    if associated_path:
                        ftp.cwd('/' + config.get('path').lstrip('/'))

                filename = get_publish_service().get_filename(queue_item)
                # The fallback encoding must be lazy: as a ``.get()`` default it
                # was evaluated even when ``encoded_item`` existed, so an item
                # without ``formatted_item`` could never be transmitted.
                if 'encoded_item' in queue_item:
                    payload = queue_item['encoded_item']
                else:
                    payload = queue_item['formatted_item'].encode('UTF-8')
                ftp.storbinary('STOR ' + filename, BytesIO(payload))
        except PublishFtpError:
            raise
        except Exception as ex:
            raise PublishFtpError.ftpError(ex, config)
Exemplo n.º 3
0
    def _update(self, provider, update):
        """Fetch matching files from the provider's FTP directory and parse them.

        Files are downloaded into ``config['dest_path']`` (a fresh temporary
        directory when the key is absent) and parsed with the provider's feed
        parser. When the provider has a ``last_updated`` value, files modified
        before it are skipped and the newest modification time seen is stored
        in ``update[LAST_UPDATED]``.

        :param provider: ingest provider dict.
        :param update: dict receiving the new ``LAST_UPDATED`` value.
        :return: list with one entry per parsed file; a dict result is wrapped
            in a single-element list.
        :raises IngestFtpError: for any failure outside the per-file handling.
        """
        settings = provider.get('config', {})
        since = provider.get('last_updated')
        newest_seen = None

        if 'dest_path' not in settings:
            settings['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        try:
            with ftp_connect(settings) as ftp:
                parsed_batches = []
                for name, facts in ftp.mlsd():
                    # Only regular files carrying the expected suffix are ingested.
                    wanted = facts.get('type', '') == 'file' and name.lower().endswith(self.FILE_SUFFIX)
                    if not wanted:
                        continue

                    if since:
                        modified = datetime.strptime(facts['modify'], self.DATE_FORMAT)
                        modified = modified.replace(tzinfo=utc)
                        if modified < since:
                            continue
                        if not newest_seen or modified > newest_seen:
                            newest_seen = modified

                    target = os.path.join(settings['dest_path'], name)
                    try:
                        # 'xb' refuses to overwrite an already downloaded file.
                        with open(target, 'xb') as out:
                            try:
                                ftp.retrbinary('RETR %s' % name, out.write)
                            except ftplib.all_errors:
                                os.remove(target)
                                logger.exception('Exception retrieving from FTP server')
                                continue
                    except FileExistsError:
                        logger.exception('Exception retrieving from FTP server, file already exists')
                        continue

                    # XML parsers receive the parsed root element, others the file path.
                    if isinstance(self.get_feed_parser(provider), XMLFeedParser):
                        source = etree.parse(target).getroot()
                    else:
                        source = target
                    parsed = self.get_feed_parser(provider, source).parse(source, provider)

                    parsed_batches.append([parsed] if isinstance(parsed, dict) else parsed)
            if newest_seen:
                update[LAST_UPDATED] = newest_seen
            return parsed_batches
        except IngestFtpError:
            raise
        except Exception as error:
            raise IngestFtpError.ftpError(error, provider)
Exemplo n.º 4
0
 def _test(self, provider):
     """Open an FTP connection with the provider's config and request a listing.

     NOTE(review): if the connection is an ``ftplib.FTP``, ``mlsd()`` returns a
     lazy generator, so this call alone may not send the MLSD command - confirm.

     :param provider: ingest provider dict; its ``config`` holds the FTP settings.
     :raises IngestFtpError: when connecting or listing fails.
     """
     ftp_settings = provider.get('config', {})
     try:
         with ftp_connect(ftp_settings) as connection:
             connection.mlsd()
     except IngestFtpError:
         raise
     except Exception as error:
         raise IngestFtpError.ftpError(error, provider)
Exemplo n.º 5
0
    def _update(self, provider):
        """Download and parse the provider's FTP files.

        Each regular file whose name ends with ``FILE_SUFFIX`` is saved into
        ``config['dest_path']`` (a new temporary directory when unset) and
        parsed with the provider's feed parser. Files modified before the
        provider's ``last_updated`` and files that already exist locally are
        skipped.

        :param provider: ingest provider dict.
        :return: list with one entry per parsed file; a dict result is wrapped
            in a single-element list.
        :raises IngestFtpError: for any failure outside the per-file handling.
        """
        ftp_config = provider.get('config', {})
        cutoff = provider.get('last_updated')

        if 'dest_path' not in ftp_config:
            ftp_config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        try:
            with ftp_connect(ftp_config) as ftp:
                results = []
                for remote_name, facts in ftp.mlsd():
                    if facts.get('type', '') != 'file' or not remote_name.lower().endswith(self.FILE_SUFFIX):
                        continue

                    if cutoff:
                        modified_at = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                        if modified_at < cutoff:
                            continue

                    destination = os.path.join(ftp_config['dest_path'], remote_name)
                    try:
                        # Exclusive mode: a file that is already present locally is skipped.
                        with open(destination, 'xb') as local_file:
                            try:
                                ftp.retrbinary('RETR %s' % remote_name, local_file.write)
                            except ftplib.all_errors:
                                os.remove(destination)
                                logger.exception('Exception retrieving from FTP server')
                                continue
                    except FileExistsError:
                        continue

                    default_parser = self.get_feed_parser(provider)
                    # XML parsers work on the parsed root element, others on the path.
                    if isinstance(default_parser, XMLFeedParser):
                        payload = etree.parse(destination).getroot()
                    else:
                        payload = destination
                    parser = self.get_feed_parser(provider, payload)
                    outcome = parser.parse(payload, provider)

                    if isinstance(outcome, dict):
                        outcome = [outcome]
                    results.append(outcome)
            return results
        except IngestFtpError:
            raise
        except Exception as error:
            raise IngestFtpError.ftpError(error, provider)
Exemplo n.º 6
0
 def _test(self, provider):
     """Verify the provider's FTP settings by connecting and calling ``mlsd()``.

     Any failure other than ``IngestFtpError`` is converted with
     ``IngestFtpError.ftpError``.

     :param provider: ingest provider dict carrying the FTP ``config``.
     :raises IngestFtpError: when the connection attempt or the call fails.
     """
     settings = provider.get('config', {})
     try:
         with ftp_connect(settings) as client:
             client.mlsd()
     except IngestFtpError:
         # Already the expected error type - pass it through untouched.
         raise
     except Exception as failure:
         raise IngestFtpError.ftpError(failure, provider)
Exemplo n.º 7
0
 def _test(self, provider):
     """Check the FTP settings by connecting and listing the directory.

     ``MLSD`` is tried first; when it fails with an error mentioning ``500``
     the listing is retried with ``nlst()``.

     :param provider: ingest provider dict; its ``config`` holds the FTP settings.
     :raises IngestFtpError: when the connection or both listing attempts fail.
     """
     config = provider.get("config", {})
     try:
         with ftp_connect(config) as ftp:
             try:
                 # Pull the first entry so the command is really issued.
                 # NOTE(review): assumes ``ftp`` is an ``ftplib.FTP``, whose
                 # ``mlsd()`` is a lazy generator - confirm.
                 next(iter(ftp.mlsd()), None)
             except Exception as ex:
                 if "500" not in str(ex):
                     raise
                 # The fallback must run while the connection is still open;
                 # previously it ran after the ``with`` block had exited, and
                 # ``ftp`` was unbound when the connect itself failed.
                 ftp.nlst()
     except IngestFtpError:
         raise
     except Exception as ex:
         raise IngestFtpError.ftpError(ex, provider)
Exemplo n.º 8
0
    def _transmit(self, queue_item, subscriber):
        """Upload the queue item's ``encoded_item`` to its FTP destination.

        :param queue_item: publish-queue entry; ``destination.config`` holds the
            FTP settings.
        :param subscriber: not used by this method.
        :raises PublishFtpError: for any failure while connecting or uploading.
        """
        destination = queue_item.get('destination', {})
        ftp_settings = destination.get('config', {})

        try:
            with ftp_connect(ftp_settings) as connection:
                target_name = PublishService.get_filename(queue_item)
                payload = BytesIO(queue_item['encoded_item'])
                connection.storbinary("STOR " + target_name, payload)
        except PublishFtpError:
            raise
        except Exception as error:
            raise PublishFtpError.ftpError(error, ftp_settings)
Exemplo n.º 9
0
    def _transmit(self, queue_item, subscriber):
        """Store the item's encoded payload on the destination FTP server.

        The remote file name comes from ``PublishService.get_filename``. Errors
        other than ``PublishFtpError`` are converted with
        ``PublishFtpError.ftpError``.

        :param queue_item: publish-queue entry with ``destination`` and
            ``encoded_item``.
        :param subscriber: not used by this method.
        :raises PublishFtpError: when connecting or uploading fails.
        """
        ftp_config = queue_item.get('destination', {}).get('config', {})

        try:
            with ftp_connect(ftp_config) as client:
                remote_name = PublishService.get_filename(queue_item)
                stream = BytesIO(queue_item['encoded_item'])
                store_command = "STOR " + remote_name
                client.storbinary(store_command, stream)
        except PublishFtpError:
            # Already the right error type; let it through unchanged.
            raise
        except Exception as failure:
            raise PublishFtpError.ftpError(failure, ftp_config)
Exemplo n.º 10
0
 def _move_file(self, file_dir, filename, config):
     """Move a processed file, over FTP or on the local file system.

     For an ``ftp`` feeding service the file is moved on the server, but only
     when ``config['move']`` is set. For any other feeding service the
     attachment is moved with ``FileFeedingService.move_file``.

     :param file_dir: directory that contains the file.
     :param filename: name of the file to move.
     :param config: provider config with the FTP settings and ``move`` flag.
     """
     if self._provider.get('feeding_service') == 'ftp':
         # Check the flag before connecting: opening an FTP session only to
         # close it again is wasted work when moving is disabled.
         if not config.get('move', False):
             return
         with ftp_connect(config) as ftp:
             ftp_service = FTPFeedingService()
             move_path, _ = ftp_service._create_move_folders(config, ftp)
             ftp_service._move(
                 ftp, os.path.join(file_dir, filename), os.path.join(move_path, filename),
                 datetime.now(), False
             )
     else:
         file_service = FileFeedingService()
         # move processed attachments to the same folder with XML
         file_dir = os.path.dirname(file_dir)
         file_service.move_file(file_dir, 'attachments/' + filename, self._provider)
Exemplo n.º 11
0
def _get_file(filename):
    """Download ``filename`` from the BOM FTP server, trying up to three times.

    :param filename: name of the remote file, relative to the ``fwo`` path.
    :return: the file content as bytes.
    :raises Exception: the error from the last attempt, once all attempts failed.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        # Fresh buffer per attempt so a partial download is never returned.
        raw = BytesIO()
        try:
            with ftp_connect({'username': app.config.get('BOM_WEATHER_FTP_USERNAME', ''),
                              'password': app.config.get('BOM_WEATHER_FTP_PASSWORD', ''),
                              'host': 'ftp.bom.gov.au',
                              'path': 'fwo'}) as ftp:
                ftp.retrbinary('RETR ' + filename, raw.write)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
            # and SystemExit are not swallowed and retried.
            logger.exception('Failed to download on attempt {} file: {}'.format(attempt, filename))
            if attempt + 1 >= max_attempts:
                logger.exception('Retries exceeded downloading {}'.format(filename))
                raise
        else:
            return raw.getvalue()
Exemplo n.º 12
0
    def _update(self, provider, update):
        """Generator that ingests files from the provider's FTP server.

        Lists and sorts the remote files, selects those to process (filtered by
        allowed extension and by the provider's ``last_processed_file_modify``,
        capped by ``FTP_INGEST_FILES_LIST_LIMIT``), then yields the result of
        ``self._retrieve_and_parse`` for each selected file.

        The value sent back into the generator after each yield is treated as a
        "failed" flag: when moving is enabled the file is moved to the error
        folder if it is truthy, otherwise to the success folder.

        :param provider: ingest provider dict (``config`` and ``private`` are read).
        :param update: dict that receives ``private.last_processed_file_modify``
            for the file currently being processed.
        :raises IngestFtpError: for any failure outside the per-file handling.
        """
        config = provider.get("config", {})
        do_move = config.get("move", False)
        last_processed_file_modify = provider.get(
            "private", {}).get("last_processed_file_modify")
        limit = app.config.get("FTP_INGEST_FILES_LIST_LIMIT", 100)
        registered_parser = self.get_feed_parser(provider)
        # Fall back to the service default when the parser declares no ALLOWED_EXT.
        allowed_ext = getattr(registered_parser, "ALLOWED_EXT",
                              self.ALLOWED_EXT_DEFAULT)

        try:
            self._timer.start("ftp_connect")
            with ftp_connect(config) as ftp:
                self._log_msg(
                    "Connected to FTP server. Exec time: {:.4f} secs.".format(
                        self._timer.stop("ftp_connect")))
                files_to_process = []
                files = self._sort_files(self._list_files(ftp, provider))

                # move_path / move_path_error are only bound when moving is enabled;
                # every later use is guarded by the same do_move flag.
                if do_move:
                    move_path, move_path_error = self._create_move_folders(
                        config, ftp)

                self._timer.start("files_to_process")

                for filename, modify in files:
                    # filter by extension
                    if not self._is_allowed(filename, allowed_ext):
                        logger.info(
                            "ignoring file {filename} because of file extension"
                            .format(filename=filename))
                        continue

                    # filter by modify datetime
                    file_modify = datetime.strptime(
                        modify, self.DATE_FORMAT).replace(tzinfo=utc)
                    if last_processed_file_modify:
                        # ignore limit and add files for processing
                        if last_processed_file_modify == file_modify:
                            files_to_process.append((filename, file_modify))
                        elif last_processed_file_modify < file_modify:
                            # even if we have reached a limit, we must add at least one file to increment
                            # a `last_processed_file_modify` in provider
                            files_to_process.append((filename, file_modify))
                            # limit amount of files to process per ingest update
                            if len(files_to_process) >= limit:
                                break
                    else:
                        # limit amount of files to process per ingest update
                        if len(files_to_process) >= limit:
                            break
                        # add files for processing
                        files_to_process.append((filename, file_modify))

                self._log_msg(
                    "Got {} file for processing. Exec time: {:.4f} secs.".
                    format(len(files_to_process),
                           self._timer.stop("files_to_process")))

                # process files
                self._timer.start("start_processing")
                for filename, file_modify in files_to_process:
                    try:
                        # Record the modify time before the file is handed out.
                        update["private"] = {
                            "last_processed_file_modify": file_modify
                        }
                        # The value sent back by the consumer becomes the failed flag.
                        failed = yield self._retrieve_and_parse(
                            ftp, config, filename, provider, registered_parser)

                        if do_move:
                            move_dest_file_path = os.path.join(
                                move_path if not failed else move_path_error,
                                filename)
                            self._move(ftp,
                                       filename,
                                       move_dest_file_path,
                                       file_modify,
                                       failed=failed)
                    except EmptyFile:
                        # Empty files are skipped without logging or moving.
                        continue
                    except Exception as e:
                        logger.error(
                            "Error while parsing {filename}: {msg}".format(
                                filename=filename, msg=e))

                        # Any other per-file error sends the file to the error folder.
                        if do_move:
                            move_dest_file_path_error = os.path.join(
                                move_path_error, filename)
                            self._move(ftp,
                                       filename,
                                       move_dest_file_path_error,
                                       file_modify,
                                       failed=True)

                self._log_msg(
                    "Processing finished. Exec time: {:.4f} secs.".format(
                        self._timer.stop("start_processing")))

        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)
Exemplo n.º 13
0
    def _update(self, provider, update):
        """Download, parse and optionally move the provider's FTP files.

        Regular files with an allowed extension that were modified after the
        provider's ``last_updated`` are saved into ``config['dest_path']`` and
        parsed. When ``config['move']`` is set, processed files are moved to
        the success folder and files that raised an error to the failure
        folder. The newest modification time seen is written to
        ``update[LAST_UPDATED]``.

        :param provider: ingest provider dict.
        :param update: dict receiving the new ``LAST_UPDATED`` value.
        :return: list with one entry per parsed file; a dict result is wrapped
            in a single-element list.
        :raises IngestFtpError: for any failure outside the per-file handling.
        """
        config = provider.get('config', {})
        last_updated = provider.get('last_updated')
        registered_parser = self.get_feed_parser(provider)
        try:
            allowed_ext = registered_parser.ALLOWED_EXT
        except AttributeError:
            allowed_ext = self.ALLOWED_EXT_DEFAULT
        newest_modify = None

        do_move = bool(config.get('move', False))
        if do_move:
            base_path = config.get('path', '')
            if not config.get('move_path'):
                logger.debug('missing move_path, default will be used')
            success_dir = os.path.join(base_path, config.get('move_path') or DEFAULT_SUCCESS_PATH)
            if not config.get('move_path_error'):
                logger.debug('missing move_path_error, default will be used')
            failure_dir = os.path.join(base_path, config.get('move_path_error') or DEFAULT_FAILURE_PATH)

        if 'dest_path' not in config:
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        try:
            with ftp_connect(config) as ftp:
                if do_move:
                    try:
                        for folder in (success_dir, failure_dir):
                            self._create_if_missing(ftp, folder)
                    except ftplib.all_errors as e:
                        # Without both folders moving is switched off for this run.
                        logger.warning(
                            "Can't create move directory, files will not be moved: {reason}".format(reason=e))
                        do_move = False

                parsed_files = []
                for filename, facts in ftp.mlsd():
                    if facts.get('type', '') != 'file':
                        continue
                    try:
                        if not self._is_allowed(filename, allowed_ext):
                            logger.info('ignoring file {filename} because of file extension'.format(
                                filename=filename))
                            continue

                        if last_updated:
                            modified = datetime.strptime(facts['modify'], self.DATE_FORMAT)
                            modified = modified.replace(tzinfo=utc)
                            if modified <= last_updated:
                                continue
                            if not newest_modify or modified > newest_modify:
                                newest_modify = modified

                        local_file_path = os.path.join(config['dest_path'], filename)
                        with open(local_file_path, 'wb') as local_file:
                            try:
                                ftp.retrbinary('RETR %s' % filename, local_file.write)
                            except ftplib.all_errors:
                                os.remove(local_file_path)
                                raise Exception(
                                    'Exception retrieving file from FTP server ({filename})'.format(
                                        filename=filename))

                        # XML parsers receive the parsed root element, others the file path.
                        if isinstance(registered_parser, XMLFeedParser):
                            source = etree.parse(local_file_path).getroot()
                        else:
                            source = local_file_path
                        parsed = self.get_feed_parser(provider, source).parse(source, provider)

                        parsed_files.append([parsed] if isinstance(parsed, dict) else parsed)
                        if do_move:
                            self._move(ftp, filename, os.path.join(success_dir, filename))
                    except Exception as e:
                        # Any per-file error is logged and the file goes to the failure folder.
                        logger.error("Error while parsing {filename}: {msg}".format(filename=filename, msg=e))
                        if do_move:
                            self._move(ftp, filename, os.path.join(failure_dir, filename))
            if newest_modify:
                update[LAST_UPDATED] = newest_modify
            return parsed_files
        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)
Exemplo n.º 14
0
    def _update(self, provider, update):
        """Query the AP Media API and push returned pictures to an FTP server.

        Requests either the stored ``next_link`` or a fresh search built from
        ``productList`` and ``recoverytime``, downloads the main rendition of
        each picture item and stores it on the configured FTP server. Finally
        the next link, an emptied recovery time and the sequence counter are
        written to ``update['config']``.

        :param provider: ingest provider dict; its ``config`` holds the API and
            FTP settings.
        :param update: dict receiving the updated ``config``.
        :return: ``[]`` when the API returned the same next link as last time,
            otherwise ``None``.
        :raises IngestApiError: when the API response cannot be parsed as JSON.
        """
        config = provider.get('config', {})
        self.HTTP_URL = config.get('api_url', '')
        self.provider = provider

        # Set the apikey parameter we're going to use it on all calls
        params = {'apikey': config.get('apikey')}

        # NOTE(review): both API calls pass verify=False - presumably this turns
        # off TLS certificate verification; confirm that is intended.
        # Use the next link if one is available in the config
        if config.get('next_link'):
            r = self.get_url(url=config.get('next_link'),
                             params=params,
                             verify=False,
                             allow_redirects=True)
            r.raise_for_status()
        else:
            id_list = config.get('productList', '').strip()
            recovery_time = config.get('recoverytime', '1').strip()
            if recovery_time == '':
                recovery_time = '1'
            start = (utcnow() - timedelta(hours=int(recovery_time))).isoformat()[:19] + 'Z'
            # If there has been a list of products defined then we format them for the request, if not all
            # allowed products will be returned.
            if id_list:
                # we remove spaces and empty values from id_list to do a clean list
                id_list = ' OR '.join(
                    [id_.strip() for id_ in id_list.split(',') if id_.strip()])
                params['q'] = 'productid:(' + id_list + ') AND mindate:>{}'.format(start)
            else:
                params['q'] = 'mindate:>{}'.format(start)
            params['page_size'] = '100'

            r = self.get_url(params=params, verify=False, allow_redirects=True)
            r.raise_for_status()
        try:
            response = json.loads(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(
                Exception('error parsing response'))

        nextLink = response.get('data', {}).get('next_page')
        # Got the same next link as last time so nothing new
        if nextLink == config.get('next_link'):
            logger.info('Nothing new from AP Media')
            return []

        # Resolve the sequence counter up front: it is saved below even when the
        # response has no items. It used to be bound only inside the upload
        # branch, so an empty response or an invalid stored value ended in an
        # UnboundLocalError when saving.
        try:
            sequence_number = int(config.get('sequence', 0))
        except (TypeError, ValueError):
            logger.warning('Invalid sequence value in config, restarting from 0')
            sequence_number = 0

        items = response.get('data', {}).get('items', [])
        if items:
            try:
                with ftp_connect({
                        'username': config.get('ftp_user', ''),
                        'password': config.get('ftp_password', ''),
                        'host': config.get('ftp_server', ''),
                        'path': config.get('ftp_path', '')
                }) as ftp:
                    for item in items:
                        try:
                            if item['item']['type'] == 'picture':
                                main_rendition = item['item']['renditions']['main']
                                image_ref = main_rendition['href']
                                if config.get('filenametemplate', '') == '':
                                    filename = to_ascii(main_rendition['originalfilename'])
                                else:
                                    # The filename is generated by applying the date format string in the template
                                    filename = datetime.now().strftime(
                                        config.get('filenametemplate', ''))
                                    # and appending the sequence number
                                    filename += '-' + str(sequence_number).zfill(4) + '.jpg'
                                    sequence_number = (sequence_number + 1) % 10000

                                logger.info(
                                    'file: {} versioncreated: {}'.format(
                                        filename,
                                        item['item']['versioncreated']))
                                # Separate name so the API response ``r`` is not shadowed.
                                image_response = requests.get(
                                    url=image_ref,
                                    params={'apikey': config.get('apikey')})
                                image_response.raise_for_status()
                                try:
                                    ftp.storbinary('STOR {}'.format(filename),
                                                   BytesIO(image_response.content))
                                except ftplib.all_errors as e:
                                    logger.error(e)

                        # Any exception processing an individual item is swallowed
                        except Exception as ex:
                            logger.exception(ex)
            except Exception as ex:
                logger.exception(ex)

        # Save the link for next time
        upd_provider = config
        upd_provider['next_link'] = nextLink
        upd_provider['recoverytime'] = ''
        upd_provider['sequence'] = str(sequence_number)
        update['config'] = upd_provider

        return None
Exemplo n.º 15
0
    def _update(self, provider, update):
        """Download, parse and optionally move the provider's FTP files.

        Regular files are saved into ``config['dest_path']`` (a new temporary
        directory when unset) and parsed with the provider's feed parser. Files
        modified before the provider's ``last_updated`` are skipped. Any
        per-file error, including a wrong file suffix or a file that already
        exists locally, is logged and, when ``config['move']`` is set, sends the
        file to the failure folder; successfully parsed files go to the success
        folder. The newest modification time seen is written to
        ``update[LAST_UPDATED]``.

        :param provider: ingest provider dict.
        :param update: dict receiving the new ``LAST_UPDATED`` value.
        :return: list with one entry per parsed file; a dict result is wrapped
            in a single-element list.
        :raises IngestFtpError: for any failure outside the per-file handling.
        """
        config = provider.get('config', {})
        last_updated = provider.get('last_updated')
        crt_last_updated = None
        if config.get('move', False):
            do_move = True
            if not config.get('move_path'):
                logger.debug('missing move_path, default will be used')
            move_dest_path = os.path.join(config.get('path', ''), config.get('move_path') or DEFAULT_SUCCESS_PATH)
            if not config.get('move_path_error'):
                logger.debug('missing move_path_error, default will be used')
            move_dest_path_error = os.path.join(config.get('path', ''),
                                                config.get('move_path_error') or DEFAULT_FAILURE_PATH)
        else:
            do_move = False

        if 'dest_path' not in config:
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        try:
            with ftp_connect(config) as ftp:
                if do_move:
                    try:
                        self._create_if_missing(ftp, move_dest_path)
                        self._create_if_missing(ftp, move_dest_path_error)
                    except ftplib.all_errors as e:
                        # Without both folders moving is switched off for this run.
                        logger.warning("Can't create move directory, files will not be moved: {reason}".format(
                            reason=e))
                        do_move = False
                items = []
                for filename, facts in ftp.mlsd():
                    if facts.get('type', '') != 'file':
                        continue
                    try:
                        if not filename.lower().endswith(self.FILE_SUFFIX):
                            # Raise explicitly so the handler below logs a real
                            # reason; a bare ``raise`` here has no active
                            # exception and produced an opaque RuntimeError.
                            raise Exception('Unsupported file extension ({filename})'.format(
                                            filename=filename))

                        if last_updated:
                            item_last_updated = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                            if item_last_updated < last_updated:
                                continue
                            elif not crt_last_updated or item_last_updated > crt_last_updated:
                                crt_last_updated = item_last_updated

                        local_file_path = os.path.join(config['dest_path'], filename)
                        try:
                            # 'xb' refuses to overwrite an already downloaded file.
                            with open(local_file_path, 'xb') as f:
                                try:
                                    ftp.retrbinary('RETR %s' % filename, f.write)
                                except ftplib.all_errors:
                                    os.remove(local_file_path)
                                    raise Exception('Exception retrieving file from FTP server ({filename})'.format(
                                                    filename=filename))
                        except FileExistsError:
                            # Placeholder fixed from ``{filename]`` to ``{filename}``;
                            # the broken one made ``format`` raise ValueError.
                            raise Exception('Exception retrieving from FTP server, file already exists ({filename})'
                                            .format(filename=local_file_path))

                        registered_parser = self.get_feed_parser(provider)
                        # XML parsers receive the parsed root element, others the file path.
                        if isinstance(registered_parser, XMLFeedParser):
                            xml = etree.parse(local_file_path).getroot()
                            parser = self.get_feed_parser(provider, xml)
                            parsed = parser.parse(xml, provider)
                        else:
                            parser = self.get_feed_parser(provider, local_file_path)
                            parsed = parser.parse(local_file_path, provider)

                        if isinstance(parsed, dict):
                            parsed = [parsed]

                        items.append(parsed)
                        if do_move:
                            move_dest_file_path = os.path.join(move_dest_path, filename)
                            self._move(ftp, filename, move_dest_file_path)
                    except Exception as e:
                        # Any per-file error is logged and the file goes to the failure folder.
                        logger.error("Error while parsing {filename}: {msg}".format(filename=filename, msg=e))
                        if do_move:
                            move_dest_file_path_error = os.path.join(move_dest_path_error, filename)
                            self._move(ftp, filename, move_dest_file_path_error)
            if crt_last_updated:
                update[LAST_UPDATED] = crt_last_updated
            return items
        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)
Exemplo n.º 16
0
 def _download_file(self, filename, file_path, config):
     """Download a remote file over FTP into the system temporary directory.

     :param filename: name used for the local copy inside the temp directory.
     :param file_path: path of the file on the FTP server.
     :param config: settings passed to ``ftp_connect``.
     :return: path of the downloaded local file.
     """
     local_path = os.path.join(gettempdir(), filename)
     with ftp_connect(config) as ftp:
         with open(local_path, 'wb') as local_file:
             ftp.retrbinary('RETR ' + file_path, local_file.write)
             return local_path
Exemplo n.º 17
0
    def _update(self, provider, update):
        """Ingest new files from the provider's FTP server.

        Lists the remote files, keeps those with an allowed extension whose
        modification time is not older than the provider's stored
        ``last_processed_file_modify``, then retrieves and parses each one.
        After every successfully parsed file ``update['private']`` is replaced
        with that file's modification time. When ``config['move']`` is truthy,
        each handled file is moved on the server into one of the two folders
        returned by ``_create_move_folders`` (first one on success, second one
        when handling the file raised).

        :param provider: ingest provider dict; ``config`` and ``private`` are read
        :param update: dict that receives the ``private`` entry
        :return: list accumulated from the ``_retrieve_and_parse`` results
        :raises IngestFtpError: re-raised unchanged, or built through
            ``IngestFtpError.ftpError`` from any other exception
        """
        config = provider.get('config', {})
        do_move = config.get('move', False)
        private = provider.get('private', {})
        watermark = private.get('last_processed_file_modify')
        # Maximum number of files handled per call (100 unless configured).
        limit = app.config.get('FTP_INGEST_FILES_LIST_LIMIT', 100)
        registered_parser = self.get_feed_parser(provider)
        allowed_ext = getattr(
            registered_parser, 'ALLOWED_EXT', self.ALLOWED_EXT_DEFAULT)

        try:
            with ftp_connect(config) as ftp:
                items = []
                pending = []
                listing = self._sort_files(self._list_files(ftp, provider))

                if do_move:
                    move_path, move_path_error = self._create_move_folders(
                        config, ftp)

                # Step 1: pick the files worth processing.
                for name, raw_modify in listing:
                    if not self._is_allowed(name, allowed_ext):
                        logger.info(
                            'ignoring file {filename} because of file extension'
                            .format(filename=name))
                        continue

                    modified_at = datetime.strptime(
                        raw_modify, self.DATE_FORMAT).replace(tzinfo=utc)

                    if not watermark:
                        # No stored timestamp: take files until the limit is hit.
                        if len(pending) >= limit:
                            break
                        pending.append((name, modified_at))
                        continue

                    if watermark == modified_at:
                        # Same timestamp as the stored one: always queued,
                        # regardless of the limit.
                        pending.append((name, modified_at))
                        continue

                    if watermark < modified_at:
                        # Queue first and check the limit afterwards, so at
                        # least one newer file gets through and the stored
                        # timestamp can advance.
                        pending.append((name, modified_at))
                        if len(pending) >= limit:
                            break

                # Step 2: retrieve, parse and optionally move each picked file.
                for name, modified_at in pending:
                    try:
                        items.extend(self._retrieve_and_parse(
                            ftp, config, name, provider, registered_parser))
                        update['private'] = {
                            'last_processed_file_modify': modified_at,
                        }

                        if do_move:
                            self._move(
                                ftp, name, os.path.join(move_path, name))
                    except Exception as e:
                        logger.error(
                            "Error while parsing {filename}: {msg}".format(
                                filename=name, msg=e))

                        if do_move:
                            self._move(
                                ftp, name,
                                os.path.join(move_path_error, name))

            return items
        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)
Exemplo n.º 18
0
    def _update(self, provider, update):
        """Ingest files from the provider's FTP server.

        Every listed file with an allowed extension is retrieved and parsed.
        When the provider carries a ``last_updated`` value, files whose
        timestamp is not newer than it are skipped, and the newest timestamp
        seen is written to ``update[LAST_UPDATED]``. When ``config['move']`` is
        truthy, handled files are moved on the server to the success path, or
        to the error path if handling the file raised.

        Note: ``config['dest_path']`` is set to a fresh temporary directory
        when the key is missing, which mutates the provider's config dict.

        :param provider: ingest provider dict; ``config`` and ``last_updated`` are read
        :param update: dict that may receive the ``LAST_UPDATED`` entry
        :return: list accumulated from the ``_retrieve_and_parse`` results
        :raises IngestFtpError: re-raised unchanged, or built through
            ``IngestFtpError.ftpError`` from any other exception
        """
        config = provider.get('config', {})
        last_updated = provider.get('last_updated')
        registered_parser = self.get_feed_parser(provider)
        try:
            allowed_ext = registered_parser.ALLOWED_EXT
        except AttributeError:
            # The parser does not declare its own extensions.
            allowed_ext = self.ALLOWED_EXT_DEFAULT
        newest_seen = None

        do_move = bool(config.get('move', False))
        if do_move:
            base_path = config.get('path', '')

            success_dir = config.get('ftp_move_path')
            if not success_dir:
                logger.debug('missing move_path, default will be used')
            move_dest_path = os.path.join(base_path, success_dir or DEFAULT_SUCCESS_PATH)

            failure_dir = config.get('move_path_error')
            if not failure_dir:
                logger.debug('missing move_path_error, default will be used')
            move_dest_path_error = os.path.join(base_path, failure_dir or DEFAULT_FAILURE_PATH)

        if 'dest_path' not in config:
            # Create the temp dir only when needed; mkdtemp has a side effect.
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        try:
            with ftp_connect(config) as ftp:
                if do_move:
                    try:
                        for remote_dir in (move_dest_path, move_dest_path_error):
                            self._create_if_missing(ftp, remote_dir)
                    except ftplib.all_errors as e:
                        # Best effort: continue ingesting, just without moving.
                        logger.warning("Can't create move directory, files will not be moved: {reason}".format(
                            reason=e))
                        do_move = False
                items = []

                for filename, facts in self._list_items(ftp, provider):
                    try:
                        if not self._is_allowed(filename, allowed_ext):
                            logger.info('ignoring file {filename} because of file extension'.format(filename=filename))
                            continue

                        if last_updated:
                            stamp = datetime.strptime(facts, self.DATE_FORMAT).replace(tzinfo=utc)
                            if stamp <= last_updated:
                                continue
                            if not newest_seen or stamp > newest_seen:
                                newest_seen = stamp

                        items.extend(self._retrieve_and_parse(ftp, config, filename, provider, registered_parser))
                        if do_move:
                            self._move(ftp, filename, os.path.join(move_dest_path, filename))
                    except Exception as e:
                        logger.error("Error while parsing {filename}: {msg}".format(filename=filename, msg=e))
                        if do_move:
                            self._move(ftp, filename, os.path.join(move_dest_path_error, filename))
            if newest_seen:
                update[LAST_UPDATED] = newest_seen
            return items
        except IngestFtpError:
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider)