def file_of_published_article(self, filename):
    """Tell whether ``filename`` names the zip of a published article.

    The directory part of ``filename`` is dropped and the bare name is handed
    to ``ArticleInfo``. The file counts as published only when both a version
    and an update date can be read from the zip filename.

    :param filename: path or bare name of the file to inspect
    :returns: ``True`` when both values are present, otherwise ``False``
    """
    article_info = ArticleInfo(os.path.basename(filename))
    version = article_info.get_version_from_zip_filename()
    update_date = article_info.get_update_date_from_zip_filename()
    # None is a singleton: test it by identity rather than with ``!=``,
    # which can be overridden by the compared object.
    return version is not None and update_date is not None
class TestArticleStructure(unittest.TestCase):
    """Checks the values ArticleInfo reads from article zip filenames."""

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip',
         'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        # The parameter names must match the keys of the data dicts above.
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_update_date_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        # The parameter names must match the keys of the data dicts above.
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_version_from_zip_filename(), expected)
 def get_article_xml_key(bucket, expanded_folder_name):
     """Find the article XML file among the listed entries of ``bucket``.

     Calls ``bucket.list(expanded_folder_name + "/", "/")``, fetches the key
     for every listed entry and classifies the part of the key name after
     the last "/" with ``ArticleInfo``.

     Returns a ``(key, filename)`` tuple for the first entry whose
     ``file_type`` is 'ArticleXML', or ``None`` when no entry matches.
     """
     listing = bucket.list(expanded_folder_name + "/", "/")
     for entry in listing:
         candidate = bucket.get_key(entry.key)
         name = candidate.name.rsplit('/', 1)[1]
         if ArticleInfo(name).file_type != 'ArticleXML':
             continue
         return candidate, name
     return None
Exemplo n.º 4
0
    def process_key(self, key, cdn_path):
        """Generate images for ``key`` if formats exist for its file type.

        The filename is the part of ``key.name`` after the last "/"; its
        ``ArticleInfo.file_type`` is used to look up formats through
        ``self.get_formats``. When formats are returned, a file pointer for
        the key is obtained and passed to ``self.generate_images`` together
        with the formats, the ``ArticleInfo`` and ``cdn_path``.
        """
        # strip the folder part of the key name
        basename = key.name.rsplit('/', 1)[1]
        article_file = ArticleInfo(basename)

        image_formats = self.get_formats(article_file.file_type)
        if image_formats is None:
            # no formats for this file type, so there is nothing to generate
            return

        file_pointer = self.get_file_pointer(key)
        self.generate_images(image_formats, file_pointer, article_file, cdn_path)
Exemplo n.º 5
0
 def get_xml_file_name(self, settings, expanded_folder_name, xml_bucket, version):
     """Return the name of the article XML file for ``version``.

     Walks the names returned by ``self._get_bucket_files`` and keeps only
     those that ``ArticleInfo`` classifies as 'ArticleXML'. With ``version``
     set to ``None`` the first such name is returned; otherwise the first
     one containing ``'-v' + version + '.'``. Returns ``None`` if nothing
     matches.
     """
     candidates = self._get_bucket_files(settings, expanded_folder_name, xml_bucket)
     for candidate in candidates:
         if ArticleInfo(candidate).file_type != 'ArticleXML':
             continue
         # no version requested, or the name carries the requested version
         if version is None or ('-v' + version + '.') in candidate:
             return candidate
     return None
Exemplo n.º 6
0
    def do_activity(self, data=None):
        """Look up the date of the article version and store it in the session.

        Reads ``version`` and ``filename_last_element`` from the session for
        ``data['run']``, asks ``self.get_version`` for the version date and,
        when no error is reported, stores it in the session as
        ``update_date``. Monitor events are emitted for the "error" and "end"
        outcomes.

        :param data: workflow data; must contain the key ``'run'``
        :returns: ``activity.activity.ACTIVITY_SUCCESS`` when the date was
            stored, ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the
            lookup reports an error or any step raises
        """
        # Bound before the try block so the exception handler can always
        # refer to them, even when the failure happens before they have been
        # looked up; otherwise the handler itself would raise and hide the
        # original exception.
        run = None
        version = None
        article_id = None

        try:
            session = Session(self.settings)
            run = data['run']
            version = session.get_value(run, 'version')
            filename = session.get_value(run, 'filename_last_element')

            article_structure = ArticleInfo(filename)
            article_id = article_structure.article_id

            version_date, error = self.get_version(
                self.settings, article_structure, article_id, version)

            if error is not None:
                self.logger.error(error)
                self.emit_monitor_event(
                    self.settings, article_id, version,
                    run, self.pretty_name, "error", " ".join(
                        ("Error Looking up version article",
                         article_id, "message:", error)))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version,
                run, self.pretty_name, "end", " ".join(
                    ("Finished Version Lookup for article",
                     article_id, "version:", version)))

            session.store_value(run, 'update_date', version_date)

            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception(
                "Exception when trying to Lookup next version")
            # article_id may still be None here, so stringify it for join()
            self.emit_monitor_event(
                self.settings, article_id, version,
                run, self.pretty_name, "error", " ".join(
                    ("Error looking up version for article",
                     str(article_id), "message:", str(e))))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
 def test_get_version_from_zip_filename(self, input, expected):
     # Build the ArticleInfo for the given name and compare the version
     # it reports with the expected value.
     self.articleinfo = ArticleInfo(input)
     self.assertEqual(
         self.articleinfo.get_version_from_zip_filename(), expected)
 def find_xml_filename_in_map(self, file_name_map):
     """Return the first new name in ``file_name_map`` that is an article XML.

     :param file_name_map: mapping of old file names to new file names
     :returns: the first value that ``ArticleInfo`` classifies as
         'ArticleXML', or ``None`` when there is none
     """
     # Only the new names are inspected, so iterate the values directly;
     # values() exists on Python 2 and 3 mappings, iteritems() only on 2.
     for new_name in file_name_map.values():
         if ArticleInfo(new_name).file_type == 'ArticleXML':
             return new_name
     return None
Exemplo n.º 9
0
class TestArticleStructure(unittest.TestCase):
    """Filename-driven checks for ArticleInfo and the article_structure helpers."""

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip',
         'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_update_date_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_version_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00666-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-v1.xml', 'expected': 'ArticleXML'},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-code1-v1.xml', 'expected': 'Other'},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v10.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': 'FigurePDF'},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': 'Inline'},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': 'Figure'},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v2.tif', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v3.docx', 'expected': 'Other'},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': 'Other'},
    )
    def test_get_file_type_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(self.articleinfo.file_type, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': False},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': False},
        {'input': 'elife-00666-v1.pdf', 'expected': False},
        {'input': 'elife-00666-v1.xml', 'expected': False},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': False},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-code1-v1.xml', 'expected': False},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': False},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': False},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': False},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': False},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': True},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': False},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': False},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-video1.mp4', 'expected': False},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-supp1-v1.tif', 'expected': False},
    )
    def test_article_figure(self, input, expected):
        is_figure = article_structure.article_figure(input)
        self.assertEqual(is_figure, expected)

    def test_get_original_files(self):
        file_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-table3-data1-v1.xlsx',
        ]

        actual = article_structure.get_original_files(file_names)
        self.assertListEqual(actual, wanted)

    def test_get_media_file_images(self):
        file_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        actual = article_structure.get_media_file_images(file_names)
        self.assertListEqual(actual, wanted)

    def test_get_figures_for_iiif(self):
        "Only .tif of original figures"
        file_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        actual = article_structure.get_figures_for_iiif(file_names)
        self.assertListEqual(actual, wanted)

    # see https://github.com/elifesciences/elife-continuum-documentation/blob/master/file-naming/file_naming_spec.md
    def test_get_figures_pdfs(self):
        file_names = [
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-00353-v1.pdf',
            'elife-00353-v1.xml',
            'elife-18425-figures-v2.pdf',
        ]
        wanted = [
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        actual = article_structure.get_figures_pdfs(file_names)
        self.assertListEqual(actual, wanted)

    @data(
        u'elife-15224-fig1-figsupp1.tif',
        u'elife-15224-resp-fig1.tif',
        u'elife-15224-figures.pdf',
        u'elife-15802-fig9-data3.docx',
        u'elife-11792.mp4',
        u'elife-00005-media1-code1.wrl',
    )
    def test_is_video_file_false(self, filename):
        self.assertFalse(article_structure.is_video_file(filename))

    @data(
        u'elife-11792-media2.mp4',
        u'elife-15224-fig1-figsupp1-media.tif',
        u'elife-11792-video1.mp4',
        u'elife-99999-resp-media1.avi',
        u'elife-00005-media1.mov',
    )
    def test_is_video_file_true(self, filename):
        self.assertTrue(article_structure.is_video_file(filename))

    @data(u'elife-15224-fig1-figsupp1.tif')
    def test_file_parts(self, filename):
        stem, suffix = article_structure.file_parts(filename)
        self.assertEqual(stem, u'elife-15224-fig1-figsupp1')
        self.assertEqual(suffix, u'tif')

    def test_get_videos(self):
        file_names = [
            u'elife-13273-fig1-v1.tif',
            u'elife-13273-fig2-figsupp1-v1.tif',
            u'elife-13273-fig2-figsupp2-v1.tif',
            u'elife-13273-fig2-figsupp3-v1.tif',
            u'elife-13273-fig2-v1.tif',
            u'elife-13273-fig3-data1-v1.xlsx',
            u'elife-13273-fig3-figsupp1-v1.tif',
            u'elife-13273-fig3-figsupp2-v1.tif',
            u'elife-13273-fig3-figsupp3-v1.tif',
            u'elife-13273-fig3-figsupp4-v1.tif',
            u'elife-13273-fig3-figsupp5-v1.tif',
            u'elife-13273-fig3-v1.tif',
            u'elife-13273-fig4-figsupp1-v1.tif',
            u'elife-13273-fig4-v1.tif',
            u'elife-13273-fig5-data1-v1.xlsx',
            u'elife-13273-fig5-figsupp1-v1.tif',
            u'elife-13273-fig5-v1.tif',
            u'elife-13273-fig6-data1-v1.xlsx',
            u'elife-13273-fig6-data2-v1.xlsx',
            u'elife-13273-fig6-figsupp1-v1.tif',
            u'elife-13273-fig6-figsupp2-v1.tif',
            u'elife-13273-fig6-v1.tif',
            u'elife-13273-fig7-v1.tif',
            u'elife-13273-fig8-v1.tif',
            u'elife-13273-fig9-v1.tif',
            u'elife-13273-figures-v1.pdf',
            u'elife-13273-media1.mp4',
            u'elife-13273-v1.pdf',
            u'elife-13273-v1.xml',
        ]

        videos = article_structure.get_videos(file_names)

        self.assertListEqual(videos, [u'elife-13273-media1.mp4'])

    def test_pre_ingest_assets(self):
        file_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
            'elife-13273-media1.mp4',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-13273-media1.mp4',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        # order-insensitive comparison
        actual = article_structure.pre_ingest_assets(file_names)
        self.assertItemsEqual(actual, wanted)
Exemplo n.º 10
0
 def test_get_file_type_from_zip_filename(self, input, expected):
     # Classify the given name with ArticleInfo and compare the file type
     # it reports with the expected one.
     self.articleinfo = ArticleInfo(input)
     self.assertEqual(self.articleinfo.file_type, expected)
Exemplo n.º 11
0
    def do_activity(self, data=None):
        """
        Expand an article zip: download it, unpack it and upload its files.

        The zip described by ``data`` is fetched through the storage context
        into the temporary directory and extracted. Every regular file whose
        name does not start with '.' or '_' is then uploaded to the expanded
        bucket under ``<article_id>.<version>/<run>/``. The session for the
        run is updated with ``article_id``, ``file_name``,
        ``article_version_id``, ``run``, ``status`` and ``expanded_folder``,
        and monitor events mark the start, the end and any error.

        :param data: workflow data; must contain the key ``'run'`` plus
            whatever ``S3NotificationInfo.from_dict`` reads
        :returns: ``True`` on success;
            ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the status
            in the zip name is neither 'vor' nor 'poa', or when any step of
            the expansion raises
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status is None or (status != 'vor' and status != 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)

        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            try:
                storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            finally:
                # release the handle even when the download fails
                local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            # keep regular files only, skipping hidden ('.') and '_' entries
            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            # str(e) instead of e.message: the attribute is deprecated and
            # absent on Python 3 exceptions, which would raise in here and
            # hide the original failure.
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
Exemplo n.º 12
0
class TestArticleStructure(unittest.TestCase):
    """Filename-driven checks for ArticleInfo and the article_structure helpers."""

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        update_date = self.articleinfo.get_update_date_from_zip_filename()
        self.assertEqual(update_date, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        version = self.articleinfo.get_version_from_zip_filename()
        self.assertEqual(version, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00666-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-v1.xml', 'expected': 'ArticleXML'},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-code1-v1.xml', 'expected': 'Other'},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v10.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': 'FigurePDF'},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': 'Inline'},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': 'Figure'},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v2.tif', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v3.docx', 'expected': 'Other'},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': 'Other'},
    )
    def test_get_file_type_from_zip_filename(self, input, expected):
        self.articleinfo = ArticleInfo(input)
        file_type = self.articleinfo.file_type
        self.assertEqual(file_type, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': False},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': False},
        {'input': 'elife-00666-v1.pdf', 'expected': False},
        {'input': 'elife-00666-v1.xml', 'expected': False},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': False},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-code1-v1.xml', 'expected': False},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': False},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': False},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': False},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': False},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': True},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': False},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': False},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-video1.mp4', 'expected': False},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-supp1-v1.tif', 'expected': False},
    )
    def test_article_figure(self, input, expected):
        is_figure = article_structure.article_figure(input)
        self.assertEqual(is_figure, expected)

    def test_get_original_files(self):
        file_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-table3-data1-v1.xlsx',
        ]

        actual = article_structure.get_original_files(file_names)
        self.assertListEqual(actual, wanted)

    def test_get_media_file_images(self):
        file_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        actual = article_structure.get_media_file_images(file_names)
        self.assertListEqual(actual, wanted)

    def test_get_figures_for_iiif(self):
        "Only .tif of original figures"
        file_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        actual = article_structure.get_figures_for_iiif(file_names)
        self.assertListEqual(actual, wanted)

    # see https://github.com/elifesciences/elife-continuum-documentation/blob/master/file-naming/file_naming_spec.md
    def test_get_figures_pdfs(self):
        file_names = [
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-00353-v1.pdf',
            'elife-00353-v1.xml',
            'elife-18425-figures-v2.pdf',
        ]
        wanted = [
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        actual = article_structure.get_figures_pdfs(file_names)
        self.assertListEqual(actual, wanted)

    @data(
        u'elife-15224-fig1-figsupp1.tif',
        u'elife-15224-resp-fig1.tif',
        u'elife-15224-figures.pdf',
        u'elife-15802-fig9-data3.docx',
        u'elife-11792.mp4',
        u'elife-00005-media1-code1.wrl',
    )
    def test_is_video_file_false(self, filename):
        self.assertFalse(article_structure.is_video_file(filename))

    @data(
        u'elife-11792-media2.mp4',
        u'elife-15224-fig1-figsupp1-media.tif',
        u'elife-11792-video1.mp4',
        u'elife-99999-resp-media1.avi',
        u'elife-00005-media1.mov',
    )
    def test_is_video_file_true(self, filename):
        self.assertTrue(article_structure.is_video_file(filename))

    @data(u'elife-15224-fig1-figsupp1.tif')
    def test_file_parts(self, filename):
        stem, suffix = article_structure.file_parts(filename)
        self.assertEqual(stem, u'elife-15224-fig1-figsupp1')
        self.assertEqual(suffix, u'tif')

    def test_get_videos(self):
        file_names = [
            u'elife-13273-fig1-v1.tif',
            u'elife-13273-fig2-figsupp1-v1.tif',
            u'elife-13273-fig2-figsupp2-v1.tif',
            u'elife-13273-fig2-figsupp3-v1.tif',
            u'elife-13273-fig2-v1.tif',
            u'elife-13273-fig3-data1-v1.xlsx',
            u'elife-13273-fig3-figsupp1-v1.tif',
            u'elife-13273-fig3-figsupp2-v1.tif',
            u'elife-13273-fig3-figsupp3-v1.tif',
            u'elife-13273-fig3-figsupp4-v1.tif',
            u'elife-13273-fig3-figsupp5-v1.tif',
            u'elife-13273-fig3-v1.tif',
            u'elife-13273-fig4-figsupp1-v1.tif',
            u'elife-13273-fig4-v1.tif',
            u'elife-13273-fig5-data1-v1.xlsx',
            u'elife-13273-fig5-figsupp1-v1.tif',
            u'elife-13273-fig5-v1.tif',
            u'elife-13273-fig6-data1-v1.xlsx',
            u'elife-13273-fig6-data2-v1.xlsx',
            u'elife-13273-fig6-figsupp1-v1.tif',
            u'elife-13273-fig6-figsupp2-v1.tif',
            u'elife-13273-fig6-v1.tif',
            u'elife-13273-fig7-v1.tif',
            u'elife-13273-fig8-v1.tif',
            u'elife-13273-fig9-v1.tif',
            u'elife-13273-figures-v1.pdf',
            u'elife-13273-media1.mp4',
            u'elife-13273-v1.pdf',
            u'elife-13273-v1.xml',
        ]

        videos = article_structure.get_videos(file_names)

        self.assertListEqual(videos, [u'elife-13273-media1.mp4'])

    def test_pre_ingest_assets(self):
        file_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
            'elife-13273-media1.mp4',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-13273-media1.mp4',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        # order-insensitive comparison
        actual = article_structure.pre_ingest_assets(file_names)
        self.assertItemsEqual(actual, wanted)
Exemplo n.º 13
0
 def test_get_version_from_zip_filename(self, input, expected):
     # The ArticleInfo is kept on the test instance, then the version read
     # from the zip filename is checked against the expected value.
     self.articleinfo = ArticleInfo(input)
     version = self.articleinfo.get_version_from_zip_filename()
     self.assertEqual(version, expected)
Exemplo n.º 14
0
 def test_get_file_type_from_zip_filename(self, input, expected):
     # The ArticleInfo is kept on the test instance, then its file type is
     # checked against the expected value.
     self.articleinfo = ArticleInfo(input)
     file_type = self.articleinfo.file_type
     self.assertEqual(file_type, expected)