示例#1
0
    def test_filename(self):
        """Check file extension handling.

        - An item index file without extension is transformed to <filename>._
          to prevent conflict with folder name.
        """
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "index": "20200101000000001",
    "type": "file",
    "title": "File1"
  },
  "20200101000000002": {
    "type": "bookmark",
    "title": "Bookmark1",
    "source": "http://example.com"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001"
  ],
  "20200101000000001": [
    "20200101000000002"
  ]
})""")

        index_file = os.path.join(self.test_input, '20200101000000001')
        with open(index_file, 'wb') as fh:
            fh.write(b'dummy')

        for info in wsb2file.run(self.test_input, self.test_output):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, '1-File1'),
                os.path.join(self.test_output, '1-File1', '1-Bookmark1.htm'),
                os.path.join(self.test_output, '1-File1._'),
            })
示例#2
0
    def test_recursive(self):
        """Check if recursive item is correctly handled."""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "type": "folder",
    "title": "Folder1"
  },
  "20200101000000002": {
    "type": "folder",
    "title": "Folder2"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001"
  ],
  "20200101000000001": [
    "20200101000000002"
  ],
  "20200101000000002": [
    "20200101000000001"
  ]
})""")

        for info in wsb2file.run(self.test_input, self.test_output):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, '1-Folder1'),
                os.path.join(self.test_output, '1-Folder1', '1-Folder2'),
                os.path.join(self.test_output, '1-Folder1', '1-Folder2',
                             '1-Folder1'),
            })
示例#3
0
    def test_basic01(self):
        """Check for typical WebScrapBook items. (prefix=True)"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "index": "20200101000000001/index.html",
    "type": "",
    "title": "Page item - folder"
  },
  "20200101000000002": {
    "index": "20200101000000002.htz",
    "type": "",
    "title": "Page item - htz"
  },
  "20200101000000003": {
    "index": "20200101000000003.maff",
    "type": "",
    "title": "Page item - maff"
  },
  "20200101000000004": {
    "index": "20200101000000004.html",
    "type": "",
    "title": "Page item - single html"
  },
  "20200101000000005": {
    "type": "bookmark",
    "title": "Bookmark item",
    "source": "http://example.com/mypath?a=123&b=456"
  },
  "20200101000000006": {
    "type": "folder",
    "title": "Folder item"
  },
  "20200101000000007": {
    "type": "separator",
    "title": "Separator item"
  },
  "20200101000000008": {
    "type": "separator"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001",
    "20200101000000002",
    "20200101000000003",
    "20200101000000004",
    "20200101000000005",
    "20200101000000006",
    "20200101000000007",
    "20200101000000008"
  ]
})""")

        index_file = os.path.join(self.test_input, '20200101000000001',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        index_file = os.path.join(self.test_input, '20200101000000002.htz')
        with open(index_file, 'wb') as fh:
            fh.write(b'dummy htz')

        index_file = os.path.join(self.test_input, '20200101000000003.maff')
        with open(index_file, 'wb') as fh:
            fh.write(b'dummy maff')

        index_file = os.path.join(self.test_input, '20200101000000004.html')
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('single file content')

        for info in wsb2file.run(self.test_input, self.test_output):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, '1-Page item - folder.htd'),
                os.path.join(self.test_output, '1-Page item - folder.htd',
                             'index.html'),
                os.path.join(self.test_output, '2-Page item - htz.htz'),
                os.path.join(self.test_output, '3-Page item - maff.maff'),
                os.path.join(self.test_output,
                             '4-Page item - single html.html'),
                os.path.join(self.test_output, '5-Bookmark item.htm'),
                os.path.join(self.test_output, '6-Folder item'),
                os.path.join(self.test_output, '7-Separator item.-'),
                os.path.join(self.test_output, '8-----.-'),
            })
        with open(os.path.join(self.test_output, '1-Page item - folder.htd',
                               'index.html'),
                  'r',
                  encoding='UTF-8') as fh:
            self.assertEqual(fh.read(), 'page content')
        with open(os.path.join(self.test_output, '2-Page item - htz.htz'),
                  'rb') as fh:
            self.assertEqual(fh.read(), b'dummy htz')
        with open(os.path.join(self.test_output, '3-Page item - maff.maff'),
                  'rb') as fh:
            self.assertEqual(fh.read(), b'dummy maff')
        with open(os.path.join(self.test_output,
                               '4-Page item - single html.html'),
                  'r',
                  encoding='UTF-8') as fh:
            self.assertEqual(fh.read(), 'single file content')
        with open(os.path.join(self.test_output, '5-Bookmark item.htm'),
                  'r',
                  encoding='UTF-8') as fh:
            self.assertEqual(
                fh.read(), '<!DOCTYPE html>'
                '<meta charset="UTF-8">'
                '<meta http-equiv="refresh" content="0;url=http://example.com/mypath?a=123&amp;b=456">'
            )
        self.assertTrue(
            os.path.isdir(os.path.join(self.test_output, '6-Folder item')))
        with open(os.path.join(self.test_output, '7-Separator item.-'),
                  'rb') as fh:
            self.assertEqual(fh.read(), b'')
        with open(os.path.join(self.test_output, '8-----.-'), 'rb') as fh:
            self.assertEqual(fh.read(), b'')
示例#4
0
    def test_deduplicate(self):
        """Check duplicated title handling. (prefix=False)

        - Deduplicate even if extension is different.
        """
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "index": "20200101000000001/index.html",
    "type": "",
    "title": "myitem"
  },
  "20200101000000002": {
    "index": "20200101000000002.htz",
    "type": "",
    "title": "myitem"
  },
  "20200101000000003": {
    "index": "20200101000000003.maff",
    "type": "",
    "title": "myitem"
  },
  "20200101000000004": {
    "index": "20200101000000004.html",
    "type": "",
    "title": "myitem"
  },
  "20200101000000005": {
    "type": "bookmark",
    "title": "myitem",
    "source": "http://example.com/mypath?a=123&b=456"
  },
  "20200101000000006": {
    "type": "folder",
    "title": "myitem"
  },
  "20200101000000007": {
    "type": "separator",
    "title": "myitem"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001",
    "20200101000000002",
    "20200101000000003",
    "20200101000000004",
    "20200101000000005",
    "20200101000000006",
    "20200101000000007"
  ]
})""")

        index_file = os.path.join(self.test_input, '20200101000000001',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        index_file = os.path.join(self.test_input, '20200101000000002.htz')
        with open(index_file, 'wb') as fh:
            fh.write(b'dummy htz')

        index_file = os.path.join(self.test_input, '20200101000000003.maff')
        with open(index_file, 'wb') as fh:
            fh.write(b'dummy maff')

        index_file = os.path.join(self.test_input, '20200101000000004.html')
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('single file content')

        for info in wsb2file.run(self.test_input,
                                 self.test_output,
                                 prefix=False):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, 'myitem.htd'),
                os.path.join(self.test_output, 'myitem.htd', 'index.html'),
                os.path.join(self.test_output, 'myitem(1).htz'),
                os.path.join(self.test_output, 'myitem(2).maff'),
                os.path.join(self.test_output, 'myitem(3).html'),
                os.path.join(self.test_output, 'myitem(4).htm'),
                os.path.join(self.test_output, 'myitem(5)'),
            })
示例#5
0
    def test_numbering(self):
        """Check filename prefix is correctly zero-padded. (prefix=True)"""
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "type": "folder",
    "title": "Folder1"
  },
  "20200101000000002": {
    "type": "folder",
    "title": "Folder2"
  },
  "20200101000000003": {
    "type": "folder",
    "title": "Folder3"
  },
  "20200101000000004": {
    "type": "folder",
    "title": "Folder4"
  },
  "20200101000000005": {
    "type": "folder",
    "title": "Folder5"
  },
  "20200101000000006": {
    "type": "folder",
    "title": "Folder6"
  },
  "20200101000000007": {
    "type": "folder",
    "title": "Folder7"
  },
  "20200101000000008": {
    "type": "folder",
    "title": "Folder8"
  },
  "20200101000000009": {
    "type": "folder",
    "title": "Folder9"
  },
  "20200101000000010": {
    "type": "folder",
    "title": "Folder10"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001",
    "20200101000000002",
    "20200101000000003",
    "20200101000000004",
    "20200101000000005",
    "20200101000000006",
    "20200101000000007",
    "20200101000000008",
    "20200101000000009",
    "20200101000000010"
  ]
})""")

        for info in wsb2file.run(self.test_input, self.test_output):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)), {
                               os.path.join(self.test_output, ''),
                               os.path.join(self.test_output, '01-Folder1'),
                               os.path.join(self.test_output, '02-Folder2'),
                               os.path.join(self.test_output, '03-Folder3'),
                               os.path.join(self.test_output, '04-Folder4'),
                               os.path.join(self.test_output, '05-Folder5'),
                               os.path.join(self.test_output, '06-Folder6'),
                               os.path.join(self.test_output, '07-Folder7'),
                               os.path.join(self.test_output, '08-Folder8'),
                               os.path.join(self.test_output, '09-Folder9'),
                               os.path.join(self.test_output, '10-Folder10'),
                           })
示例#6
0
    def test_path02(self):
        """Check hierarchical filename. (prefix=False)

        - An item with data and descendants is transformed into <title>/ and <title>.htd/
        """
        with open(self.test_input_meta, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.meta({
  "20200101000000001": {
    "type": "folder",
    "title": "Folder1"
  },
  "20200101000000002": {
    "index": "20200101000000002/index.html",
    "type": "",
    "title": "Folder1 sub"
  },
  "20200101000000003": {
    "index": "20200101000000003/index.html",
    "type": "",
    "title": "Folder2"
  },
  "20200101000000004": {
    "index": "20200101000000004/index.html",
    "type": "",
    "title": "Folder2 sub"
  }
})""")
        with open(self.test_input_toc, 'w', encoding='UTF-8') as fh:
            fh.write("""\
scrapbook.toc({
  "root": [
    "20200101000000001",
    "20200101000000003"
  ],
  "20200101000000001": [
    "20200101000000002"
  ],
  "20200101000000003": [
    "20200101000000004"
  ]
})""")

        index_file = os.path.join(self.test_input, '20200101000000002',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        index_file = os.path.join(self.test_input, '20200101000000003',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        index_file = os.path.join(self.test_input, '20200101000000004',
                                  'index.html')
        os.makedirs(os.path.dirname(index_file), exist_ok=True)
        with open(index_file, 'w', encoding='UTF-8') as fh:
            fh.write('page content')

        for info in wsb2file.run(self.test_input,
                                 self.test_output,
                                 prefix=False):
            pass

        self.assertEqual(
            set(
                glob.iglob(os.path.join(self.test_output, '**'),
                           recursive=True)),
            {
                os.path.join(self.test_output, ''),
                os.path.join(self.test_output, 'Folder1'),
                os.path.join(self.test_output, 'Folder1', 'Folder1 sub.htd'),
                os.path.join(self.test_output, 'Folder1', 'Folder1 sub.htd',
                             'index.html'),
                os.path.join(self.test_output, 'Folder2'),
                os.path.join(self.test_output, 'Folder2', 'Folder2 sub.htd'),
                os.path.join(self.test_output, 'Folder2', 'Folder2 sub.htd',
                             'index.html'),
                os.path.join(self.test_output, 'Folder2.htd'),
                os.path.join(self.test_output, 'Folder2.htd', 'index.html'),
            })