Exemplo n.º 1
0
    def test_modify(self):
        """Loading an existing entry, modifying it and saving should
        serialize the updated state back to the same JSON file."""
        entry_path = self.repo.store.documententry_path("123/a")
        util.ensure_dir(entry_path)
        # seed the entry file with the known baseline JSON
        with open(entry_path, "w") as outfile:
            outfile.write(self.basic_json)

        entry = DocumentEntry(path=entry_path)
        entry.orig_updated = datetime(2013, 3, 27, 20, 59, 42, 325067)
        entry.id = "http://example.org/123/a"
        # do this in setUp?
        fragment_path = self.datadir + "/xhtml"
        with open(fragment_path, "w") as outfile:
            outfile.write("<div>xhtml fragment</div>")

        entry.set_content(fragment_path, "http://example.org/test",
                          mimetype="xhtml", inline=True)
        entry.save()
        self.assertEqual(self.d2u(util.readfile(entry_path)),
                         self.modified_json)
Exemplo n.º 2
0
    def test_modify(self):
        """Round-trip check: a saved modification to an existing entry
        must produce exactly the expected JSON on disk."""
        path = self.repo.store.documententry_path("123/a")
        util.ensure_dir(path)

        # write the baseline entry that we are about to modify
        with open(path, "w") as baseline:
            baseline.write(self.basic_json)

        modified = DocumentEntry(path=path)
        modified.id = "http://example.org/123/a"
        modified.orig_updated = datetime(2013, 3, 27, 20, 59, 42, 325067)

        # do this in setUp?
        xhtml_file = self.datadir + "/xhtml"
        with open(xhtml_file, "w") as frag:
            frag.write("<div>xhtml fragment</div>")

        modified.set_content(xhtml_file,
                             "http://example.org/test",
                             mimetype="xhtml",
                             inline=True)
        modified.save()

        self.assertEqual(self.d2u(util.readfile(path)), self.modified_json)
Exemplo n.º 3
0
    def importarchive(self, archivedir):
        """Imports downloaded data from an archive from legacy lagen.nu data.

        In particular, creates proper archive storage for older
        versions of each text.

        :param archivedir: root directory of the legacy archive; walked
                           for ``.html`` files under ``downloaded/sfs``.
        """
        current = archived = 0
        for f in util.list_dirs(archivedir, ".html"):
            if not f.startswith("downloaded/sfs"):  # sfst or sfsr
                continue
            for regex in self.templ:
                m = re.match(regex, f)
                if not m:
                    continue
                if "vcheck" in m.groupdict():  # silently ignore
                    break
                basefile = "%s:%s" % (m.group("byear"), m.group("bnum"))

                # need to look at the file to find out its version.
                # Read bytes and decode explicitly (works on both
                # Python 2 and 3), and use a context manager so the
                # file handle is closed promptly instead of leaked.
                with open(f, "rb") as fp:
                    text = fp.read(4000).decode("latin-1")
                reader = TextReader(string=text)
                updated_to = self._find_uppdaterad_tom(basefile, reader=reader)

                if "vyear" in m.groupdict():  # this file is marked as
                    # an archival version
                    archived += 1
                    version = updated_to

                    if m.group("vyear") == "first":
                        pass
                    else:
                        exp = "%s:%s" % (m.group("vyear"), m.group("vnum"))
                        if version != exp:
                            self.log.warning("%s: Expected %s, found %s" %
                                             (f, exp, version))
                else:
                    version = None
                    current += 1
                    de = DocumentEntry()
                    de.basefile = basefile
                    de.id = self.canonical_uri(basefile, updated_to)
                    # fudge timestamps best as we can from the file's
                    # ctime/mtime (previously orig_updated was
                    # immediately overwritten with datetime.now(),
                    # making the mtime-based value dead code)
                    de.orig_created = datetime.fromtimestamp(
                        os.path.getctime(f))
                    de.orig_updated = datetime.fromtimestamp(
                        os.path.getmtime(f))
                    de.orig_url = self.document_url_template % locals()
                    de.published = datetime.now()
                    de.url = self.generated_url(basefile)
                    de.title = "SFS %s" % basefile
                    # de.set_content()
                    # de.set_link()
                    de.save(self.store.documententry_path(basefile))
                # this yields more reasonable basefiles, but they are not
                # backwards compatible -- skip them for now
                # basefile = basefile.replace("_", "").replace(".", "")
                if "type" in m.groupdict() and m.group("type") == "sfsr":
                    dest = self.store.register_path(basefile)
                    current -= 1  # to offset the previous increment
                else:
                    dest = self.store.downloaded_path(basefile, version)
                self.log.debug("%s: extracting %s to %s" % (basefile, f, dest))
                util.ensure_dir(dest)
                shutil.copy2(f, dest)
                break
            else:
                self.log.warning("Couldn't process %s" % f)
        self.log.info(
            "Extracted %s current versions and %s archived versions" %
            (current, archived))