def test_modify(self): path = self.repo.store.documententry_path("123/a") util.ensure_dir(path) with open(path, "w") as fp: fp.write(self.basic_json) d = DocumentEntry(path=path) d.orig_updated = datetime(2013, 3, 27, 20, 59, 42, 325067) d.id = "http://example.org/123/a" # do this in setUp? with open(self.datadir+"/xhtml","w") as f: f.write("<div>xhtml fragment</div>") d.set_content(self.datadir+"/xhtml", "http://example.org/test", mimetype="xhtml", inline=True) d.save() self.assertEqual(self.d2u(util.readfile(path)), self.modified_json)
    def test_modify(self):
        """Write a basic entry file, load it, set inline xhtml content on it,
        save it, and verify the resulting JSON matches the expected fixture.

        NOTE(review): this definition is a byte-for-byte duplicate of the
        ``test_modify`` immediately above it. Python keeps only the later
        binding, so only one of the two ever runs — one copy should be
        removed (or renamed if a distinct test was intended).
        """
        path = self.repo.store.documententry_path("123/a")
        util.ensure_dir(path)
        # Seed the entry file with the known-good baseline JSON fixture
        with open(path, "w") as fp:
            fp.write(self.basic_json)
        d = DocumentEntry(path=path)
        d.orig_updated = datetime(2013, 3, 27, 20, 59, 42, 325067)
        d.id = "http://example.org/123/a"
        # do this in setUp?
        with open(self.datadir+"/xhtml","w") as f:
            f.write("<div>xhtml fragment</div>")
        d.set_content(self.datadir+"/xhtml", "http://example.org/test",
                      mimetype="xhtml", inline=True)
        d.save()
        # Compare the re-serialized entry against the expected fixture
        self.assertEqual(self.d2u(util.readfile(path)), self.modified_json)
def importarchive(self, archivedir): """Imports downloaded data from an archive from legacy lagen.nu data. In particular, creates proper archive storage for older versions of each text. """ current = archived = 0 for f in util.list_dirs(archivedir, ".html"): if not f.startswith("downloaded/sfs"): # sfst or sfsr continue for regex in self.templ: m = re.match(regex, f) if not m: continue if "vcheck" in m.groupdict(): # silently ignore break basefile = "%s:%s" % (m.group("byear"), m.group("bnum")) # need to look at the file to find out its version # text = t.extractfile(f).read(4000).decode("latin-1") text = open(f).read(4000).decode("latin-1") reader = TextReader(string=text) updated_to = self._find_uppdaterad_tom(basefile, reader=reader) if "vyear" in m.groupdict(): # this file is marked as # an archival version archived += 1 version = updated_to if m.group("vyear") == "first": pass else: exp = "%s:%s" % (m.group("vyear"), m.group("vnum")) if version != exp: self.log.warning("%s: Expected %s, found %s" % (f, exp, version)) else: version = None current += 1 de = DocumentEntry() de.basefile = basefile de.id = self.canonical_uri(basefile, updated_to) # fudge timestamps best as we can de.orig_created = datetime.fromtimestamp( os.path.getctime(f)) de.orig_updated = datetime.fromtimestamp( os.path.getmtime(f)) de.orig_updated = datetime.now() de.orig_url = self.document_url_template % locals() de.published = datetime.now() de.url = self.generated_url(basefile) de.title = "SFS %s" % basefile # de.set_content() # de.set_link() de.save(self.store.documententry_path(basefile)) # this yields more reasonable basefiles, but they are not # backwards compatible -- skip them for now # basefile = basefile.replace("_", "").replace(".", "") if "type" in m.groupdict() and m.group("type") == "sfsr": dest = self.store.register_path(basefile) current -= 1 # to offset the previous increment else: dest = self.store.downloaded_path(basefile, version) self.log.debug("%s: extracting %s to %s" 
% (basefile, f, dest)) util.ensure_dir(dest) shutil.copy2(f, dest) break else: self.log.warning("Couldn't process %s" % f) self.log.info( "Extracted %s current versions and %s archived versions" % (current, archived))