示例#1
0
def clobber_document(params, opts):
    """Delete the stored document identified by ``params`` after confirmation.

    The document is looked up with ``openn_db.get_doc(params)``.  A document
    flagged ``is_online`` is never deleted.  Otherwise ``opts.yes`` confirms
    the deletion, ``opts.no`` cancels it, and when neither flag is set the
    decision is delegated to ``handle_yes_no_input``.

    Raises:
        OPennException: if the document is on-line, or if the clobber is
            cancelled through ``opts.no``.
    """
    doc = openn_db.get_doc(params)

    # The three values are handed to the logger as separate arguments so the
    # logging module performs both the level check and the interpolation;
    # applying ``%`` to ``doc.id`` alone cannot fill three placeholders.
    logger.info(
        "Preparing to clobber document id: %d, repository: %s, base_dir: %s",
        doc.id, doc.collection, doc.base_dir)

    if doc.is_online:
        msg = "Clobber requested, but refusing to delete record "
        msg += "for document on-line at: %s" % (doc.package_dir, )
        raise OPennException(msg)

    if opts.yes is True:
        logger.info("Deleting existing document.")
    elif opts.no is True:
        raise OPennException("User canceled clobber; no changes made")
    else:
        question = "Proceed with clobber and delete this document?"
        yes_response = "OK. Deleting existing document."
        no_response = "User canceled clobber; no changes made."
        # the following raises an exception unless the user enters yes
        handle_yes_no_input(question, yes_response, no_response)

    # We only get here if it's ok to proceed
    doc.delete()
示例#2
0
def redo_document(doc, opts):
    """Delete all images attached to ``doc`` so it can be redone.

    A document flagged ``is_online`` is refused unless the environment
    variable ``OPENN_REDO_OVERRIDE_ONLINE_HALT`` equals ``true``
    (case-insensitive).  Otherwise ``opts.yes`` confirms the redo,
    ``opts.no`` cancels it, and when neither flag is set the decision is
    delegated to ``handle_yes_no_input``.

    Raises:
        OPennException: if the document is on-line without the override, or
            if the redo is cancelled through ``opts.no``.
    """
    # The three values are handed to the logger as separate arguments so the
    # logging module performs both the level check and the interpolation;
    # applying ``%`` to ``doc.id`` alone cannot fill three placeholders.
    logger.info(
        "Preparing to redo document id: %d, repository: %s, base_dir: %s",
        doc.id, doc.collection, doc.base_dir)

    override = str(os.getenv('OPENN_REDO_OVERRIDE_ONLINE_HALT', None)).lower()
    if doc.is_online and override != 'true':
        msg = "Redo requested, but refusing to redo record "
        msg += "for document on-line at: %s" % (doc.package_dir, )
        raise OPennException(msg)

    if opts.yes is True:
        logger.info("Removing images from existing document.")
    elif opts.no is True:
        raise OPennException("User canceled redo; no changes made")
    else:
        question = "Proceed with redo and delete all images?"
        yes_response = "OK. Removing images from existing document."
        no_response = "User canceled redo; no changes made."
        # the following raises an exception unless the user enters yes
        handle_yes_no_input(question, yes_response, no_response)

    # We only get here if it's ok to proceed
    doc.image_set.all().delete()
示例#3
0
 def bibid_filename(self):
     """Return the path of ``bibid.txt`` inside ``self.source_dir``.

     Raises OPennException when the source directory, or the ``bibid.txt``
     file within it, does not exist.
     """
     src = self.source_dir
     if os.path.exists(src):
         candidate = os.path.join(src, 'bibid.txt')
         if os.path.exists(candidate):
             return candidate
         raise OPennException("Could not find bibid.txt: %s" % candidate)
     raise OPennException("Could not find source_dir: %s" % src)
示例#4
0
    def get_config_dict(self, tag):
        """Return the one configuration dict whose ``'tag'`` equals ``tag``.

        Raises OPennException when no configuration carries the tag, or when
        more than one does.
        """
        matches = []
        for cfg in self._configs:
            if cfg['tag'] == tag:
                matches.append(cfg)

        count = len(matches)
        if count == 0:
            raise OPennException("Unknown tag: '%s'" % (tag,))
        if count > 1:
            msg = "Invalid repositories config: more than one has tag '%s'"
            raise OPennException(msg % (tag,))
        return matches[0]
示例#5
0
    def regen_partial_tei(self, doc, **kwargs):
        """Regenerate the partial TEI for ``doc`` from files in METADATA_DIR.

        ``kwargs['METADATA_DIR']`` must name an existing directory.  Any of
        ``pages.xlsx``, ``marc.xml`` and ``holdingid.txt`` found there is
        copied into ``self.source_dir``.  If ``self.marc_xml`` is still
        missing afterwards, it is written from the BibID stored in the
        document's saved TEI.

        Raises:
            OPennException: if METADATA_DIR is missing, blank or does not
                exist, or if there is neither a MARC file nor a BibID in
                the saved TEI.
        """
        # Metadata files looked for in METADATA_DIR:
        #
        #  - pages.xlsx     required
        #  - marc.xml       required unless bibid.txt present
        #  - bibid.txt      ignored; BibID should be in existing TEI
        #  - holdingid.txt  optional; may be required for Penn MSS (with BibID in TEI)

        data_dir = kwargs.get('METADATA_DIR', None)
        if data_dir is None or data_dir.strip() == '':
            raise OPennException("Missing required METADATA_DIR")

        if not os.path.exists(data_dir):
            raise OPennException("Cannot find METADATA_DIR: '%s'" % (data_dir,))

        for name in ('pages.xlsx', 'marc.xml', 'holdingid.txt'):
            full_path = os.path.abspath(os.path.join(data_dir, name))
            if not os.path.exists(full_path):
                continue
            dest = os.path.abspath(os.path.join(self.source_dir, name))
            # Nothing to copy when METADATA_DIR is the source directory itself.
            if full_path != dest:
                shutil.copyfile(full_path, dest)

        tei = OPennTEI(doc.tei_xml)
        bibid = tei.bibid

        # make sure we have the marc.xml file
        if not os.path.exists(self.marc_xml):
            if bibid is None:
                # Without a MARC file or a BibID there is nothing to build
                # the MARC record from, so this has to abort the run.
                raise OPennException(
                    "Saved TEI lacks BibID; required MARC file missing: '%s'"
                    % (self.marc_xml,))
            if not self.NEW_BIBID_RE.match(bibid):
                # BibIDs not matching NEW_BIBID_RE are wrapped in the fixed
                # '99' prefix and '3503681' suffix before the lookup.
                bibid = '99%s3503681' % (str(bibid),)
            self.write_xml(bibid, self.marc_xml)

        # create pages.xml from the page.xlsx
        self.write_openn_xml(self.openn_xml_path)
        # fake the pih.xml by merging pages.xml with marc.xml (from above)
        self.write_pih_xml()
        self.save_rights_data()
        partial_tei_xml = self.gen_partial_tei()
        self.write_partial_tei(self.source_dir, partial_tei_xml)
        self.validate_partial_tei()
        self.stage_marc_xml()

        # Register the intermediate files for removal.
        self.add_removal(self.pih_filename)
        self.add_removal(self.bibid_filename)
        self.add_removal(self.holdingid_filename)
        self.add_removal(self.openn_xml_path)
        self.add_removal(self.xlsx_path)
        self.add_removal(os.path.join(self.source_dir, 'sha1manifest.txt'))
示例#6
0
 def check_file_names(self, expected):
     """Verify that every name in ``expected`` exists under ``self.source_dir``.

     ``expected`` is a sequence of file names relative to the source
     directory.  Returns None when all of them are present.

     Raises:
         OPennException: if ``expected`` is empty, or if any listed file is
             missing (the message lists every missing name).
     """
     if len(expected) < 1:
         # Point at the source directory: no ``pih_xml`` name is defined in
         # this method, so referencing it would fail before the intended
         # error could be raised.
         raise OPennException(
             "Penn in Hand XML lists no files: see %s" % (self.source_dir,))

     missing = [name for name in expected
                if not os.path.exists(os.path.join(self.source_dir, name))]
     if missing:
         smiss = ', '.join(missing)
         raise OPennException(
             "Expected images are missing from %s: %s" % (self.source_dir, smiss))
示例#7
0
def main(cmdline=None):
    """op-info

    Validate a package directory given as the two positional arguments
    ``REPO_NAME PKG_DIR`` and return a status code: 0 when ``validate``
    reports no errors, 1 when it reports some.  Any exception is passed to
    ``parser.error``.

    NOTE(review): if ``parser`` is a standard optparse/argparse parser,
    ``error()`` exits the process, so the status 4 assignment would never be
    returned -- confirm against ``make_parser``.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    setup_logger()
    logger = logging.getLogger(__name__)

    status = 0
    try:
        if len(args) != 2:
            raise OPennException("Wrong number of arguments")
        repo_name, pkg_dir = args
        errors = validate(repo_name, pkg_dir)
        if len(errors) == 0:
            logging.info("Valid package directory: %s" % (pkg_dir,))
        else:
            logging.error("Errors found checking package directory: %s" % (pkg_dir,))
            for problem in errors:
                logging.error(problem)
            status = 1
    except (OPennException, Exception) as ex:
        # Both exception kinds are reported the same way.
        parser.error(str(ex))
        status = 4

    return status
示例#8
0
def make_readme_html(readme, opts):
    """Create the HTML page for ``readme`` when it needs to be (re)built.

    The readme is looked up with ``find_readme``; a ``Page`` is then built
    for it and ``is_makeable(page, opts)`` decides whether it is created or
    skipped.  With ``opts.dry_run`` set the creation is only logged.

    Raises:
        OPennException: if ``readme`` is unknown, or if the template for it
            cannot be found.
    """
    try:
        readme_dict = find_readme(readme)
        # Identity test: None is a singleton, and ``==`` could be hijacked
        # by a custom ``__eq__``.
        if readme_dict is None:
            raise OPennException("Unknown readme file: %s" % (readme, ))
        page = Page(readme, site_dir(), title=readme_dict['title'])

        if is_makeable(page, opts):
            logging.info("Creating page: %s" % (page.outfile_path(), ))
            if not opts.dry_run:
                page.create_pages()
        else:
            logging.info("Skipping page: %s" % (page.outfile_path(), ))
    except TemplateDoesNotExist:
        msg = "Could not find template: %s" % (readme, )
        raise OPennException(msg)
示例#9
0
    def validate_workbook(self):
        """Validate the metadata workbook found at ``self.xlsx_path``.

        Raises OPennException when the workbook file does not exist;
        otherwise the check is delegated to ``self.workbook().validate()``.
        """
        workbook_path = self.xlsx_path
        if os.path.exists(workbook_path):
            self.workbook().validate()
            return
        raise OPennException(
            'Cannot find required metadata workbook: %s' % (workbook_path,))
示例#10
0
 def check_valid(self):
     """Confirm that every required path of the package directory exists.

     ``PackageDir._required_paths`` maps a display name to the name of the
     attribute on ``self`` that holds the path to check (per the original
     note: a data directory, PARTIAL_TEI.xml and file_list.json).

     Raises OPennException for the first required path that is missing.
     """
     required = PackageDir._required_paths
     for label in required:
         attr_name = required[label]
         if os.path.exists(getattr(self, attr_name)):
             continue
         raise OPennException("No %s found in %s" % (label, self.source_dir))
示例#11
0
def copy_current_manifest(doc, source_dir):
    """Make sure ``source_dir`` holds a copy of the document's manifest.

    Nothing is done when ``manifest-sha1.txt`` already exists in
    ``source_dir``.  Otherwise the manifest at ``doc.manifest_path`` is
    copied from ``$OPENN_SITE_DIR``, then from ``$OPENN_STAGING_DIR``, and
    as a last resort downloaded from ``settings.OPENN_HOST``.

    Raises:
        KeyError: if a needed ``OPENN_*_DIR`` environment variable is unset.
        OPennException: if the download answers 404.
        urllib2.HTTPError: for any other HTTP failure (re-raised unchanged).
    """
    dest_path = os.path.join(source_dir, 'manifest-sha1.txt')
    if os.path.exists(dest_path):
        logger.info("Manifest found in source_dir: %s", dest_path)
        return

    # Prefer a local copy: the site directory first, then staging.  The
    # environment is read lazily so the staging variable is only required
    # when the site copy is absent.
    for env_var in ('OPENN_SITE_DIR', 'OPENN_STAGING_DIR'):
        local_manifest = os.path.join(os.environ[env_var], doc.manifest_path)
        if os.path.exists(local_manifest):
            logger.info("Copying manifest from %s", local_manifest)
            dest = os.path.join(source_dir, os.path.basename(local_manifest))
            shutil.copyfile(local_manifest, dest)
            return

    url = "https://%s/%s" % (settings.OPENN_HOST, doc.manifest_path)
    logger.info("Downloading manifest from %s", url)
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as ex:
        if ex.getcode() == 404:
            raise OPennException("Manifest not found at %s" % (url, ))
        # Bare raise keeps the original traceback.
        raise

    # The payload is raw bytes; binary mode avoids newline translation that
    # would alter a checksum manifest.
    with open(dest_path, 'wb') as f:
        f.write(data)
示例#12
0
 def prep_class_parameter(self, name):
     """Return the entry ``name`` from ``self.prep_class_params()``.

     Raises OPennException, quoting the whole parameter dict as JSON, when
     the lookup fails with KeyError.
     """
     try:
         value = self.prep_class_params()[name]
     except KeyError:
         known = json.dumps(self.prep_class_params())
         raise OPennException(
             "Cannot find prep_class_parameter '%s' in dict %s" % (name, known))
     return value
示例#13
0
 def holdingid_filename(self):
     """Return the path of ``holdingid.txt`` in ``self.source_dir``, or None.

     None is returned when the file is absent.  A missing source directory
     raises OPennException.
     """
     if not os.path.exists(self.source_dir):
         raise OPennException("Could not find source_dir: %s" % self.source_dir)
     candidate = os.path.join(self.source_dir, 'holdingid.txt')
     return candidate if os.path.exists(candidate) else None
示例#14
0
 def validate(self):
     """Run the optional package validation against ``self.source_dir``.

     When ``self.package_validation`` is falsy nothing is checked.

     Raises OPennException, with one reported error per line, when the
     validation returns any errors.
     """
     validator = self.package_validation
     errors = validator.validate(self.source_dir) if validator else []
     if len(errors) == 0:
         return
     headline = 'Invalid package directory: %s' % (self.source_dir, )
     raise OPennException('\n'.join([headline] + errors))
示例#15
0
 def _get_prep_config_dict(self, tag):
     """Return the prep configuration stored under ``tag``.

     Raises OPennException, listing ``self.prep_config_tags()``, when the
     lookup in ``self._prep_configs`` fails with KeyError.
     """
     try:
         config = self._prep_configs[tag]
     except KeyError:
         known = self.prep_config_tags()
         raise OPennException(
             "Could not find prep_config_dict for tag '%s' (known: %s)"
             % (tag, known))
     else:
         return config
示例#16
0
 def folder(self):
     """Return ``long_id()`` of the repository returned by ``self.repository()``.

     Raises OPennException when ``self.repository()`` returns None.
     """
     oprepo = self.repository()
     if oprepo is None:
         # NOTE(review): ``self.tag`` is interpolated without being called;
         # confirm it is an attribute/property rather than a method here.
         msg = "RepositoryWrapper with tag '%s' is not in db; has no folder"
         raise OPennException(msg % self.tag)
     return oprepo.long_id()
示例#17
0
    def regen_partial_tei(self, doc, **kwargs):
        """Rebuild the partial TEI from ``openn_metadata.xlsx`` in METADATA_DIR.

        ``kwargs['METADATA_DIR']`` names the directory holding the workbook;
        OPennException is raised when it is not supplied.  The workbook is
        copied to ``self.xlsx_path`` unless it already is that file.  The
        OPenn XML and the partial TEI are then regenerated, checked against
        ``doc`` and validated, and the intermediate files are registered for
        removal.
        """
        data_dir = kwargs.get('METADATA_DIR', None)
        if data_dir is None:
            raise OPennException(
                'METADATA_DIR is required to update TEI (document ID: %d)'
                % (self.document.id,))

        # copy the xlsx file into the source_dir as openn_metadata.xlsx
        source_xlsx = os.path.abspath(
            os.path.join(data_dir, 'openn_metadata.xlsx'))
        target_xlsx = os.path.abspath(self.xlsx_path)
        if source_xlsx != target_xlsx:
            shutil.copyfile(source_xlsx, target_xlsx)

        self.write_openn_xml(self.openn_xml_path())
        partial_tei = self.gen_partial_tei()
        self.write_partial_tei(self.source_dir, partial_tei)

        # Checks against the stored document, then validation of the result.
        self.check_page_count(self.openn_xml_path(), doc)
        self.update_serial_numbers(self.openn_xml_path(), doc)
        self.validate_partial_tei()

        self.archive_xlsx()
        self.add_removal(self.openn_xml_path())
        self.add_removal(self.xlsx_path)
示例#18
0
 def get_bibid(self):
     """Read the BibID from ``self.bibid_filename`` and return it normalized.

     Surrounding whitespace is stripped.  A BibID longer than seven digits
     is returned unchanged; a shorter one is wrapped in the fixed ``99``
     prefix and ``3503681`` suffix.

     Raises:
         OPennException: if the file content is not made of digits only.
     """
     # Context manager so the file handle is closed deterministically.
     with open(self.bibid_filename) as handle:
         bibid = handle.read().strip()

     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if not re.match(r'\d+$', bibid):
         raise OPennException("Bad BibID; expected only digits; found: '%s'" % bibid)

     if len(bibid) > 7:
         return bibid
     return '99%s3503681' % (bibid,)
示例#19
0
def main(cmdline=None):
    """op-prep main

    Expects ``PREP_CONFIG_TAG DOC_ID [METADATA_DIR]``.  A new output
    directory named after the document's ``base_dir`` is created under
    ``opts.out_dir`` and the document's TEI is updated into it.

    NOTE(review): if ``parser`` is a standard optparse/argparse parser,
    ``error()`` exits the process, so status 4 would never actually be
    returned -- confirm against ``make_parser``.
    """
    status = 0
    parser = make_parser()

    opts, args = parser.parse_args(cmdline)

    if not 2 <= len(args) <= 3:
        parser.error('Wrong number of arguments')

    # Prep config is required, b/c only some prep methods implement
    # TEI regeneration.
    prep_config_tag, doc_id = args[0], args[1]
    metadata_dir = args[2] if len(args) > 2 else None

    setup_logger()
    logger = logging.getLogger(__name__)

    try:
        prep_config = get_prep_config(prep_config_tag)
        doc = Document.objects.get(pk=doc_id)

        # Never reuse an existing output directory.
        output_dir = os.path.join(opts.out_dir, doc.base_dir)
        if os.path.exists(output_dir):
            raise OPennException(
                "Output directory already exists: %s" % (output_dir,))
        os.mkdir(output_dir)

        kwargs = {}
        if metadata_dir is not None:
            if not os.path.exists(metadata_dir):
                raise OPennException(
                    "Cannot find METADATA_DIR: '%s'" % (metadata_dir,))
            kwargs['METADATA_DIR'] = metadata_dir

        OPennPrep().update_tei(output_dir, doc, prep_config, **kwargs)
    except OPennException as ex:
        if opts.verbose:
            opfunc.print_exc()
        status = 4
        parser.error(str(ex))

    return status
示例#20
0
 def get_tei(self, document):
     """Build an ``OPennTEI`` from ``document.tei_xml``.

     When construction raises OPennException, the failure is logged with the
     document's id and base_dir and a new OPennException is raised carrying
     that message, the original exception and its string form.
     """
     try:
         tei = OPennTEI(document.tei_xml)
     except OPennException as oex:
         details = (document.id, document.base_dir)
         msg = "Error processing document: id: %d, base_dir: '%s'" % details
         self.logger.error(msg)
         raise OPennException(msg, oex, str(oex))
     return tei
示例#21
0
 def image_types(self):
     """Return the ``'image_types'`` entry of ``self._repo_prep_dict``.

     Raises:
         OPennException: if the key is absent; the message names the missing
             key and dumps the whole dict as JSON.
     """
     try:
         return self._repo_prep_dict['image_types']
     except KeyError:
         # The message names the key exactly as it is looked up, so the hint
         # matches what has to be added to the configuration.
         msg = "Cannot find required PREP_CONFIG parameter 'image_types'"
         msg += " in dict %s"
         msg = msg % (json.dumps(self._repo_prep_dict), )
         raise OPennException(msg)
示例#22
0
    def get_method_config(self, tag):
        """Return the first prep-method config whose ``'tag'`` equals ``tag``.

        Raises OPennException, listing ``self.known_tags()``, when no config
        in ``self._prep_methods`` matches.
        """
        found = next(
            (cfg for cfg in self._prep_methods
             if cfg.get('tag', False) == tag),
            None)
        if found is not None:
            return found

        known = ', '.join(self.known_tags())
        raise OPennException(
            "Could not find prep method for tag '%s' (known %s)" % (tag, known))
示例#23
0
 def validate(self):
     """Validate each sheet in turn, stopping at the first metadata errors.

     After every ``sheet.validate()`` call the workbook is asked whether it
     has metadata errors; if so OPennException is raised with the workbook
     path followed by one error per line.
     """
     for sheet in self.sheets():
         sheet.validate()
         if not self.has_metadata_errors():
             continue
         headline = "Errors found in metadata for workbook: %s" % (
             self.xlsx_path, )
         report = [headline] + self.metadata_errors()
         raise OPennException('\n'.join(report))
示例#24
0
def prep_source_dir_arg(source_dir):
    """Normalize a SOURCE_DIR argument and make sure it exists.

    Surrounding whitespace and trailing slashes are removed; a path made
    only of slashes is reduced to the root ``/`` rather than to an empty
    string.

    Returns:
        The normalized path.

    Raises:
        OPennException: if the normalized path does not exist.
    """
    # Strip first: testing the stripped value but slicing the raw one would
    # cut off trailing whitespace instead of the slash.
    cleaned = source_dir.strip()
    if len(cleaned) > 1:
        cleaned = cleaned.rstrip('/') or '/'

    if not os.path.exists(cleaned):
        msg = "SOURCE_DIR does not exist: %s" % cleaned
        raise OPennException(msg)

    return cleaned
示例#25
0
    def validate(self):
        """Run the unique-field and required-field checks on the configs.

        Raises OPennException with a headline followed by one indented line
        per reported problem when either check returns messages.
        """
        problems = []
        problems.extend(self.validate_unique_fields())
        problems.extend(self.validate_required_fields())

        if len(problems) == 0:
            return

        report = ["Errors found in configurations:"]
        report.extend(["    %s" % (item) for item in problems])
        raise OPennException("\n".join(report))
示例#26
0
    def validate_file_lists(self):
        """Check each sheet's file list, stopping at the first file errors.

        After every ``sheet.validate_file_lists()`` call the workbook is
        asked whether it has file errors; if so OPennException is raised
        with the workbook path followed by one error per line.
        """
        for sheet in self.sheets():
            sheet.validate_file_lists()

            if not self.has_file_errors():
                continue
            headline = "Errors found checking files in workbook: %s" % (
                self.xlsx_path, )
            report = [headline] + self.file_errors()
            raise OPennException('\n'.join(report))
示例#27
0
def _file_sha1(path):
    """Return the hex SHA-1 digest of the file at ``path``."""
    digest = hashlib.sha1()
    with open(path, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()


def rewrite_manifest(doc, source_dir):
    """Refresh the TEI (and MARC) checksums in ``manifest-sha1.txt``.

    The manifest in ``source_dir`` is rewritten line by line: the entry for
    ``data/<doc.tei_basename>`` receives the current SHA-1 of the TEI file,
    the entry for ``data/marc.xml`` receives the current SHA-1 of that file
    when it exists, and every other entry is copied through unchanged.
    Lines that do not split into a digest and a path are dropped.

    Raises:
        OPennException: if the TEI file is missing, or if the manifest
            already lists the current TEI digest.
    """
    manifest_path = os.path.join(source_dir, 'manifest-sha1.txt')
    tei_rel_path = os.path.join('data', doc.tei_basename)
    tei_full_path = os.path.join(source_dir, tei_rel_path)
    marc_rel_path = os.path.join('data', 'marc.xml')
    marc_full_path = os.path.join(source_dir, marc_rel_path)

    if not os.path.exists(tei_full_path):
        raise OPennException("No TEI file found at %s" % (tei_full_path, ))

    tei_digest = _file_sha1(tei_full_path)

    # marc.xml is optional.  None means "keep any existing marc.xml line
    # as-is"; the name must be bound even when the file is absent because
    # the manifest may still list it.
    marc_digest = None
    if os.path.exists(marc_full_path):
        marc_digest = _file_sha1(marc_full_path)

    with open(manifest_path, "r") as manifest:
        lines = manifest.readlines()

    # make sure we need to update the manifest; the path is escaped and the
    # pattern anchored so that only an exact entry for the TEI file counts.
    tei_line_re = re.compile(
        r'^%s +%s\s*$' % (re.escape(tei_digest), re.escape(tei_rel_path)))
    for line in lines:
        if tei_line_re.search(line):
            raise OPennException("Manifest already up-to-date")

    with open(manifest_path, "w") as manifest:
        for line in lines:
            parts = re.split(r'\s+', line.strip(), 1)
            if len(parts) < 2:
                continue
            rel_path = parts[1]
            if rel_path == tei_rel_path:
                manifest.write("%s  %s\n" % (tei_digest, tei_rel_path))
            elif rel_path == marc_rel_path and marc_digest is not None:
                manifest.write("%s  %s\n" % (marc_digest, marc_rel_path))
                logger.info('Writing marc_digest: %s' % (marc_digest, ))
            else:
                manifest.write(line)
示例#28
0
def validate_source_dir(prep_method, source_dir):
    """Validate ``source_dir`` with the prep method's package validations.

    Nothing is checked when ``prep_method.package_validations()`` returns
    None.  Otherwise a ``PackageValidation`` is built from those parameters.

    Raises OPennException, with one reported error per line, when the
    validation returns any errors.
    """
    validation_params = prep_method.package_validations()
    if validation_params is not None:
        errors = PackageValidation(**validation_params).validate(source_dir)
        if len(errors) > 0:
            headline = 'Invalid package directory: %s' % (source_dir, )
            raise OPennException('\n'.join([headline] + errors))
示例#29
0
    def gen_partial_tei(self):
        """Run ``op-gen-tei`` on the OPenn XML and return what it prints.

        The command receives ``self.openn_xml_path()`` and ``self._xsl`` as
        arguments; its standard output is returned.

        Raises OPennException, quoting the command's standard error, when it
        exits with a non-zero return code.
        """
        command = ['op-gen-tei', self.openn_xml_path(), self._xsl]
        proc = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if proc.returncode == 0:
            return out
        raise OPennException("TEI Generation failed: %s" % err)
示例#30
0
    def repository(self):
        """Return the ``Repository`` for ``self.tag()``, looked up at most once.

        The result is kept in ``self._repository`` and reused on later calls.

        Raises OPennException, listing the tags of all repositories, when no
        repository has the tag.
        """
        if self._repository is None:
            try:
                self._repository = Repository.objects.get(tag=self.tag())
            except Repository.DoesNotExist:
                known = ', '.join([repo.tag for repo in Repository.objects.all()])
                raise OPennException(
                    "Could not find repository for tag: %s; repos: %s" %
                    (self.tag(), known))

        return self._repository