Пример #1
0
def clean_headers(xd):
    """Tidy the headers of ``xd`` in place.

    Steps, in order:

    1. Headers on the unwanted list are set to ``None``; any other header
       whose lowercased name is not in ``xdfile.HEADER_ORDER`` is reported
       through ``utils.warn``.
    2. Author (falling back to Creator, which is then set to ``None``) and
       Editor are passed through ``clean_author``; a changed value is stored
       under the header name plus ``CLEAN_SUFFIX``.
    3. Title is passed through ``clean_title`` and stored the same way.
    4. Date is taken from the existing header, else from the filename, else
       from the Copyright header, and written back if one was found.

    Returns nothing; all results are written via ``xd.set_header``.
    """
    unwanted = ("Source", "Identifier", "Acquired", "Issued", "Category")

    # Walk a snapshot of the keys, since set_header() is called in the loop.
    for name in list(xd.headers.keys()):
        if name in unwanted:
            xd.set_header(name, None)
        elif name.lower() not in xdfile.HEADER_ORDER:
            utils.warn("%s: '%s' header not known: '%s'" %
                       (xd.filename, name, xd.headers[name]))

    # Author / Editor: use Creator only when Author is missing or empty.
    raw_author = xd.get_header("Author") or ""
    if not raw_author and xd.get_header("Creator"):
        raw_author = xd.get_header("Creator")
        xd.set_header("Creator", None)

    raw_editor = xd.get_header("Editor") or ""
    cleaned_author, cleaned_editor = clean_author(raw_author, raw_editor)

    # Only record a cleaned value when cleaning actually changed something.
    for label, before, after in (
        ("Author", raw_author, cleaned_author),
        ("Editor", raw_editor, cleaned_editor),
    ):
        if after != before:
            xd.set_header(label + CLEAN_SUFFIX, after)

    # Title
    raw_title = xd.get_header("Title") or ""
    cleaned_title = clean_title(raw_title)
    if cleaned_title != raw_title:
        xd.set_header("Title" + CLEAN_SUFFIX, cleaned_title)

    # Date: existing header first, then the fallbacks below.
    date_value = xd.get_header("Date")

    if not date_value:
        # Fallback 1: the filename. Failures are logged, and re-raised only
        # when args.debug is set.
        try:
            from_name = utils.parse_date_from_filename(xd.filename)
            if from_name:
                date_value = from_name.strftime("%Y-%m-%d")
        except Exception as exc:
            utils.error(str(exc))
            if args.debug:
                raise

    if not date_value:
        # Fallback 2: whatever find_date() extracts from the Copyright header.
        date_value = find_date(xd.get_header("Copyright") or "")

    if date_value:
        xd.set_header("Date", date_value)
Пример #2
0
def clean_headers(xd):
    """Tidy the headers of ``xd`` in place.

    Steps, in order:

    1. Headers on the unwanted list are set to ``None``; any other header
       whose lowercased name is not in ``xdfile.HEADER_ORDER`` is reported
       through ``utils.warn``.
    2. Author (falling back to Creator, which is then set to ``None``) and
       Editor are passed through ``clean_author``; a changed value is stored
       under the header name plus ``CLEAN_SUFFIX``.
    3. Title is passed through ``clean_title`` and stored the same way.
    4. Date is taken from the existing header, else from the filename, else
       from the Copyright header, and written back if one was found.

    Returns nothing; all results are written via ``xd.set_header``.
    """
    unwanted = ("Source", "Identifier", "Acquired", "Issued", "Category")

    # Walk a snapshot of the keys, since set_header() is called in the loop.
    for name in list(xd.headers.keys()):
        if name in unwanted:
            xd.set_header(name, None)
        elif name.lower() not in xdfile.HEADER_ORDER:
            utils.warn("%s: '%s' header not known: '%s'" % (xd.filename, name, xd.headers[name]))

    # Author / Editor: use Creator only when Author is missing or empty.
    raw_author = xd.get_header("Author") or ""
    if not raw_author and xd.get_header("Creator"):
        raw_author = xd.get_header("Creator")
        xd.set_header("Creator", None)

    raw_editor = xd.get_header("Editor") or ""
    cleaned_author, cleaned_editor = clean_author(raw_author, raw_editor)

    # Only record a cleaned value when cleaning actually changed something.
    for label, before, after in (
        ("Author", raw_author, cleaned_author),
        ("Editor", raw_editor, cleaned_editor),
    ):
        if after != before:
            xd.set_header(label + CLEAN_SUFFIX, after)

    # Title
    raw_title = xd.get_header("Title") or ""
    cleaned_title = clean_title(raw_title)
    if cleaned_title != raw_title:
        xd.set_header("Title" + CLEAN_SUFFIX, cleaned_title)

    # Date: existing header first, then the fallbacks below.
    date_value = xd.get_header("Date")

    if not date_value:
        # Fallback 1: the filename. Failures are logged, and re-raised only
        # when args.debug is set.
        try:
            from_name = utils.parse_date_from_filename(xd.filename)
            if from_name:
                date_value = from_name.strftime("%Y-%m-%d")
        except Exception as exc:
            utils.error(str(exc))
            if args.debug:
                raise

    if not date_value:
        # Fallback 2: whatever find_date() extracts from the Copyright header.
        date_value = find_date(xd.get_header("Copyright") or "")

    if date_value:
        xd.set_header("Date", date_value)
Пример #3
0
def deduce_set_seqnum(xd):
    """Set a "Date" or, failing that, a "Number" header on ``xd``.

    The date is looked up in the base part of ``xd.filename``. When none is
    found, the first run of digits anywhere in the full ``xd.filename`` is
    stored as an integer "Number". If neither is found, ``xd`` is left
    untouched. Returns nothing.
    """
    stem = utils.parse_pathname(xd.filename).base

    # Prefer a date found in the base name.
    file_date = utils.parse_date_from_filename(stem)
    if file_date:
        # NOTE(review): the parsed object is stored as-is (a datetime, per the
        # original comment), not a formatted string -- confirm that
        # set_header accepts that.
        xd.set_header("Date", file_date)
        return

    # No date: search the full path, since a directory name may carry the
    # number (the original comment cites the "eltana" directory).
    digits = re.search(r'(\d+)', xd.filename)
    if digits:
        xd.set_header("Number", int(digits.group(1)))
Пример #4
0
def deduce_set_seqnum(xd):
    """Set a "Date" or, failing that, a "Number" header on ``xd``.

    The date is looked up in the base part of ``xd.filename``. When none is
    found, the first run of digits anywhere in the full ``xd.filename`` is
    stored as an integer "Number". If neither is found, ``xd`` is left
    untouched. Returns nothing.
    """
    stem = utils.parse_pathname(xd.filename).base

    # Prefer a date found in the base name.
    file_date = utils.parse_date_from_filename(stem)
    if file_date:
        # NOTE(review): the parsed object is stored as-is (a datetime, per the
        # original comment), not a formatted string -- confirm that
        # set_header accepts that.
        xd.set_header("Date", file_date)
        return

    # No date: search the full path, since a directory name may carry the
    # number (the original comment cites the "eltana" directory).
    digits = re.search(r'(\d+)', xd.filename)
    if digits:
        xd.set_header("Number", int(digits.group(1)))