Exemplo n.º 1
0
def load_error_mail(ia, marc_display, subject):
    """Send a load-error notification mail for the item ``ia``.

    The mail subject is ``subject`` followed by ``': '`` and the item
    identifier.  The body lists the item's archive.org details and
    download URLs, then a blank line, then ``bad_binary``.

    :param ia: item identifier; concatenated into the subject and URLs,
        so it must be a string.
    :param marc_display: not used by this function.
    :param subject: prefix for the mail subject line.
    """
    msg_from = '*****@*****.**'
    msg_to = ['*****@*****.**']
    subject += ': ' + ia
    msg = 'http://www.archive.org/details/%s\n' % ia
    # The identifier must be substituted here too; without "% ia" the
    # mail contained a literal "%s" instead of a usable download URL.
    msg += 'http://www.archive.org/download/%s\n' % ia
    # NOTE(review): bad_binary is not defined inside this function --
    # presumably a module-level global; confirm it is always set (and
    # whether the unused marc_display parameter was meant here instead).
    msg += '\n' + bad_binary
    error_mail(msg_from, msg_to, subject, msg)
Exemplo n.º 2
0
def load_error_mail(ia, marc_display, subject):
    """Send a load-error notification mail for the item ``ia``.

    The mail subject is ``subject`` followed by ``': '`` and the item
    identifier.  The body lists the item's archive.org details and
    download URLs, then a blank line, then ``bad_binary``.

    :param ia: item identifier; concatenated into the subject and URLs,
        so it must be a string.
    :param marc_display: not used by this function.
    :param subject: prefix for the mail subject line.
    """
    msg_from = '*****@*****.**'
    msg_to = ['*****@*****.**']
    subject += ': ' + ia
    msg = 'http://www.archive.org/details/%s\n' % ia
    # The identifier must be substituted here too; without "% ia" the
    # mail contained a literal "%s" instead of a usable download URL.
    msg += 'http://www.archive.org/download/%s\n' % ia
    # NOTE(review): bad_binary is not defined inside this function --
    # presumably a module-level global; confirm it is always set (and
    # whether the unused marc_display parameter was meant here instead).
    msg += '\n' + bad_binary
    error_mail(msg_from, msg_to, subject, msg)
Exemplo n.º 3
0
def load_error_mail(ia, marc_display, subject):
    """Send a load-error notification mail for the item ``ia``.

    The mail subject is ``subject`` followed by ``": "`` and the item
    identifier.  The body lists the item's archive.org details and
    download URLs, then a blank line, then ``bad_binary``.

    :param ia: item identifier; concatenated into the subject and URLs,
        so it must be a string.
    :param marc_display: not used by this function.
    :param subject: prefix for the mail subject line.
    """
    msg_from = "*****@*****.**"
    msg_to = ["*****@*****.**"]
    subject += ": " + ia
    msg = "http://www.archive.org/details/%s\n" % ia
    # The identifier must be substituted here too; without "% ia" the
    # mail contained a literal "%s" instead of a usable download URL.
    msg += "http://www.archive.org/download/%s\n" % ia
    # NOTE(review): bad_binary is not defined inside this function --
    # presumably a module-level global; confirm it is always set (and
    # whether the unused marc_display parameter was meant here instead).
    msg += "\n" + bad_binary
    error_mail(msg_from, msg_to, subject, msg)
Exemplo n.º 4
0
def bad_marc_alert(bad_marc):
    """Mail a report listing every entry of ``bad_marc``.

    ``bad_marc`` must be non-empty (asserted) and is iterated as
    ``(ia, data)`` pairs.  For each pair the body carries the item's
    details URL, its download URL and ``repr(data)``; the subject line
    states how many entries there are.
    """
    assert bad_marc
    sender = '*****@*****.**'
    recipients = ['*****@*****.**']
    subject = '%d bad MARC' % len(bad_marc)

    # One report section per item; sections are joined by a newline.
    section = (
        'http://www.archive.org/details/%s\n'
        'http://www.archive.org/download/%s\n\n'
        '%s\n\n'
    )
    sections = []
    for ia, data in bad_marc:
        sections.append(section % (ia, ia, repr(data)))

    error_mail(sender, recipients, subject, '\n'.join(sections))
Exemplo n.º 5
0
def bad_marc_alert(bad_marc):
    """Send one mail summarising all ``(ia, data)`` pairs in ``bad_marc``.

    The subject is ``'<count> bad MARC'``.  The body has, per pair, the
    archive.org details URL, the download URL and ``repr(data)``.
    An empty ``bad_marc`` trips the assertion.
    """
    assert bad_marc
    from_addr = '*****@*****.**'
    to_addrs = ['*****@*****.**']
    subject = '%d bad MARC' % len(bad_marc)

    def describe(ia, data):
        # Text block for a single item: two URLs, then the raw data repr.
        details = 'http://www.archive.org/details/%s\n' % ia
        download = 'http://www.archive.org/download/%s\n\n' % ia
        return details + download + '%s\n\n' % repr(data)

    body = '\n'.join([describe(ia, data) for ia, data in bad_marc])
    error_mail(from_addr, to_addrs, subject, body)
Exemplo n.º 6
0
def marc_formats(ia, host=None, path=None):
    """Report which MARC files are listed in an item's ``_files.xml``.

    Fetches ``<ia>_files.xml`` -- from ``http://<host><path>/`` when both
    ``host`` and ``path`` are given, otherwise from
    ``http://www.archive.org/download/<ia>/`` -- and checks the ``name``
    attribute of each child of the XML root against ``<ia>_marc.xml``
    and ``<ia>_meta.mrc``.

    :param ia: item identifier (string).
    :param host: optional host name used to build the URL.
    :param path: optional path on ``host`` used to build the URL.
    :returns: dict ``{'xml': bool, 'bin': bool}``.  Both values are False
        when the listing could not be fetched (an error mail is sent) or
        could not be parsed (the raw data is printed).
    """
    files = {
        ia + '_marc.xml': 'xml',
        ia + '_meta.mrc': 'bin',
    }
    has = {'xml': False, 'bin': False}
    ending = 'files.xml'
    if host and path:
        url = 'http://%s%s/%s_%s' % (host, path, ia, ending)
    else:
        url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending

    # Up to 10 attempts, pausing 10 seconds after each failed one.
    f = None
    for attempt in range(10):
        f = urlopen_keep_trying(url)
        if f is not None:
            break
        sleep(10)
    if f is None:
        msg_from = '*****@*****.**'
        msg_to = ['*****@*****.**']
        subject = "error reading %s_files.xml" % ia
        msg = url
        error_mail(msg_from, msg_to, subject, msg)
        return has

    data = f.read()
    try:
        root = etree.fromstring(data)
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit.  A single formatted argument prints the same text
        # under both the print statement and the print function.
        print('bad: %s' % repr(data))
        return has

    for e in root:
        # .get() so a child without a "name" attribute is skipped instead
        # of raising KeyError; it cannot match either file name anyway.
        name = e.attrib.get('name')
        if name in files:
            has[files[name]] = True
        if all(has.values()):
            break  # both formats found, no need to scan further
    return has
Exemplo n.º 7
0
def find_author(name, send_mail=True):
    """Look up author records by name and resolve non-author results.

    Queries ``ol`` for ``/type/author`` records with the given ``name``
    and fetches each returned key.  If any fetched record is not of type
    ``/type/author``, an error report is mailed (when ``send_mail`` is
    true) and every record is passed through ``walk_redirects``.

    :param name: author name to query for.
    :param send_mail: when true, mail a report describing each record if
        the result contains non-author records.
    :returns: list of records -- the fetched ones, or the results of
        ``walk_redirects`` when a non-author record was found.
    """
    # The original built the query with a 'limit' key and deleted it
    # straight away; the query is sent without a limit.
    q = {'type': '/type/author', 'name': name}
    reply = list(ol.query(q))
    authors = [ol.get(k) for k in reply]
    if any(a['type'] != '/type/author' for a in authors):
        if send_mail:
            subject = 'author query redirect: ' + repr(q['name'])
            body = 'Error: author query result should not contain redirects\n\n'
            body += 'query: ' + repr(q) + '\n\nresult\n'
            for a in authors:
                if a['type'] == '/type/redirect':
                    body += a['key'] + ' redirects to ' + a['location'] + '\n'
                elif a['type'] == '/type/delete':
                    body += a['key'] + ' is deleted ' + '\n'
                elif a['type'] == '/type/author':
                    body += a['key'] + ' is an author: ' + repr(a['name']) + '\n'
                else:
                    # Previously concatenated the record object itself onto
                    # the string, which raises TypeError; report its type.
                    body += a['key'] + ' has bad type: ' + repr(a['type']) + '\n'
            addr = '*****@*****.**'
            error_mail(addr, [addr], subject, body)
        # NOTE(review): the filter relies on walk_redirects adding keys to
        # `seen` while the comprehension runs -- confirm against its
        # definition before reordering or parallelising this.
        seen = set()
        authors = [walk_redirects(a, seen) for a in authors if a['key'] not in seen]
    return authors
Exemplo n.º 8
0
def marc_formats(ia, host=None, path=None):
    """Report which MARC files are listed in an item's ``_files.xml``.

    Fetches ``<ia>_files.xml`` -- from ``http://<host><path>/`` when both
    ``host`` and ``path`` are given, otherwise from
    ``http://www.archive.org/download/<ia>/`` -- and checks the ``name``
    attribute of each child of the XML root against ``<ia>_marc.xml``
    and ``<ia>_meta.mrc``.

    :param ia: item identifier (string).
    :param host: optional host name used to build the URL.
    :param path: optional path on ``host`` used to build the URL.
    :returns: dict ``{'xml': bool, 'bin': bool}``.  Both values are False
        when the listing could not be fetched (an error mail is sent) or
        could not be parsed (the raw data is printed).
    """
    files = {
        ia + '_marc.xml': 'xml',
        ia + '_meta.mrc': 'bin',
    }
    has = {'xml': False, 'bin': False}
    ending = 'files.xml'
    if host and path:
        url = 'http://%s%s/%s_%s' % (host, path, ia, ending)
    else:
        url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending

    # Up to 10 attempts, pausing 10 seconds after each failed one.
    f = None
    for attempt in range(10):
        f = urlopen_keep_trying(url)
        if f is not None:
            break
        sleep(10)
    if f is None:
        msg_from = '*****@*****.**'
        msg_to = ['*****@*****.**']
        subject = "error reading %s_files.xml" % ia
        msg = url
        error_mail(msg_from, msg_to, subject, msg)
        return has

    data = f.read()
    try:
        root = etree.fromstring(data)
    except Exception:
        # Was a bare "except:", which also swallowed KeyboardInterrupt and
        # SystemExit.  A single formatted argument prints the same text
        # under both the print statement and the print function.
        print('bad: %s' % repr(data))
        return has

    for e in root:
        # .get() so a child without a "name" attribute is skipped instead
        # of raising KeyError; it cannot match either file name anyway.
        name = e.attrib.get('name')
        if name in files:
            has[files[name]] = True
        if all(has.values()):
            break  # both formats found, no need to scan further
    return has
Exemplo n.º 9
0
def error_marc_403(ia):
    """Mail a 'MARC 403' notice for item ``ia``.

    The subject is ``'MARC 403: <ia>'`` and the body is the item's
    archive.org details URL.
    """
    details_url = 'http://www.archive.org/details/' + ia
    subject_line = 'MARC 403: ' + ia
    error_mail(
        '*****@*****.**',
        ['*****@*****.**'],
        subject_line,
        details_url,
    )
Exemplo n.º 10
0
=======
        url = 'http://%s%s/%s_%s' % (host, path, ia, ending)
>>>>>>> a6e890c72315ff97b2f8a600f189fce28668fefe
    else:
        url = 'http://www.archive.org/download/' + ia + '/' + ia + '_' + ending
    for attempt in range(10):
        f = urlopen_keep_trying(url)
        if f is not None:
            break
        sleep(10)
    if f is None:
        msg_from = '*****@*****.**'
        msg_to = ['*****@*****.**']
        subject = "error reading %s_files.xml" % ia
        msg = url
        error_mail(msg_from, msg_to, subject, msg)
        return has
    data = f.read()
    try:
        root = etree.fromstring(data)
    except:
        print 'bad:', `data`
        return has
    for e in root:
        name = e.attrib['name']
        if name in files:
            has[files[name]] = True
        if all(has.values()):
            break
    return has
Exemplo n.º 11
0
def error_marc_403(ia):
    """Send an error mail whose subject is ``'MARC 403: '`` plus ``ia``.

    The mail body consists solely of the archive.org details URL for
    the item.
    """
    sender = '*****@*****.**'
    recipients = ['*****@*****.**']
    body = 'http://www.archive.org/details/' + ia
    error_mail(sender, recipients, 'MARC 403: ' + ia, body)
Exemplo n.º 12
0
    msg_from = '*****@*****.**'
    msg_to = ['*****@*****.**']
    msg = 'http://www.archive.org/details/' + ia
    subject = 'MARC 403: ' + ia
    error_mail(msg_from, msg_to, subject, msg)


def bad_marc_alert(bad_marc):
    """Mail a report listing every entry of ``bad_marc``.

    :param bad_marc: non-empty sequence of ``(ia, data)`` pairs; an empty
        value trips the assertion.

    The subject is ``'<count> bad MARC'``.  For each pair the body holds
    the item's archive.org details URL, its download URL and
    ``repr(data)``.
    """
    assert bad_marc
    msg_from = '*****@*****.**'
    msg_to = ['*****@*****.**']
    subject = '%d bad MARC' % len(bad_marc)
    # repr() replaces the backtick form: same result, but backticks are
    # Python-2-only syntax that was removed in Python 3.
    msg = '\n'.join(('http://www.archive.org/details/%s\n' +
                     'http://www.archive.org/download/%s\n\n' + '%s\n\n') %
                    (ia, ia, repr(data)) for ia, data in bad_marc)
    error_mail(msg_from, msg_to, subject, msg)


if __name__ == '__main__':
    fh_log = open(load_scribe_log, 'a')

    open(config.runtime_config['state_dir'] + '/load_scribe.pid',
         'w').write(os.getpid())
    start = open(state_file).readline()[:-1]
    bad_marc_last_sent = time()
    bad_marc = []

    while True:

        if args.item_id:
            db_iter = db.query(