示例#1
0
def process_TOC(x):
    """Build one tag-505 field per subfield of ``x``.

    ``x`` is split with ``split_subfields``.  Entries whose length is 9000
    or more are dropped; in the remaining entries every ``"--"`` that is
    immediately followed by a newline is rewritten as ``"-- "``.

    Returns a list of field dicts (``tag``/``ind1``/``ind2``/``subs``).
    """
    return [
        {
            'tag': '505',
            'ind1': '0',
            'ind2': ' ',
            # The length test above applies to the entry *before* this rewrite.
            'subs': {'a': entry.replace("--\n", '-- ')},
        }
        for entry in split_subfields(x)
        if len(entry) < 9000
    ]
示例#2
0
def process_STAMP(x):
    """Build a single tag-129 field from the subfields of ``x``.

    The first part becomes subfield ``a``; a second and third part, when
    present, become ``b`` and ``c``.  Any further parts are ignored.

    Raises IndexError when ``split_subfields`` yields no parts at all.
    """
    parts = split_subfields(x)
    subfields = {'a': parts[0]}
    # zip stops at the shorter input, so 'b'/'c' only appear when available.
    subfields.update(zip('bc', parts[1:3]))
    field = {'tag': '129', 'ind1': ' ', 'ind2': ' ', 'subs': subfields}
    return [field]
示例#3
0
 def _process_UP(x):
     """Build one field (tag taken from ``tag_number``) from ``x``.

     ``tag_number`` is not defined in this function; it is resolved from
     the surrounding scope.

     Subfield layout, by number of parts returned by ``split_subfields``:
       * 1 part   -> ``a`` only
       * 2 parts  -> ``a`` plus ``b`` = the lower-cased second part passed
                     through ``format_date`` with ``'%d %b %Y'``
       * 3+ parts -> ``a``, ``b`` = second part passed through
                     ``format_date`` with ``'%Y %m %d'``, and ``c`` = third
                     part
     """
     parts = split_subfields(x)
     subfields = {'a': parts[0]}
     count = len(parts)
     if count > 2:
         subfields['b'] = format_date(parts[1], '%Y %m %d')
         subfields['c'] = parts[2]
     elif count == 2:
         # Only the two-part form lower-cases the date text before parsing.
         subfields['b'] = format_date(parts[1].lower(), '%d %b %Y')
     field = {'tag': tag_number, 'ind1': ' ', 'ind2': ' ', 'subs': subfields}
     return [field]
示例#4
0
def process_language(v):
    """Build one tag-337 field per language listed in ``v``.

    Each subfield of ``v`` is looked up in the module-level ``languages``
    table and the looked-up value is stored in subfield ``a``.

    This is deliberately best-effort and all-or-nothing: if anything fails
    (for example a value missing from ``languages``), the error is printed
    and an empty list is returned, discarding fields already built.
    """
    try:
        return [
            {'tag': '337', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[lang]}}
            for lang in split_subfields(v)
        ]
    except Exception as e:
        # print(e) with a single argument behaves the same on Python 2 and 3,
        # unlike the Python-2-only ``print e`` statement.
        print(e)
        return []
示例#5
0
def calc_nonfiling_ind(v, lang):
    """Return how many leading units of ``v`` are non-filing prefixes.

    ``lang`` may be None (treated as ``''``); its first subfield is looked
    up in ``languages`` and the result selects a word list from
    ``nonfiling_words`` (empty list when absent).  Those words plus
    ``nonfiling_chars`` form the set of prefixes that are skipped
    repeatedly from the start of the lower-cased title.

    The count is taken on the UTF-8 encoded text, i.e. the return value is
    an offset into that encoded string.
    NOTE(review): this looks Python-2 specific (byte string compared with
    the prefix strings) -- confirm before running under Python 3.

    Raises KeyError when the language is not present in ``languages``.
    """
    if lang is None:
        lang = ''
    lang = split_subfields(lang)[0]
    # Built once; the original rebuilt this tuple on every loop test.
    prefixes = tuple(nonfiling_words.get(languages[lang], []) + nonfiling_chars)
    a = v.lower().encode('utf8')
    ind = 0
    # startswith(prefix, ind) tests the text from offset ``ind`` onward
    # without copying a slice.
    while a.startswith(prefixes, ind):
        # A single pass may consume several prefixes in list order.
        for n in prefixes:
            if a.startswith(n, ind):
                ind += len(n)
    return ind
示例#6
0
def process_ROUTE(x):
    """Build one tag-122 field per group of three subfields in ``x``.

    The subfields of ``x`` are consumed in consecutive groups of up to
    three.  For each group, subfield ``a`` holds the 1-based position of
    the group as a string, and the group's members are stored in ``b``,
    ``c`` and ``d`` (the last group may be shorter, in which case the
    trailing subfields are simply absent).
    """
    parts = split_subfields(x)
    res = []
    # range() instead of the Python-2-only xrange(); enumerate supplies the
    # 1-based sequence number that used to be a hand-maintained counter.
    for position, start in enumerate(range(0, len(parts), 3), 1):
        subs = {'a': str(position)}
        # zip stops at the shorter input, so a short final group fills only
        # the subfields it has values for.
        subs.update(zip('bcd', parts[start:start + 3]))
        res.append({'tag': '122', 'ind1': ' ', 'ind2': ' ', 'subs': subs})
    return res
示例#7
0
def process_ISSUE(x):
    """Build a tag-853 caption field followed by one tag-863 field per issue.

    ``split_subfields(x, as_dict=True)`` is expected to yield mappings of
    subfield code -> raw value.  For every code that has an entry in
    ``fix_ISSUES``, that entry is called with the raw value and must return
    a ``(key, value)`` pair, which is stored in the 863 field's subfields.
    Codes without an entry in ``fix_ISSUES`` are ignored.

    The first element of the result is always the fixed 853 field, even
    when there are no issues.
    """
    issues = split_subfields(x, as_dict=True)

    caption_subs = {
        '8': '1',
        'a': 'v.',
        'b': 'no.',
        'i': '(year)',
        'j': '(month)',
    }
    fields = [{'tag': '853', 'ind1': ' ', 'ind2': '3', 'subs': caption_subs}]

    for issue in issues:
        converted = {}
        for code, raw in issue.items():
            if code not in fix_ISSUES:
                continue
            key, value = fix_ISSUES[code](raw)
            converted[key] = value
        fields.append({'tag': '863', 'ind1': ' ', 'ind2': ' ', 'subs': converted})
    return fields
示例#8
0
def process_subject(v):
    """Build one tag-650 field per subject line in ``v``.

    Each subfield of ``v`` is split on ``'--'``; the pieces are stripped of
    surrounding whitespace and then of ``'|'`` characters.  The first piece
    is emitted under code ``a`` and the following pieces under ``x``.

    ``subs`` is a flat list ``[code, value, code, value, ...]`` rather than
    a dict, because the code ``x`` can occur several times.

    At most 11 pieces per line are kept (one ``a`` plus ten ``x``); any
    further piece is skipped and reported on stdout.
    """
    letters = ['a'] + ['x'] * 10
    res = []
    for line in split_subfields(v):
        pieces = [e.strip() for e in line.split('--')]
        subs = []
        for i, subject in enumerate(pieces):
            try:
                letter = letters[i]
            except IndexError as e:
                # More pieces than available codes: report and skip.
                # Single-argument print(...) behaves the same on Python 2
                # and 3, unlike the original print statements.
                print(e)
                # NOTE(review): str(v)/encode will fail on Python 2 for
                # non-ASCII input -- kept as in the original; confirm.
                print((str(i) + '\t' + str(v)).encode('utf-8'))
                continue
            subs.append(letter)
            subs.append(subject.strip('|'))
        res.append({'tag': '650', 'ind1': ' ', 'ind2': '0', 'subs': subs})
    return res
示例#9
0
    'Visual item': 'Monographic item',
    'Test Type': 'Monographic item',
}
# Replacement table for ITYPE values: raw value -> spelling to emit.
# It is consulted with ``itype_fix.get(e, e)`` in the ITYPE converter, so
# any value that is not a key here passes through unchanged.
# Lookups are exact-match (case-sensitive), which is why both
# 'VIDEORECORDING' and 'PROCEEDING' need their own entries.
itype_fix = {
    'VIDEORECORDING': 'Videorecording',
    'Motion picture': 'Videorecording',
    'Conference publication': 'Proceeding',
    'PROCEEDING': 'Proceeding',
    'PRINT & ELECTRONIC': 'Print & Electronic',
}

# Source-field name -> converter callable.
# Each lambda below takes the raw field value ``x`` and returns a list of
# field dicts with the keys 'tag', 'ind1', 'ind2' and 'subs'.  The named
# process_* converters are defined elsewhere (presumably they return the
# same shape -- confirm).
# Unknown field names yield None instead of raising KeyError; note that a
# defaultdict also stores that None under the looked-up key.
mapping = defaultdict(lambda: None)

# Record identifiers share tag 999 (subfields a / b).
mapping['RECID'] = lambda x: [{'tag': '999', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ID'] =    lambda x: [{'tag': '999', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': x}}]
# Type fields share tag 998; values are normalised through the *_fix tables,
# falling back to the raw value when no replacement is listed.
mapping['RTYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': rtype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['MTYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': mtype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['ITYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'c': itype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['NUM'] =   process_nums
mapping['UDC'] =   lambda x: [{'tag': '080', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['CUT'] =   lambda x: [{'tag': '080', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': e}} for e in split_subfields(x)]
# The trailing comments on the next lines are the earlier inline lambdas
# that the dedicated process_* functions replaced.
mapping['AU'] =    process_author #lambda x: [{'tag': '100', 'ind1': '1', 'ind2': ' ', 'subs': {'a' : e}} for e in split_subfields(x)]
mapping['CAU'] =   process_corporate_author #lambda x: [{'tag': '110', 'ind1': ' ', 'ind2': '2', 'subs': {'a': x}}]
mapping['EDIT'] =  lambda x: [{'tag': '700', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e, 'e': 'editor'}} for e in split_subfields(x)]
mapping['TI'] =    process_title #lambda x: [{'tag': '245', 'ind1': '0', 'ind2': '0', 'subs': {'a' : e}} for e in split_subfields(x)]
mapping['ATI'] =   lambda x: [{'tag': '246', 'ind1': '3', 'ind2': '0', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['PUB'] =   process_publication #lambda x: [{'tag': '264', 'ind1': ' ', 'ind2': '1', 'subs': {'a' : e}} for e in split_subfields(x)]
mapping['BDATE'] = lambda x: [{'tag': '264', 'ind1': ' ', 'ind2': '1', 'subs': {'c': e}} for e in split_subfields(x)]
mapping['DES'] =   process_description
# languages[e] is a plain index, so an unlisted language raises KeyError.
mapping['LANG'] =  lambda x: [{'tag': '041', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[e]}} for e in split_subfields(x)]
mapping['SERIE'] = lambda x: [{'tag': '490', 'ind1': '0', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
示例#10
0
def process_HOLD(x):
    """Build the tag-866 field for ``x``, keeping only its last subfield.

    When ``x`` contains several subfields only the final one is kept.
    The result is always a list (empty, or holding exactly one field dict).
    Previously the multi-entry case returned the bare dict ``res[-1]``
    while the other cases returned a list, so callers received two
    different shapes.
    """
    res = [{'tag': '866', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
    # res[-1:] is [] for no entries and [last] otherwise -- always a list.
    return res[-1:]
示例#11
0
    return res

# Load both exports.  Only ``holdings`` is used by the active code below;
# ``serials`` is referenced solely by the disabled merge that follows.
holdings = get_full_dict('T.SERHOLD.json')
serials =  get_full_dict('T.SERIALS.json')
# ALL is another name for the same object as ``holdings`` (no copy is made).
ALL = holdings
# Disabled alternative: build ALL by overlaying each serials record onto a
# copy of the holdings record with the same key.
# ALL = {}
# for (k, v) in holdings.items():
#     new = v.copy()
#     for (i, j) in serials[k].items():
#         new[i] = j
#     ALL[k] = new

# Source-field name -> converter callable.  Each lambda takes the raw field
# value ``x`` and returns a list of field dicts ('tag', 'ind1', 'ind2',
# 'subs').  Unknown field names yield None instead of raising KeyError (and
# the defaultdict stores that None under the looked-up key).
# Commented-out entries are fields that are currently not converted.
mapping = defaultdict(lambda: None)
mapping['ACQNO'] = lambda x: [{'tag': '100', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['ACTIV'] = lambda x: [{'tag': '101', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['AU'] =    lambda x: [{'tag': '102', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['CITED'] = lambda x: [{'tag': '103', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['COPY'] =  lambda x: [{'tag': '104', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['CTI'] =   lambda x: [{'tag': '105', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['DES'] =   lambda x: [{'tag': '106', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ED'] =    lambda x: [{'tag': '107', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['EXCLM'] = lambda x: [{'tag': '108', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['EXP'] =   lambda x: [{'tag': '109', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# Frequency values are normalised via fix_FREQ; unlisted values pass through.
mapping['FREQ'] =  lambda x: [{'tag': '110', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': fix_FREQ.get(x, x)}}]
mapping['HOLD'] =  lambda x: [{'tag': '111', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['ISSN'] =  lambda x: [{'tag': '112', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ISSUE'] = process_ISSUE #lambda x: [{'tag': '113', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
# Each subfield may hold several languages separated by '/'; every name is
# title-cased before the ``languages`` lookup (KeyError when unlisted).
mapping['LANG'] =  lambda x: [{'tag': '114', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[e.title()]}} for e in flatten_list([a.split('/') for a in split_subfields(x)])]
mapping['OS'] =    lambda x: [{'tag': '115', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['PDATE'] = lambda x: [{'tag': '116', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# Strip surrounding whitespace and '|' characters, then join the parts that
# were separated by a newline + eight spaces + '|' into one '; '-separated
# string.
mapping['PNOTE'] = lambda x: [{'tag': '117', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': '; '.join(x.strip(whitespace+'|').split('\n        |'))}}]