def process_TOC(x):
    """Build one 505 field (ind1 '0', ind2 blank) per table-of-contents entry.

    Entries come from ``split_subfields(x)``.  Only entries shorter than
    9000 characters are emitted; in those, every ``"--\\n"`` is replaced by
    ``'-- '`` before the text is stored in subfield ``a``.

    NOTE(review): the original was collapsed onto one line; this reading
    places both the replace and the append under the length guard --
    confirm against the upstream file.
    """
    return [
        {
            'tag': '505',
            'ind1': '0',
            'ind2': ' ',
            'subs': {'a': entry.replace("--\n", '-- ')},
        }
        for entry in split_subfields(x)
        if len(entry) < 9000
    ]
def process_STAMP(x):
    """Return a single 129 field built from the first three parts of *x*.

    The parts returned by ``split_subfields(x)`` are stored, in order, under
    subfields ``a``, ``b`` and ``c``.  ``a`` is mandatory (an empty split
    raises ``IndexError``); ``b`` and ``c`` are only set when present.
    Parts beyond the third are ignored.
    """
    pieces = split_subfields(x)
    subs = {'a': pieces[0]}
    # zip stops at the shorter sequence, so 'b'/'c' appear only if supplied.
    for code, value in zip('bc', pieces[1:3]):
        subs[code] = value
    return [{'tag': '129', 'ind1': ' ', 'ind2': ' ', 'subs': subs}]
def _process_UP(x):
    """Return one field (tag taken from ``tag_number``) describing *x*.

    ``tag_number`` is not defined in this block -- presumably it is bound by
    an enclosing scope outside this view; verify against the caller.

    The first part of ``split_subfields(x)`` always goes to subfield ``a``.
    With exactly two parts, the second is lower-cased and passed to
    ``format_date`` with ``'%d %b %Y'``.  With three or more, the second is
    passed to ``format_date`` with ``'%Y %m %d'`` and the third is stored
    unchanged in subfield ``c``.
    """
    pieces = split_subfields(x)
    subs = {'a': pieces[0]}
    count = len(pieces)
    if count > 2:
        subs['b'] = format_date(pieces[1], '%Y %m %d')
        subs['c'] = pieces[2]
    elif count == 2:
        subs['b'] = format_date(pieces[1].lower(), '%d %b %Y')
    return [{'tag': tag_number, 'ind1': ' ', 'ind2': ' ', 'subs': subs}]
def process_language(v):
    """Return one 337 field per language listed in *v*.

    Each part of ``split_subfields(v)`` is looked up in ``languages`` and the
    result stored in subfield ``a``.  This is best-effort: if anything
    raises (for example an unknown language key), the exception is printed
    and an empty list is returned -- fields built so far are discarded.
    """
    try:
        return [
            {'tag': '337', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[name]}}
            for name in split_subfields(v)
        ]
    except Exception as err:
        # Parenthesised single-argument print behaves the same as the
        # Python 2 print statement used elsewhere in this file.
        print(err)
        return []
def calc_nonfiling_ind(v, lang):
    """Return the length of the leading non-filing prefix of *v*.

    The candidate prefixes are ``nonfiling_words`` for the language (looked
    up through ``languages``) followed by ``nonfiling_chars``.  Matching is
    done on the lower-cased, UTF-8 encoded form of *v*, so the returned
    count is measured on that encoded form.

    Args:
        v: the text whose leading articles/punctuation are measured.
        lang: raw language value; only the first part returned by
            ``split_subfields`` is used.  ``None`` is treated as ``''``.

    Returns:
        int: total length of all prefixes consumed from the start of *v*.

    Raises:
        KeyError: if the language is not present in ``languages``.
    """
    if lang is None:
        lang = ''
    lang = split_subfields(lang)[0]

    # Empty prefixes are dropped: ''.startswith-style matches always succeed
    # while consuming nothing, which would make the loop below spin forever.
    nonfiling = [
        prefix
        for prefix in nonfiling_words.get(languages[lang], []) + nonfiling_chars
        if prefix
    ]
    prefixes = tuple(nonfiling)  # built once instead of on every loop test

    a = v.lower().encode('utf8')
    ind = 0
    while a[ind:].startswith(prefixes):
        # Deliberately no `break`: after one prefix is consumed, the rest of
        # the list is still tried against the new position in the same pass.
        for n in nonfiling:
            if a[ind:].startswith(n):
                ind = ind + len(n)
    return ind
def process_ROUTE(x):
    """Return one 122 field per group of three parts in *x*.

    The flat list from ``split_subfields(x)`` is consumed three items at a
    time.  Within each group the items go to subfields ``b``, ``c`` and
    ``d`` (the last group may be short), and subfield ``a`` holds the
    1-based position of the group as a string.
    """
    flat = split_subfields(x)
    res = []
    for position, start in enumerate(xrange(0, len(flat), 3), 1):
        group = flat[start:start + 3]
        # start < len(flat), so every group has at least one element.
        subs = {'a': str(position), 'b': group[0]}
        for code, value in zip('cd', group[1:]):
            subs[code] = value
        res.append({'tag': '122', 'ind1': ' ', 'ind2': ' ', 'subs': subs})
    return res
def process_ISSUE(x):
    """Return a fixed 853 field followed by one 863 field per issue in *x*.

    ``split_subfields(x, as_dict=True)`` is iterated as a sequence of dicts.
    For every key of an issue that is also present in ``fix_ISSUES``, the
    matching callable is applied to the value and must return a
    ``(subfield_code, value)`` pair, which is stored on the 863 field.
    Keys without an entry in ``fix_ISSUES`` are dropped.
    """
    issues = split_subfields(x, as_dict=True)

    # The 853 field always comes first and carries constant subfields.
    res = [{
        'tag': '853',
        'ind1': ' ',
        'ind2': '3',
        'subs': {
            '8': '1',
            'a': 'v.',
            'b': 'no.',
            'i': '(year)',
            'j': '(month)',
        },
    }]

    for issue in issues:
        subs = {}
        for source_key, raw in issue.items():
            if source_key not in fix_ISSUES:
                continue
            code, converted = fix_ISSUES[source_key](raw)
            subs[code] = converted
        res.append({'tag': '863', 'ind1': ' ', 'ind2': ' ', 'subs': subs})
    return res
def process_subject(v):
    """Return one 650 field (ind2 '0') per subject heading in *v*.

    Each part of ``split_subfields(v)`` is split on ``'--'``.  The first
    piece becomes subfield ``a`` and each later piece a subfield ``x``.
    ``subs`` is a flat list ``[code, value, code, value, ...]`` rather than
    a dict, so repeated ``x`` codes are kept.  Surrounding whitespace and
    ``'|'`` characters are stripped from every piece.

    Only eleven codes are available; pieces past the eleventh are not
    emitted, and the error plus the offending input are printed instead.
    """
    codes = ['a'] + ['x'] * 10
    res = []
    for heading in split_subfields(v):
        pieces = [piece.strip() for piece in heading.split('--')]
        subs = []
        for i, subject in enumerate(pieces):
            try:
                code = codes[i]
                subs.append(code)
                subs.append(subject.strip('|'))
            except Exception as e:
                if i > 10:
                    print(e)
                    print((str(i) + '\t' + str(v)).encode('utf-8'))
        res.append({'tag': '650', 'ind1': ' ', 'ind2': '0', 'subs': subs})
    return res
'Visual item': 'Monographic item',
'Test Type': 'Monographic item',
}
# NOTE(review): the three lines above close a dict literal that is opened
# outside this view; its name is not visible here.

# Replacement values for ITYPE entries; values not listed here are passed
# through unchanged by the ITYPE handler below (itype_fix.get(e, e)).
itype_fix = {
    'VIDEORECORDING': 'Videorecording',
    'Motion picture': 'Videorecording',
    'Conference publication': 'Proceeding',
    'PROCEEDING': 'Proceeding',
    'PRINT & ELECTRONIC': 'Print & Electronic',
}

# Source field name -> handler.  Every handler assigned here takes the raw
# field value and returns a list of {'tag', 'ind1', 'ind2', 'subs'} dicts.
# Looking up a name that has no handler yields None (the defaultdict
# factory) instead of raising KeyError.
mapping = defaultdict(lambda: None)
# RECID and ID both produce a 999 field, in subfields a and b respectively.
mapping['RECID'] = lambda x: [{'tag': '999', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ID'] = lambda x: [{'tag': '999', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': x}}]
# RTYPE / MTYPE / ITYPE: one 998 field per split value, each value passed
# through its *_fix table (rtype_fix and mtype_fix are defined outside this
# view).
mapping['RTYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': rtype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['MTYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': mtype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['ITYPE'] = lambda x: [{'tag': '998', 'ind1': ' ', 'ind2': ' ', 'subs': {'c': itype_fix.get(e, e)}} for e in split_subfields(x)]
mapping['NUM'] = process_nums
# UDC and CUT each emit their own 080 fields (subfield a and b respectively).
mapping['UDC'] = lambda x: [{'tag': '080', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['CUT'] = lambda x: [{'tag': '080', 'ind1': ' ', 'ind2': ' ', 'subs': {'b': e}} for e in split_subfields(x)]
# The trailing comments on AU, CAU, TI and PUB are the file's own
# commented-out inline lambdas, kept for reference.
mapping['AU'] = process_author  #lambda x: [{'tag': '100', 'ind1': '1', 'ind2': ' ', 'subs': {'a' : e}} for e in split_subfields(x)]
mapping['CAU'] = process_corporate_author  #lambda x: [{'tag': '110', 'ind1': ' ', 'ind2': '2', 'subs': {'a': x}}]
# EDIT: one 700 field per split value, with subfield e fixed to 'editor'.
mapping['EDIT'] = lambda x: [{'tag': '700', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e, 'e': 'editor'}} for e in split_subfields(x)]
mapping['TI'] = process_title  #lambda x: [{'tag': '245', 'ind1': '0', 'ind2': '0', 'subs': {'a' : e}} for e in split_subfields(x)]
mapping['ATI'] = lambda x: [{'tag': '246', 'ind1': '3', 'ind2': '0', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['PUB'] = process_publication  #lambda x: [{'tag': '264', 'ind1': ' ', 'ind2': '1', 'subs': {'a' : e}} for e in split_subfields(x)]
# BDATE emits its own 264 field(s) carrying only subfield c.
mapping['BDATE'] = lambda x: [{'tag': '264', 'ind1': ' ', 'ind2': '1', 'subs': {'c': e}} for e in split_subfields(x)]
mapping['DES'] = process_description
# LANG: each split value is looked up in `languages`; an unknown value
# raises KeyError here.
mapping['LANG'] = lambda x: [{'tag': '041', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[e]}} for e in split_subfields(x)]
mapping['SERIE'] = lambda x: [{'tag': '490', 'ind1': '0', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
def process_HOLD(x):
    """Return the 866 field for the last holdings statement in *x*.

    One 866 field (subfield ``a``) is built per part of
    ``split_subfields(x)``; when there are several, only the last is kept.

    Returns:
        list: zero or one field dicts.  The previous implementation returned
        a bare dict (not wrapped in a list) when more than one part was
        present, which was inconsistent with the single-part case and with
        the other field handlers in this file, all of which return lists.
    """
    res = [
        {'tag': '866', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}}
        for e in split_subfields(x)
    ]
    # Slicing keeps the list shape for 0, 1 or many entries.
    return res[-1:]
return res
# NOTE(review): the `return res` above is the tail of a function whose
# header lies outside this view; it is kept verbatim.


# Records loaded from the two JSON exports via get_full_dict (defined
# outside this view).
holdings = get_full_dict('T.SERHOLD.json')
serials = get_full_dict('T.SERIALS.json')
# Only the holdings records are processed.  The commented-out block below is
# the file's own disabled code, which copied each holdings record and
# overlaid the matching `serials` entry.
ALL = holdings
# ALL = {}
# for (k, v) in holdings.items():
#     new = v.copy()
#     for (i, j) in serials[k].items():
#         new[i] = j
#     ALL[k] = new

# Source field name -> handler returning a list of
# {'tag', 'ind1', 'ind2', 'subs'} dicts.  Names without a handler resolve to
# None through the defaultdict factory.  Handlers written with
# `for e in split_subfields(x)` emit one field per split value; the others
# emit a single field holding the raw value.
mapping = defaultdict(lambda: None)
mapping['ACQNO'] = lambda x: [{'tag': '100', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['ACTIV'] = lambda x: [{'tag': '101', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['AU'] = lambda x: [{'tag': '102', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['CITED'] = lambda x: [{'tag': '103', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['COPY'] = lambda x: [{'tag': '104', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['CTI'] = lambda x: [{'tag': '105', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['DES'] = lambda x: [{'tag': '106', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ED'] = lambda x: [{'tag': '107', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# mapping['EXCLM'] = lambda x: [{'tag': '108', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
mapping['EXP'] = lambda x: [{'tag': '109', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# FREQ: the value is replaced via fix_FREQ when listed there, otherwise
# passed through unchanged.
mapping['FREQ'] = lambda x: [{'tag': '110', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': fix_FREQ.get(x, x)}}]
mapping['HOLD'] = lambda x: [{'tag': '111', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': e}} for e in split_subfields(x)]
mapping['ISSN'] = lambda x: [{'tag': '112', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['ISSUE'] = process_ISSUE  #lambda x: [{'tag': '113', 'ind1': ' ', 'ind2': ' ', 'subs': {'a' : x}}]
# LANG: each split value is further split on '/', the pieces are flattened,
# title-cased and looked up in `languages` (unknown names raise KeyError).
mapping['LANG'] = lambda x: [{'tag': '114', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': languages[e.title()]}} for e in flatten_list([a.split('/') for a in split_subfields(x)])]
mapping['OS'] = lambda x: [{'tag': '115', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
mapping['PDATE'] = lambda x: [{'tag': '116', 'ind1': ' ', 'ind2': ' ', 'subs': {'a': x}}]
# PNOTE: emit a single 117 field.  Leading/trailing whitespace and '|'
# characters are stripped from the raw value, then every internal
# '\n |' separator is turned into '; ' so the note becomes one line.
mapping['PNOTE'] = lambda x: [{
    'tag': '117',
    'ind1': ' ',
    'ind2': ' ',
    'subs': {'a': x.strip(whitespace + '|').replace('\n |', '; ')},
}]