def extract_entries(rows):
    """Yield a ``(cols, ent)`` pair for every row in *rows*.

    Reads the module-level names ``header``, ``options``, ``myutils`` and
    ``idbase``.  For each row:

    * every column whose header label is not ``None`` and whose value is
      exactly ``'0'`` or ``'1'`` is stored as a boolean in ``labels``;
    * the columns listed in ``options.idfields`` are passed through
      ``myutils.int_if`` and stored under ``ent['entry']['id']``, keyed by
      their header label;
    * ``labels`` is attached to the result only when ``options.labels`` is
      set and at least one label was collected;
    * when ``options.textfield`` is set, that column becomes the single
      element of ``ent['entry']['content']['added']``.

    Args:
        rows: iterable of rows; each row is indexed by column position.

    Yields:
        tuple: ``(cols, ent)`` -- the untouched input row and the dict built
        from it.
    """
    for cols in rows:
        # Only literal '0' / '1' cells under a named header count as labels.
        labels = {}
        for i, lab in enumerate(header):
            if lab is not None and cols[i] in ('0', '1'):
                labels[lab] = cols[i] == '1'

        if options.idfields is not None:
            ids = [myutils.int_if(cols[i]) for i in options.idfields]
        else:
            ids = [idbase]

        # NOTE(review): when options.idfields is None the lookup below
        # iterates None and raises TypeError, so the ``idbase`` fallback
        # above never produces an entry.  The key under which ``idbase``
        # should be stored is not visible here -- confirm before changing.
        id_names = [header[i] for i in options.idfields]

        entry = {
            'id': dict(zip(id_names, ids)),
            'content': {'added': [], 'removed': []},
            'comment': '',
        }
        ent = {'entry': entry}
        if options.labels is not None and labels:
            ent['labels'] = labels

        if options.textfield is not None:
            added = cols[options.textfield]
            # Decode byte strings only: calling .decode() on text that is
            # already unicode triggers an implicit ASCII encode and fails on
            # non-ASCII characters.  Any other value is stored unchanged.
            if isinstance(added, bytes):
                added = added.decode('UTF-8')
            entry['content']['added'] = [added]

        yield (cols, ent)
# Target collection for the entries built from ``table``.
db = collection['talkpage_diffs_raw']

if options.idfields is not None:
    # Keep only rows in which every id column begins with a run of digits.
    # ``match`` anchors at the start of the string only, so trailing
    # non-digit characters are still accepted (unchanged behaviour).
    digits = re.compile(r'\d+')
    table = [
        row for row in table
        if all(digits.match(row[i]) for i in options.idfields)
    ]

if options.overwrite:
    if options.idfields is not None:
        # get existing entries
        # Header labels of the id columns, looked up once for all loops below.
        id_names = [header[i] for i in options.idfields]

        # Match documents that carry every id field and have content.
        query = dict(
            ('entry.id.' + name, {'$exists': True}) for name in id_names
        )
        query['entry.content'] = {'$exists': True}

        # Id tuples of the documents already stored with content.
        existings = set()
        for doc in db.find(query, {'entry.id': 1, 'entry.content': 1}):
            stored_ids = doc['entry']['id']
            existings.add(tuple(stored_ids[name] for name in id_names))

        # Drop rows whose id tuple is already stored.
        # NOTE(review): the option is called ``overwrite`` yet existing
        # entries are skipped here -- confirm this is the intended meaning.
        table = [
            row for row in table
            if tuple(myutils.int_if(row[i]) for i in options.idfields)
            not in existings
        ]
    else:
        # NOTE(review): only a message is printed; execution continues.
        print >>sys.stderr, 'overwrite requires idfields'

def extract_entries(rows):
    for cols in rows:
        labels = {}
        for (i,lab) in enumerate(header):
            if lab != None and (cols[i] == '1' or cols[i] == '0'):
                labels[lab] = bool(int(cols[i]))
        ids = []
        if options.idfields != None:
            ids = [myutils.int_if(cols[i]) for i in options.idfields]
        else:
            ids = [idbase]