Exemplo n.º 1
0
 def extract_entries(rows):
     """Yield a ``(cols, entry)`` pair for every row in *rows*.

     Relies on ``header``, ``options``, ``myutils`` and ``idbase`` from the
     enclosing scope.

     The entry built for each row has the shape::

         {'entry': {'id': {<header label>: <id value>, ...},
                    'content': {'added': [...], 'removed': []},
                    'comment': ''},
          'labels': {<header label>: <bool>, ...}}

     ``'labels'`` is only present when ``options.labels`` is set and at
     least one label column was found in the row.  ``'added'`` holds the
     UTF-8 decoded text column when ``options.textfield`` is set and is
     empty otherwise.
     """
     for cols in rows:
         # A column counts as a label when its header is set and the cell is
         # exactly '0' or '1'; the cell is stored as a boolean.
         labels = {}
         for i, lab in enumerate(header):
             if lab is not None and cols[i] in ('0', '1'):
                 labels[lab] = cols[i] == '1'

         if options.idfields is not None:
             # int_if presumably converts numeric strings to int -- verify
             # against myutils.
             ids = [myutils.int_if(cols[i]) for i in options.idfields]
         else:
             ids = [idbase]

         # NOTE(review): iterating options.idfields here raises TypeError when
         # it is None, so the idbase fallback above never produces an entry.
         # The key that idbase should be stored under is not visible from this
         # block; confirm the intent before changing it.
         id_keys = [header[i] for i in options.idfields]

         ent = {'entry': {'id': dict(zip(id_keys, ids)),
                          'content': {'added': [],
                                      'removed': []},
                          'comment': ''}}
         if options.labels is not None and labels:
             ent['labels'] = labels
         if options.textfield is not None:
             # Cells are byte strings here; the stored text is unicode.
             ent['entry']['content']['added'] = [cols[options.textfield].decode('UTF-8')]
         yield (cols, ent)
Exemplo n.º 2
0
    # Collection that incoming rows are checked against below.
    db = collection['talkpage_diffs_raw']

    if options.idfields is not None:
        # Keep only rows whose every id column begins with at least one digit.
        # match() anchors at the start only, so a value such as '12abc' also
        # passes.  all() also copes with an empty idfields list, where the
        # previous reduce() without an initial value raised TypeError.
        digits = re.compile(r'\d+')
        table = [row for row in table
                 if all(digits.match(row[i]) for i in options.idfields)]

    if options.overwrite and options.idfields is not None:
        # Collect the id tuples of stored entries that have every id field
        # and a content field.
        query = {}
        for i in options.idfields:
            query['entry.id.' + header[i]] = {'$exists': True}
        query['entry.content'] = {'$exists': True}
        existings = set()
        for doc in db.find(query, {'entry.id': 1, 'entry.content': 1}):
            existings.add(tuple([doc['entry']['id'][header[i]] for i in options.idfields]))
        # NOTE(review): rows whose ids are already stored are dropped here,
        # i.e. existing entries are skipped rather than replaced even though
        # the option is called "overwrite" -- confirm this is intended.
        table = [row for row in table
                 if tuple([myutils.int_if(row[i]) for i in options.idfields]) not in existings]
    if options.overwrite and options.idfields is None:
        # Only a warning: processing continues without the existence check.
        print >>sys.stderr, 'overwrite requires idfields'

    def extract_entries(rows):
        for cols in rows:
            labels = {}
            for (i,lab) in enumerate(header):
                if lab != None and (cols[i] == '1' or cols[i] == '0'):
                    labels[lab] = bool(int(cols[i]))
            ids = []
            if options.idfields != None:
                ids = [myutils.int_if(cols[i]) for i in options.idfields]
            else:
                ids = [idbase]