def retrieve_many(legislature=max(LEGISLATURES), limit=300, force=False):
    """Download up to `limit` new PDFs for one legislature.

    Looks at the files already present in the legislature's local
    directory to find the highest ref retrieved so far, then tries the
    next `limit` consecutive refs in parallel.

    :param legislature: legislature number; defaults to the latest one
        (NOTE: `max(LEGISLATURES)` is evaluated once at import time).
    :param limit: maximum number of new refs to attempt.
    :param force: if true, restart from ref 1 even when local files exist.
    :return: list of refs actually retrieved (empty if nothing new).
    """
    dir_ = PDF.LOCAL_DIR % {'legislature': legislature}
    if not os.path.exists(dir_):
        os.makedirs(dir_)
    fnames_existing = os.listdir(dir_)
    if force or not fnames_existing:
        start = 0
    else:
        # Resume after the highest ref already on disk.
        start = max(PDF.from_filename(f).ref for f in fnames_existing)
    # BUG FIX: the range previously stopped at start + limit (exclusive),
    # yielding only limit - 1 candidates; include the full `limit` refs.
    refs = xrange(start + 1, start + 1 + limit)
    pdfs = map_async(lambda ref: PDF(ref, legislature).retrieve(), refs)
    # retrieve() presumably returns a falsy value for missing refs —
    # keep only the PDFs that were actually fetched.
    retrieved = [pdf.ref for pdf in filter(None, pdfs)]
    if retrieved:
        logger.info(u'copié %i pdfs (legislature %s, refs %s à %s)' % \
            (len(retrieved), legislature, retrieved[0], retrieved[-1]))
    return retrieved
def parse(refs=None, force=False, limit=None):
    """Parse parliamentary objects and persist the extracted attributes.

    :param refs: explicit list of refs to (re)parse; when None, the refs
        are selected from the database instead.
    :param force: when selecting from the database, also reparse objects
        that already have an `etat` set.
    :param limit: cap on the number of database-selected objects
        (ignored when `refs` is given explicitly).
    """
    if refs is None:
        query = Session.query(ObjetParlementaire)
        # (a deferred-column optimisation was once tried here:
        #  .options(defer('description', 'beschreibung')))
        if not force:
            # Only pick objects not yet processed (etat is NULL).
            query = query.filter(ObjetParlementaire.etat == None)
        if limit:
            query = query.limit(limit)
        refs = [objet.ref for objet in query]
    # Parse in parallel, then apply each result dict to its object.
    parsed = map_async(parse_objet, refs)
    for ref, attributes in zip(refs, parsed):
        objet = ObjetParlementaire.get_or_create(ref)
        for name, value in attributes.iteritems():
            setattr(objet, name, value)
    Session.commit()
    logger.info('Updated %i `ObjetParlementaire`s' % len(refs))
try: matches = lookup_bio_id(*args) except Exception, e: logger.info('bio_id error: %s' % e) return nb = len(set(matches)) if nb == 1: pers.bio_id = bio_id = matches[0] return bio_id elif nb == 0: logger.info('bio_id - no match for %s', args) else: logger.info('bio_id - %i matches for %s (%s)', nb,args,matches) bio_ids = map_async(update_one, q, poolsize=100) Session.commit() logger.info('Updated %i bio-ids from web.' % len(filter(None, bio_ids))) if use_lookup_table: nb_from_lt = 0 for ident, i in bio_ids_lookup_table.iteritems(): p = db.Personne.query.get(ident) if force or p.bio_id is None: p.bio_id = i logger.info('bio_id - applying lookup table value (%s) for %s', (i, p)) nb_from_lt += 1 Session.commit() logger.info('Updated %i bio-ids from lookup table.' % nb_from_lt)