def ocr_answer(answer_id):
    """Run OCR on an answer's PDF and persist the extracted text.

    Looks up the ``models.Answer`` row with the given id, passes its
    ``pdf_url`` to ``ocr_url``, stores the returned pages on ``answer.text``
    joined by blank lines, commits the session and logs the page count.

    There is no check for a failed lookup: the record is used directly
    after ``query.get``.
    """
    record = models.Answer.query.get(answer_id)

    # ocr_url's result is joined as strings and measured with len() below.
    page_texts = ocr_url(record.pdf_url)
    record.text = '\n\n'.join(page_texts)

    session = models.db.session
    session.add(record)
    session.commit()

    logger.info("done OCR for %s (%d pages)", record, len(page_texts))
def ocr_proposal(proposal_id, autoanalyze=False):
    """Run OCR on a proposal's PDF, store the text, optionally queue analysis.

    The ``models.Proposal`` row with the given id is loaded, its ``pdf_url``
    is passed to ``ocr_url``, and the returned pages are joined with blank
    lines into ``proposal.text`` before the session is committed.

    When ``autoanalyze`` is true, ``analyze_sponsorship.delay`` is called
    with the id of each eligible sponsorship of the proposal. A sponsorship
    is eligible when its mandate is flagged ``minority``, or when the
    mandate has a county whose ``geonames_code`` is set.
    """
    record = models.Proposal.query.get(proposal_id)

    page_texts = ocr_url(record.pdf_url)
    record.text = '\n\n'.join(page_texts)
    models.db.session.commit()
    logger.info("done OCR for %s (%d pages)", record, len(page_texts))

    if not autoanalyze:
        return

    sponsorship_list = record.sponsorships.all()
    logger.info("scheduling analysis for %d mandates", len(sponsorship_list))

    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; confirm that minority mandates are meant to be scheduled
    # without the county check.
    for sponsorship in sponsorship_list:
        mandate = sponsorship.mandate
        if mandate.minority:
            eligible = True
        else:
            # Non-minority mandates need a county with a geonames code.
            eligible = (
                mandate.county is not None
                and mandate.county.geonames_code is not None
            )
        if eligible:
            analyze_sponsorship.delay(sponsorship.id)
def ocr_question(question_id, autoanalyze=False):
    """Run OCR on a question's PDF, store the text, optionally queue analysis.

    The ``models.Question`` row with the given id is loaded, its ``pdf_url``
    is passed to ``ocr_url``, and the returned pages are joined with blank
    lines into ``question.text``; the record is then added to the session
    and committed.

    When ``autoanalyze`` is true, ``analyze.delay`` is called with the id of
    each eligible entry in ``question.asked``. An entry is eligible when its
    mandate is flagged ``minority``, or when the mandate has a county whose
    ``geonames_code`` is set.
    """
    record = models.Question.query.get(question_id)

    page_texts = ocr_url(record.pdf_url)
    record.text = '\n\n'.join(page_texts)

    session = models.db.session
    session.add(record)
    session.commit()
    logger.info("done OCR for %s (%d pages)", record, len(page_texts))

    if not autoanalyze:
        return

    asked_list = record.asked.all()
    logger.info("scheduling analysis for %d mandates", len(asked_list))

    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; confirm that minority mandates are meant to be scheduled
    # without the county check.
    for asked_entry in asked_list:
        mandate = asked_entry.mandate
        if mandate.minority:
            eligible = True
        else:
            # Non-minority mandates need a county with a geonames code.
            eligible = (
                mandate.county is not None
                and mandate.county.geonames_code is not None
            )
        if eligible:
            analyze.delay(asked_entry.id)