def _extract_pdf_forms(self, fname):
    """Read the interactive form fields stored in a PDF file.

    Parameters:
        fname (str): Path to PDF file.

    Returns:
        dict: The result of ``PdfFileReader.getFields()`` for that file,
            i.e. the form fields data extracted.
    """
    return PdfFileReader(fname).getFields()
def getFormFields(filepath):
    """
    Get field information of pdf file

    A field name that starts with a digit (optionally preceded by
    whitespace) is treated as carrying a counter prefix: the part before
    the first underscore is dropped, so repeated ("multi") fields share one
    name and are counted together. Note that such a name without any
    underscore becomes an empty string.

    :param filepath: filepath to pdf file
    :return: OrderedDict, filled in sorted (field name, field type) order,
        mapping the utf-8 encoded field name to a tuple of
        (utf-8 encoded field type, number of occurrences); empty when the
        pdf contains no form fields
    """
    numeric_prefix = re.compile(r'^\s*[0-9]')
    fields = []
    with open(filepath, 'rb') as f:
        # PyPDF2 documents getFields() as returning None (not an empty
        # dict) when no form data is found, so fall back to an empty dict.
        form_fields = PdfFileReader(f).getFields() or {}
        # Stay inside the `with` block while reading the field entries.
        for field in form_fields.values():
            name = field['/T']
            if numeric_prefix.search(name):
                name = "_".join(name.split('_')[1:])
            fields.append((name, FIELDTYPES[field['/FT']]))
    ret = OrderedDict()
    # Counter collapses duplicates; its count tells how often a field occurs.
    for (name, field_type), count in sorted(Counter(fields).items()):
        ret[name.encode('utf-8')] = (field_type.encode('utf-8'), count)
    return ret
def getFormFields(filepath):
    """
    Get field information of pdf file

    A field name that starts with a digit (optionally preceded by
    whitespace) is treated as carrying a counter prefix: the part before
    the first underscore is dropped, so repeated ("multi") fields share one
    name and are counted together. Note that such a name without any
    underscore becomes an empty string.

    :param filepath: filepath to pdf file
    :return: OrderedDict, filled in sorted (field name, field type) order,
        mapping the utf-8 encoded field name to a tuple of
        (utf-8 encoded field type, number of occurrences); empty when the
        pdf contains no form fields
    """
    numeric_prefix = re.compile(r'^\s*[0-9]')
    fields = []
    with open(filepath, 'rb') as f:
        # PyPDF2 documents getFields() as returning None (not an empty
        # dict) when no form data is found, so fall back to an empty dict.
        form_fields = PdfFileReader(f).getFields() or {}
        # Stay inside the `with` block while reading the field entries.
        for field in form_fields.values():
            name = field['/T']
            if numeric_prefix.search(name):
                name = "_".join(name.split('_')[1:])
            fields.append((name, FIELDTYPES[field['/FT']]))
    ret = OrderedDict()
    # Counter collapses duplicates; its count tells how often a field occurs.
    for (name, field_type), count in sorted(Counter(fields).items()):
        ret[name.encode('utf-8')] = (field_type.encode('utf-8'), count)
    return ret
def draw_rectangles_for_solution(self, f_in, f_out, solution, points):
    """Mark the correct answers in a PDF and write the achieved score.

    The form of ``f_in`` is copied to ``f_out`` with its current field
    values and with the ``points`` field on the first page set to
    ``str(points)``. Afterwards a green square annotation is added next to
    every named destination whose name starts with ``ht_`` and that belongs
    to a correct answer.

    The part of a destination name after ``ht_`` is a colon-separated list
    of integers. Index 1 selects the entry in ``solution`` and index 4 is
    the value looked up in ``solution[...][1]`` (presumably problem number
    and answer number -- confirm against the code generating the PDF).

    Parameters:
        f_in (str): Path to the input PDF file.
        f_out (str): Path to the output PDF file.
        solution (dict): The solution (correct answers) corresponding to
            the input PDF file (f_in).
        points (float): Total points achieved.
    """
    # Hard-coded marker geometry: side length of the square, in the same
    # units as the destination's '/Left' and '/Top' coordinates.
    marker_size = 5

    source = PdfFileReader(f_in)
    dest = source.getNamedDestinations()
    fields = source.getFields()

    # Copy pages and the form definition into a new document.
    pw = PdfFileWriter()
    pr = PdfFileReader(f_in, strict=False)
    pw.appendPagesFromReader(pr)
    pw._root_object.update(
        {NameObject("/AcroForm"): pr.trailer["/Root"]["/AcroForm"]})
    # NeedAppearances asks the viewer to regenerate the field appearances,
    # so that the values written below are actually displayed.
    pw._root_object["/AcroForm"].update(
        {NameObject("/NeedAppearances"): BooleanObject(True)})

    num_pages = pr.getNumPages()

    # Sometimes '/V' disappears from the keys, so only fields that still
    # carry a value are transferred. The mapping is the same for all pages.
    field_values = {
        fk: fv['/V'] for fk, fv in fields.items() if '/V' in fv.keys()
    }
    for p in range(num_pages):
        # Hand over a copy so every page starts from the same mapping.
        self._update_page_form_checkbox_values(pw.getPage(p),
                                               dict(field_values))
    self._update_page_form_checkbox_values(pw.getPage(0),
                                           {'points': str(points)})

    # The context manager closes the file even if writing fails.
    with open(f_out, 'wb') as out:
        pw.write(out)

    # Resolve the page of every 'ht_' destination once. Destinations whose
    # page is outside the document are skipped, and the stable sort keeps
    # the page-by-page order in which annotations are added.
    markers = []
    for dk, dv in dest.items():
        if not dk.startswith('ht_'):
            continue
        page = pr.getDestinationPageNumber(dv)
        if 0 <= page < num_pages:
            markers.append((page, dk, dv))
    markers.sort(key=lambda marker: marker[0])

    # The annotations have to be added to the already written output file:
    # annotating f_in first and filling the form afterwards did not work.
    a = PdfAnnotator(f_out)
    for page, dk, dv in markers:
        inds = [int(ind) for ind in dk[3:].split(':')]
        if inds[4] not in solution[inds[1]][1]:
            continue
        left = float(dv['/Left'])
        top = float(dv['/Top'])
        a.add_annotation(
            'square',
            Location(x1=left, y1=top,
                     x2=left + marker_size, y2=top + marker_size,
                     page=page),
            Appearance(stroke_color=(0, 1, 0), stroke_width=5),
        )
    a.write(f_out)