Пример #1
0
 def _extract_pdf_forms(self, fname):
     """Read the interactive form fields stored in a PDF file.

     Parameters:
         fname (str): Path to PDF file.

     Returns:
         dict: Form field data, exactly as reported by the reader's
         ``getFields()`` call.
     """
     reader = PdfFileReader(fname)
     form_fields = reader.getFields()
     return form_fields
Пример #2
0
def getFormFields(filepath):
    """
    Get field information of pdf file

    Every form field is reduced to a ``(field name, field type)`` pair, where
    the type is looked up in ``FIELDTYPES`` via the field's ``/FT`` entry.
    If a field name starts with a digit (optionally preceded by whitespace),
    its first underscore-separated segment is dropped, so numbered copies of
    one field share a name and are counted together.

    :param filepath: filepath to pdf file
    :return: OrderedDict filled in ascending (name, type) order, mapping the
        UTF-8 encoded field name to a tuple of (UTF-8 encoded field type,
        number of occurrences); empty if the reader reports no fields
    """
    fields = []
    with open(filepath, 'rb') as f:
        reader = PdfFileReader(f)

        # getFields() can return None when the document has no form data;
        # treat that the same as "no fields" instead of crashing.
        form_fields = reader.getFields() or {}

        # .values()/.items() work on both Python 2 and 3 (iteritems is
        # Python 2 only); only the field objects are needed here.
        for field in form_fields.values():
            name = field['/T']
            # Raw string keeps '\s' a regex escape rather than a (deprecated)
            # string escape.
            if re.search(r'^\s*[0-9]', name):
                name = "_".join(name.split('_')[1:])
            fields.append((name, FIELDTYPES[field['/FT']]))

        ret = OrderedDict()
        for (name, field_type), count in sorted(Counter(fields).items()):
            ret[name.encode('utf-8')] = (field_type.encode('utf-8'), count)
        return ret
Пример #3
0
def getFormFields(filepath):
    """
    Get field information of pdf file

    Each form field contributes one ``(field name, field type)`` pair; the
    type comes from ``FIELDTYPES`` keyed by the field's ``/FT`` entry. A name
    that begins with a digit (leading whitespace allowed) loses its first
    underscore-separated segment, which lets numbered duplicates of a field
    be counted under a single name.

    :param filepath: filepath to pdf file
    :return: OrderedDict built in ascending (name, type) order whose keys are
        UTF-8 encoded field names and whose values are tuples of
        (UTF-8 encoded field type, number of occurrences); empty if the
        reader reports no fields
    """
    fields = []
    with open(filepath, 'rb') as f:
        reader = PdfFileReader(f)

        # A document without form data makes getFields() return None, so
        # fall back to an empty mapping.
        all_fields = reader.getFields() or {}

        # Iterate with .values() (valid on Python 2 and 3) because the
        # dictionary keys are never used.
        for v in all_fields.values():
            field_type = FIELDTYPES[v['/FT']]
            # Raw-string pattern: '\s' must reach the regex engine untouched.
            if re.search(r'^\s*[0-9]', v['/T']):
                fields.append(("_".join(v['/T'].split('_')[1:]), field_type))
            else:
                fields.append((v['/T'], field_type))

        ret = OrderedDict()
        for key, occurrences in sorted(Counter(fields).items()):
            ret[key[0].encode('utf-8')] = (key[1].encode('utf-8'), occurrences)
        return ret
Пример #4
0
    def draw_rectangles_for_solution(self, f_in, f_out, solution, points):
        """Mark the correct answer of every problem with a green square.

        The pages and the form of ``f_in`` are copied to ``f_out`` together
        with the current field values and the total score; afterwards a green
        square annotation is added next to each correct answer.

        Parameters:
            f_in (str): Path to the input PDF file.
            f_out (str): Path to the output PDF file.
            solution (dict): The solution (correct answers) corresponding to
                the input PDF file (f_in). It is indexed as
                ``solution[i][1]``, which must support ``in`` tests against
                an answer index.
            points (float): Total points achieved; passed on as
                ``str(points)`` under the key ``'points'`` for the first page.
        """
        # Hard-coded marker geometry: edge length of the square and the width
        # of its stroke.
        marker_size = 5
        marker_stroke_width = 5

        pr = PdfFileReader(f_in)
        dest = pr.getNamedDestinations()
        fields = pr.getFields()

        # NOTE: the form must be written to f_out *before* the annotation
        # pass below. Per the original author's note, annotating f_in first
        # and copying the form afterwards did not work.
        pw = PdfFileWriter()
        pr = PdfFileReader(f_in, strict=False)
        pw.appendPagesFromReader(pr)
        pw._root_object.update(
            {NameObject("/AcroForm"): pr.trailer["/Root"]["/AcroForm"]})
        pw._root_object["/AcroForm"].update(
            {NameObject("/NeedAppearances"): BooleanObject(True)})
        for p in range(pr.getNumPages()):
            # Only fields that actually carry a value are forwarded;
            # sometimes '/V' disappears from the keys.
            self._update_page_form_checkbox_values(pw.getPage(p), {
                fk: fv['/V']
                for fk, fv in fields.items() if '/V' in fv
            })
        self._update_page_form_checkbox_values(pw.getPage(0),
                                               {'points': str(points)})
        # Context manager guarantees the handle is closed even if the write
        # fails part-way through.
        with codecs.open(f_out, 'wb') as f:
            pw.write(f)

        a = PdfAnnotator(f_out)
        for p in range(pr.getNumPages()):
            for dk, dv in dest.items():
                # Cheap name test first: only 'ht_' destinations are
                # considered for markers.
                if not dk.startswith('ht_'):
                    continue
                if pr.getDestinationPageNumber(dv) != p:
                    continue
                # The name after 'ht_' is a colon-separated list of integers;
                # index 1 selects the solution entry, index 4 is the value
                # checked against that entry's correct answers.
                inds = [int(ind) for ind in dk[3:].split(':')]
                if inds[4] not in solution[inds[1]][1]:
                    continue
                left = float(dv['/Left'])
                top = float(dv['/Top'])
                a.add_annotation(
                    'square',
                    Location(x1=left,
                             y1=top,
                             x2=left + marker_size,
                             y2=top + marker_size,
                             page=p),
                    Appearance(stroke_color=(0, 1, 0),
                               stroke_width=marker_stroke_width),
                )
        a.write(f_out)