Exemplo n.º 1
0
def get_fields(the_file, include_attributes=False):
  """Get the list of fields needed inside a template file (PDF or Docx Jinja tags).

  Args:
    the_file: Either a DAFileList or a single file object. For the PDF check
      only the first element of a DAFileList is consulted. For the non-PDF
      path, ``the_file.path()`` is called on the argument as given.
    include_attributes: Accepted for interface compatibility; this function
      does not currently read it.

  Returns:
    For a PDF, the first item of every entry returned by ``get_pdf_fields()``.
    Otherwise the file is converted with ``word_to_markdown(..., 'docx')`` and
    the names found in ``{{ ... }}`` tags and in the ``in ...`` part of
    ``{% for x in ... %}`` tags are returned. They are collected in a set, so
    the order of the returned list is arbitrary. An empty list is returned
    when the conversion yields ``None``.
  """
  # For a DAFileList only the first element is consulted for the PDF check.
  pdf_candidate = the_file[0] if isinstance(the_file, DAFileList) else the_file
  if pdf_candidate.mimetype == 'application/pdf':
    return [field[0] for field in pdf_candidate.get_pdf_fields()]

  result_file = word_to_markdown(the_file.path(), 'docx')
  if result_file is None:
    return []

  # Plain 'r': the legacy 'U' flag was a no-op in Python 3 text mode and makes
  # open() raise ValueError on Python 3.11+.
  with open(result_file.name, 'r', encoding='utf-8') as fp:
    result = fp.read()

  fields = set()
  # NOTE: the "()" closing the second alternative is an empty regex group, not
  # escaped parentheses, so ".address.on_one_line" matches whatever follows it.
  addresses = r"(\b\S*)(((\.address_block\(\))|(\.address\.on_one_line())))"
  # Splits "something.method()" into the part before the call and the call.
  methods = r"(.*)(\..*\(\))"

  # Variables inside {{ }} tags.
  for variable in re.findall(r'{{ *([^\} ]+) *}}', result):
    # Drop backslashes (presumably Markdown escapes added by the conversion;
    # TODO confirm).
    variable = variable.replace("\\", "")
    # For a method call, record only the object the method is called on.
    method_match = re.match(methods, variable)
    fields.add(method_match.group(1) if method_match else variable)

    # Common address helpers implicitly need the underlying address field.
    address_match = re.match(addresses, variable)
    if address_match:
      fields.add(address_match.group(1) + '.address.address')

  # Iterables named in {% for x in ... %} tags (lowercase letters may directly
  # follow the opening "{%").
  for variable in re.findall(r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}', result):
    variable = variable.replace("\\", "")
    method_match = re.match(methods, variable)
    fields.add(method_match.group(1) if method_match else variable)

  # Strip out anything that still looks like a function or method call.
  return [x for x in fields if "(" not in x]
Exemplo n.º 2
0
 def is_fillable_docx(self, filename):
     """Report whether the named .docx file contains any template fields.

     Returns False when the extension reported by ``get_ext_and_mimetype`` is
     not "docx", when ``self.file_exists(filename)`` is false, or when
     ``word_to_markdown`` returns None. Otherwise returns True if at least
     one ``{{ ... }}`` tag or ``{% for x in ... %}`` tag is found in the
     converted text.
     """
     extension, _mimetype = get_ext_and_mimetype(filename)
     if extension != "docx" or not self.file_exists(filename):
         return False

     converted = word_to_markdown(self.get_file(filename), 'docx')
     if converted is None:
         return False

     with open(converted.name, 'r', encoding='utf-8') as handle:
         text = handle.read()

     # First pattern: {{ variable }} tags; second: the iterable of a for tag.
     tag_patterns = (
         r'{{ *([^\} ]+) *}}',
         r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}',
     )
     found = set()
     for pattern in tag_patterns:
         found.update(docx_variable_fix(name) for name in re.findall(pattern, text))
     return len(found) > 0
Exemplo n.º 3
0
 def is_fillable_docx(self, filename):
     """Report whether the named .docx file contains any template fields.

     Args:
         filename: Name passed to ``get_ext_and_mimetype``,
             ``self.file_exists`` and ``self.get_file``.

     Returns:
         False when the extension is not "docx", the file does not exist, or
         ``word_to_markdown`` returns None. Otherwise True if at least one
         ``{{ ... }}`` tag or ``{% for x in ... %}`` tag is found in the
         converted text, else False.
     """
     extension, mimetype = get_ext_and_mimetype(filename)
     if extension != "docx":
         return False
     if not self.file_exists(filename):
         return False
     path = self.get_file(filename)
     result_file = word_to_markdown(path, 'docx')
     if result_file is None:
         return False
     # Plain 'r': the legacy 'U' flag was a no-op in Python 3 text mode and
     # makes open() raise ValueError on Python 3.11+.
     with open(result_file.name, 'r', encoding='utf-8') as fp:
         result = fp.read()
     fields = set()
     # Variables inside {{ }} tags.
     for variable in re.findall(r'{{ *([^\} ]+) *}}', result):
         fields.add(docx_variable_fix(variable))
     # Iterables named in {% for x in ... %} tags.
     for variable in re.findall(r'{%[a-z]* for [A-Za-z\_][A-Za-z0-9\_]* in *([^\} ]+) *%}', result):
         fields.add(docx_variable_fix(variable))
     return bool(fields)
Exemplo n.º 4
0
 def convert_file_to_md(self, filename, convert_variables=True):
     """Convert a stored file to Markdown and store the result as a .md file.

     The conversion format is looked up first by MIME type in
     ``convertible_mimetypes`` and then by extension in
     ``convertible_extensions``.

     Args:
         filename: Name of the file to convert.
         convert_variables: When true, the converted text is passed through
             ``replace_square_brackets.sub(fix_variable_name, ...)`` before
             being written with ``self.write_file``. When false, the
             converted file is copied unchanged.

     Returns:
         The output filename (the input name with its extension replaced by
         ".md"), or None when the format is not convertible, the file does
         not exist, or ``word_to_markdown`` returns None.
     """
     extension, mimetype = get_ext_and_mimetype(filename)
     if mimetype and mimetype in convertible_mimetypes:
         the_format = convertible_mimetypes[mimetype]
     elif extension and extension in convertible_extensions:
         the_format = convertible_extensions[extension]
     else:
         return None
     if not self.file_exists(filename):
         return None
     path = self.get_file(filename)
     temp_file = word_to_markdown(path, the_format)
     if temp_file is None:
         return None
     out_filename = os.path.splitext(filename)[0] + '.md'
     if convert_variables:
         # Plain 'r': the legacy 'U' flag was a no-op in Python 3 text mode
         # and makes open() raise ValueError on Python 3.11+.
         with open(temp_file.name, 'r', encoding='utf-8') as fp:
             self.write_file(out_filename, replace_square_brackets.sub(fix_variable_name, fp.read()))
     else:
         shutil.copyfile(temp_file.name, self.get_file(out_filename))
     return out_filename
Exemplo n.º 5
0
 def convert_file_to_md(self, filename, convert_variables=True):
     """Convert a stored file to Markdown and store the result as a .md file.

     The conversion format is looked up first by MIME type in
     ``convertible_mimetypes`` and then by extension in
     ``convertible_extensions``.

     Args:
         filename: Name of the file to convert.
         convert_variables: When true, the converted text is passed through
             ``replace_square_brackets.sub(fix_variable_name, ...)`` before
             being written with ``self.write_file``. When false, the
             converted file is copied unchanged.

     Returns:
         The output filename (the input name with its extension replaced by
         ".md"), or None when the format is not convertible, the file does
         not exist, or ``word_to_markdown`` returns None.
     """
     extension, mimetype = get_ext_and_mimetype(filename)
     if mimetype and mimetype in convertible_mimetypes:
         the_format = convertible_mimetypes[mimetype]
     elif extension and extension in convertible_extensions:
         the_format = convertible_extensions[extension]
     else:
         return None
     if not self.file_exists(filename):
         return None
     temp_file = word_to_markdown(self.get_file(filename), the_format)
     if temp_file is None:
         return None
     out_filename = os.path.splitext(filename)[0] + '.md'
     if not convert_variables:
         # Keep the converted Markdown exactly as produced.
         shutil.copyfile(temp_file.name, self.get_file(out_filename))
         return out_filename
     # Plain 'r': the legacy 'U' flag was a no-op in Python 3 text mode and
     # makes open() raise ValueError on Python 3.11+.
     with open(temp_file.name, 'r', encoding='utf-8') as fp:
         self.write_file(out_filename, replace_square_brackets.sub(fix_variable_name, fp.read()))
     return out_filename