Example #1
def _import_documents(self, pdf_dir):
    # `document_data` is assumed to be built earlier in the importer
    # (a mapping of ids to {'title': ..., 'file': ...} dicts).
    doc_dir = os.path.join(pdf_dir, 'documents')
    for data in document_data.values():
        title = data['title']
        filename = data['file']
        self.logger.debug('Creating document {}'.format(
            title.encode('utf-8')))
        doc = Document(title=title)
        # Wrap the open file handle in django.core.files.File so the
        # FileField copies it into the configured document storage.
        with open(os.path.join(doc_dir, filename), 'rb') as fh:
            document_file = File(fh)
            doc.file.save(filename, document_file)
        doc.save()
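
The method above is only an excerpt: `document_data`, `Document`, `File`, and `self.logger` are all defined elsewhere in the importer. A minimal sketch of the context it assumes is shown below; the import paths and the shape of `document_data` are assumptions, not part of the original (the Wagtail 1.x path `wagtail.wagtaildocs.models` matches Example #3, newer Wagtail uses `wagtail.documents.models`).

import logging
import os

from django.core.files import File
from wagtail.wagtaildocs.models import Document  # assumption; `wagtail.documents.models` in newer Wagtail

# Hypothetical shape of the mapping the loop iterates over.
document_data = {
    'doc-1': {'title': 'Example report', 'file': 'example-report.pdf'},
}


class PdfImporter:
    def __init__(self):
        self.logger = logging.getLogger(__name__)

    # the _import_documents() method shown above would be defined here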
Example #2
def dummy_wagtail_doc(request):
    if not Collection.objects.exists():  # pragma: no cover
        Collection.add_root()

    doc = Document(title='hello')
    doc.file.save('foo.txt', ContentFile('foo', 'foo.txt'))
    doc.save()
    doc = Document.objects.get(pk=doc.pk)  # Reload to ensure the upload took

    def nuke():
        try:  # Try cleaning up so `/var/media` isn't full of foo
            doc.file.delete()
            doc.delete()
        except Exception:  # pragma: no cover
            pass

    request.addfinalizer(nuke)
    return doc
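
This function relies on `request.addfinalizer`, so it reads like a pytest fixture. A minimal sketch of the module it assumes could look like the following; the `@pytest.fixture` registration, the test, and the import paths are assumptions, not taken from the original.

import pytest
from django.core.files.base import ContentFile
from wagtail.wagtailcore.models import Collection   # assumption; `wagtail.models` in newer Wagtail
from wagtail.wagtaildocs.models import Document     # assumption; `wagtail.documents.models` in newer Wagtail


@pytest.fixture
def dummy_wagtail_doc(request):
    ...  # body as in Example #2 above


@pytest.mark.django_db
def test_dummy_doc_is_saved(dummy_wagtail_doc):
    # The fixture returns a freshly reloaded Document instance.
    assert dummy_wagtail_doc.pk is not None
    assert dummy_wagtail_doc.title == 'hello'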
Example #3
def savefiles(msg, simulate):
    """
    Extract parts from  msg (which is an email.message_from_string(str) instance)
    and send them to the database.
    NOTES:
    - uses only the first found email address to assume recipient

    TODO stuff
    - reject if From: is empty
    """
    part_counter = 1
    subject = get_subject(msg)
    tos = get_recipient(msg)
    msg_id = msg.get('message-id', '')
    froms = msg.get_all('from', [])
    print(subject, tos, froms)
    p = re.compile(r'([\w.-]+@[\w.-]+)')
    try:  # froms may be empty or contain no address: IndexError: list index out of range
        matches = p.findall(froms[0])
        print("MATS", matches[0])
        sender_nick = matches[0].split(".")[0].title()  # Use everything before the first '.'
    except IndexError:
        print("ERROR: No From header %s" % msg_id)
        return False
    if len(tos) == 0:
        print("ERROR: No Tos found %s" % (msg_id))
        return False
    p = re.compile(r'(\w+)\.(\w+)@')  # e.g. <user>.<authkey>@<domain>
    matches = p.findall(tos[0])
    if len(matches) > 0:
        username = matches[0][0].title()
        key = matches[0][1].lower()
    else:
        print("ERROR: No user.authkey found from %s %s" % (tos[0], msg_id))
        # return False
    # try:
    #     user = User.objects.get(username=username.lower())
    # except User.DoesNotExist:
    #     print("User.DoesNotExist !", username)
    #     log.warning("User.DoesNotExist: '%s'" % username)
    #     return False

    parts_not_to_save = ["multipart/mixed",
                         "multipart/alternative",
                         "multipart/related",
                         "text/plain",
                         ]
    if simulate:  # Print lots of debug stuff
        print('=========\nMetadata:\n=========')
        print('''Subject: %s\nUsername: \nFrom: %s\nTo: %s\nM-id: %s\n''' % (
                subject, ','.join(froms), ','.join(tos), msg_id))
        print('=========\nParts:\n=========')
    saved_parts = 0
    log.info("Walking through message parts")
    bodies = []
    index_page = NewssheetIndexPage.objects.live()[0]
    # print(index_page)
    page = NewssheetPage(
        title=subject,
        date=datetime.datetime.today(),
        live=True,
    )
    page.slug = slugify(page.title)
    # print(dir(page))
    # page.save()
    page.unpublish(commit=False)
    newssheet = index_page.add_child(instance=page)
    newssheet.save_revision(submitted_for_moderation=True)
    # newssheet.unpublish()
    print(newssheet)
    for part in msg.walk():
        part_content_type = part.get_content_type()
        filename, filedata = handle_part(part)
        if part_content_type == "text/plain":
            pl = part.get_payload(decode=True)
            pl = pl.decode()  #(encoding=word[1])
            bodies.append(pl)
            # if part['Content-Transfer-Encoding'] == 'base64':
            #     bodies.append(pl)
            # elif part['Content-Transfer-Encoding'] == 'quoted-printable':
            #     bodies.append(pl)
            # elif part['Content-Transfer-Encoding'] == 'jotain muuta':
            #     bodies.append(pl)
        print(bodies)
        if part_content_type in parts_not_to_save or filename is None:
            # print "NOT SAVING", part_content_type
            log_msg = "Not saving '%s', filename '%s'." % (part_content_type, filename)
            log.info(log_msg)
            if simulate: print(log_msg)  # Print lots of debug stuff
            continue
            #print filedata, type(filedata), len(filedata)
        if filedata is None or len(filedata) == 0:
            log_msg = "Not saving '%s', filename '%s', file has no data" % (part_content_type, filename)
            log.warning(log_msg)
            if simulate:
                print(log_msg)  # Print lots of debug stuff
            continue
        log_msg = u'Saving: %s (%s)' % (filename, part_content_type)
        log.info(log_msg)
        if simulate:
            print(log_msg)  # Print lots of debug stuff
        # Saving attachments
        from wagtail.wagtaildocs.models import Document
        attachment = Document(title=filename)
        attachment.file.save(filename, ContentFile(filedata))
        attachment.save()
        na = NewssheetPageAttachments(attachment=attachment, text=filename,
                                      page=newssheet)
        na.save()
        saved_parts += 1  # count the attachment we just stored
        # newssheet.attachments.add(na)
    newssheet.body = '\n\n'.join(bodies)
    newssheet.save_revision()
    return saved_parts
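
savefiles() expects an already-parsed email.message.Message plus several project-specific names (log, get_subject, get_recipient, handle_part, and the Newssheet* page models) to exist in the surrounding module. A minimal, hypothetical driver that feeds it a raw message from stdin might look like this; the commented-out placeholder imports are assumptions, not real module paths.

import datetime   # used by savefiles() for NewssheetPage.date
import email
import logging
import re         # used by savefiles() for the address regexes
import sys

from django.core.files.base import ContentFile  # used when saving attachments
from django.utils.text import slugify           # assumption: slugify comes from django.utils.text

log = logging.getLogger(__name__)

# Placeholders for the project-specific helpers and models savefiles() uses;
# their real import paths are not shown in the original excerpt.
# from <yourapp>.utils import get_subject, get_recipient, handle_part
# from <yourapp>.models import (NewssheetIndexPage, NewssheetPage,
#                               NewssheetPageAttachments)

if __name__ == '__main__':
    msg = email.message_from_string(sys.stdin.read())
    # simulate=True only adds debug printing; the page and attachments
    # are still created.
    savefiles(msg, simulate=True)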