Exemplo n.º 1
0
def basic_asset_for_element(asset_el):
    """Build or refresh a ``Post`` from one Atom entry element.

    The post is looked up by the entry's Atom ``id``; when no such post
    exists a new, unsaved one is created.  The entry's published date,
    content and author are then copied onto the post.  The post is not
    saved here; that is left to the caller.

    :param asset_el: element for a single Atom entry (anything offering the
        ElementTree ``find``/``findtext`` API).
    :returns: the populated ``Post`` instance.
    """
    atom_id = asset_el.findtext('{http://www.w3.org/2005/Atom}id')
    logging.debug('Parsing asset %s', atom_id)

    # Reuse the post from an earlier import when there is one.
    try:
        asset = Post.objects.get(atom_id=atom_id)
    except Post.DoesNotExist:
        asset = Post(atom_id=atom_id)
    asset.imported = True

    # The published stamp must be in the 'YYYY-MM-DDTHH:MM:SSZ' form.
    publ = asset_el.findtext('{http://www.w3.org/2005/Atom}published')
    publ_dt = datetime.strptime(publ, '%Y-%m-%dT%H:%M:%SZ')
    asset.created = publ_dt

    # Only 'html' and 'xhtml' content is copied; any other content type
    # leaves asset.html untouched.
    content_el = asset_el.find('{http://www.w3.org/2005/Atom}content')
    content_type = content_el.get('type')
    if content_type == 'html':
        asset.html = content_el.text
    elif content_type == 'xhtml':
        # XHTML content is wrapped in a single div: take the div's leading
        # text plus the serialization of each of its child elements.
        html_el = content_el.find('{http://www.w3.org/1999/xhtml}div')
        html = html_el.text or u''
        # Iterate the element directly rather than through the deprecated
        # getchildren(); both visit the same direct children in order.
        html += u''.join(ElementTree.tostring(el) for el in html_el)
        asset.html = html

    author_el = asset_el.find('{http://www.w3.org/2005/Atom}author')
    author_name = author_el.findtext('{http://www.w3.org/2005/Atom}name')
    openid = author_el.findtext('{http://www.w3.org/2005/Atom}uri')
    # Import "gone" folks' comments anonymously.
    if openid != 'http://www.vox.com/gone/':
        asset.author = person_for_openid(openid, author_name).user

    return asset
Exemplo n.º 2
0
    def import_events(self, source, atomid_prefix, foafsource):
        """Import a LiveJournal XML export into the local models.

        Works through the export in order: optional FOAF data, the author's
        userpics, friend groups and friends, then every event (post) along
        with its comments.  Posts are matched on their Atom id, so running
        the import again updates existing posts rather than duplicating
        them; security settings are only applied to newly created posts.

        :param source: the export, in any form ``ElementTree.parse`` accepts.
        :param atomid_prefix: prefix for generated Atom ids and trust group
            tags; when ``None`` one is built from the export's server domain
            and username.
        :param foafsource: FOAF source handed to ``self.import_foaf``;
            skipped when falsy.
        :raises AssertionError: if an event has no ``date``.
        """
        def binary_str(value):
            # Base-2 text of a non-negative int; only used for debug logging.
            return str(value) if value <= 1 else binary_str(value >> 1) + str(value & 1)

        tree = ElementTree.parse(source)
        root = tree.getroot()

        username = root.get('username')
        server = root.get('server')
        # Drop the leading host label(s), e.g. 'www.livejournal.com' becomes
        # 'livejournal.com'.
        # NOTE(review): a two-label server name such as 'livejournal.com'
        # yields just 'com' here -- confirm exports always carry a host label.
        server_domain = '.'.join(server.rsplit('.', 2)[1:])
        openid_for = partial(self.generate_openid, server_domain)
        if atomid_prefix is None:
            atomid_prefix = 'urn:lj:%s:atom1:%s:' % (server_domain, username)

        author_openid = openid_for(username)
        post_author = self.make_my_openid(author_openid)

        # First, if there's a FOAF, learn all my friends' names and faces.
        if foafsource:
            self.import_foaf(foafsource, server_domain)

        # Next import all my userpics.  Existing avatars (same user and
        # keyword) are reused; new ones are decoded from base64 and saved.
        avatars = dict()
        for userpic in tree.findall('/userpics/user/userpic'):
            keyword = userpic.get('keyword')
            logging.debug("Importing userpic %r", keyword)
            try:
                avatar = Avatar.objects.get(user=post_author, name=keyword)
            except Avatar.DoesNotExist:
                data64 = userpic.text
                data = b64decode(data64)
                avatar = Avatar(user=post_author, name=keyword)
                avatar.image.save(slugify(keyword) or 'userpic', ContentFile(data), save=True)
            avatars[keyword] = avatar

        # Now update groups and friends, so we can knit the posts together right.
        # group_objs maps the export's numeric group id to its TrustGroup.
        group_objs = dict()
        for group in tree.findall('/friends/group'):
            groupid = int(group.findtext('id'))
            name = group.findtext('name')

            tag = '%sgroup:%d' % (atomid_prefix, groupid)
            group_obj, created = bee.models.TrustGroup.objects.get_or_create(user=post_author, tag=tag,
                defaults={'display_name': name})
            group_objs[groupid] = group_obj

        # One extra group that every imported friend belongs to.
        all_friends_tag = '%sfriends' % atomid_prefix
        all_friends_group, created = bee.models.TrustGroup.objects.get_or_create(
            user=post_author, tag=all_friends_tag, defaults={'display_name': 'LiveJournal friends'})

        for friend in tree.findall('/friends/friend'):
            friendname = friend.findtext('username')
            openid = openid_for(friendname)

            ident_person = self.person_for_openid(openid, username=friendname, display_name=friend.findtext('fullname'))

            # Update their groups.
            group_ids = tuple(int(groupnode.text) for groupnode in friend.findall('groups/group'))
            logging.debug("Setting %s's groups to %r", friendname, group_ids)
            ident_person.groups = [all_friends_group] + [group_objs[groupid] for groupid in group_ids]

        # Import the posts.
        for event in tree.findall('/events/event'):
            ditemid = event.get('ditemid')
            logging.debug('Parsing event %s', ditemid)
            atom_id = '%s%s' % (atomid_prefix, ditemid)

            # Reuse the post from an earlier import when there is one.
            try:
                post = Post.objects.get(atom_id=atom_id)
            except Post.DoesNotExist:
                post = Post(atom_id=atom_id)

            # Flatten the event's <prop name=... value=...> children.
            event_props = {}
            for prop in event.findall('props/prop'):
                key = prop.get('name')
                val = prop.get('value')
                event_props[key] = val

            subject = event.findtext('subject')
            post.title = striptags(subject) if subject else ''
            post.author = post_author

            publ = event.findtext('date')
            assert publ, 'event has no date :('
            publ_dt = datetime.strptime(publ, '%Y-%m-%d %H:%M:%S')
            # TODO: is this in the account's timezone or what?
            post.published = publ_dt

            content_root = BeautifulSoup(event.findtext('event'), selfClosingTags=('lj',))
            self.apply_lj_tags(content_root)
            # TODO: handle opt_nocomments prop
            # TODO: put music and mood in the post content
            # TODO: handle taglist prop

            post.html = str(content_root).decode('utf8')
            # Add line breaks to the post if it's not preformatted.
            if not int(event_props.get('opt_preformatted', 0)):
                post.html = self.html_text_transform(post.html)
            post.html, assets = self.import_images_for_post_html(post)

            pic_keyword = event_props.get('picture_keyword')
            if pic_keyword and pic_keyword in avatars:
                post.avatar = avatars[pic_keyword]

            if not post.slug:
                def possible_slugs():
                    # Candidate slug sources: the title first, then the
                    # first seven words of the tag-stripped body.
                    yield post.title
                    post_text = striptags(post.html)
                    slug_source = truncate_words(post_text, 7, end_text='')
                    yield slug_source

                post.slug = self.unused_slug_for_post(post, possible_slugs())

            # Pre-save the post in case we want to assign trust groups.
            post_is_new = not post.pk
            post.save()

            for asset in assets:
                asset.posts.add(post)

            # Remember where the post lived on the original site.
            legacy_url = urljoin(author_openid, '%s.html' % ditemid)
            legacy_url_parts = urlsplit(legacy_url)
            bee.models.PostLegacyUrl.objects.get_or_create(post=post,
                defaults={'netloc': legacy_url_parts.netloc, 'path': legacy_url_parts.path})

            # Security is only applied on first import, so later local
            # changes to an existing post's privacy are not overwritten.
            if post_is_new:
                security = event.get('security')
                if security == 'private':
                    logging.debug('Oh ho post %s is all fancy private', ditemid)
                    post.private = True
                elif security == 'usemask':
                    mask = int(event.get('allowmask'))
                    logging.debug('Post %s has mask %s?', ditemid, binary_str(mask))

                    # NOTE(review): only a mask of exactly 1 maps to the
                    # all-friends group; a mask that sets bit 0 together
                    # with group bits gets the custom groups only --
                    # confirm that is intended.
                    if mask == 1:
                        mask_groups = [all_friends_group]
                        # Plus all the other bits are 0, so we'll add no other groups.
                    else:
                        mask_groups = list()

                    # Bit 0 of the allowmask means "all friends" and bits
                    # 1 through 30 each stand for one custom friend group,
                    # so all thirty group bits have to be examined.
                    for i in range(1, 31):
                        mask = mask >> 1
                        if mask == 0:
                            break
                        logging.debug('    Remaining mask %s', binary_str(mask))
                        if mask & 0x01:
                            logging.debug('    Yay %s has group %d!', ditemid, i)
                            if i in group_objs:
                                logging.debug('    And group %d exists woohoo!!', i)
                                mask_groups.append(group_objs[i])

                    logging.debug('So post %s gets %d groups', ditemid, len(mask_groups))
                    post.private = True
                    post.private_to = mask_groups
                else:
                    # Public!
                    post.private = False
                    post.private_to = []

                post.save()

            logging.info('Saved new post %s (%s) as #%d', ditemid, post.title, post.pk)

            # Import the comments.
            for comment in event.findall('comments/comment'):
                self.import_comment(comment, post, openid_for)
Exemplo n.º 3
0
    def import_events(self, source, atomid_prefix, foafsource):
        """Import a LiveJournal XML export into the local models.

        Works through the export in order: optional FOAF data, the author's
        userpics, friend groups and friends, then every event (post) along
        with its comments.  Posts are matched on their Atom id, so running
        the import again updates existing posts rather than duplicating
        them; security settings are re-applied on every import.

        :param source: the export, in any form ``ElementTree.parse`` accepts.
        :param atomid_prefix: prefix for generated Atom ids and trust group
            tags; when ``None`` one is built from the export's server domain
            and username.
        :param foafsource: FOAF source handed to ``self.import_foaf``;
            skipped when falsy.
        :raises AssertionError: if an event has no ``date``.
        """
        def binary_str(value):
            # Base-2 text of a non-negative int; only used for debug logging.
            return str(value) if value <= 1 else binary_str(value >> 1) + str(value & 1)

        tree = ElementTree.parse(source)
        root = tree.getroot()

        username = root.get('username')
        server = root.get('server')
        # Drop the leading host label(s), e.g. 'www.livejournal.com' becomes
        # 'livejournal.com'.
        # NOTE(review): a two-label server name such as 'livejournal.com'
        # yields just 'com' here -- confirm exports always carry a host label.
        server_domain = '.'.join(server.rsplit('.', 2)[1:])
        openid_for = partial(self.generate_openid, server_domain)
        if atomid_prefix is None:
            atomid_prefix = 'urn:lj:%s:atom1:%s:' % (server_domain, username)

        post_author = self.make_my_openid(openid_for(username))

        # First, if there's a FOAF, learn all my friends' names and faces.
        if foafsource:
            self.import_foaf(foafsource, server_domain)

        # Next import all my userpics.  Existing avatars (same user and
        # keyword) are reused; new ones are decoded from base64 and saved.
        avatars = dict()
        for userpic in tree.findall('/userpics/user/userpic'):
            keyword = userpic.get('keyword')
            logging.debug("Importing userpic %r", keyword)
            try:
                avatar = Avatar.objects.get(user=post_author, name=keyword)
            except Avatar.DoesNotExist:
                data64 = userpic.text
                data = b64decode(data64)
                avatar = Avatar(user=post_author, name=keyword)
                avatar.image.save(slugify(keyword) or 'userpic', ContentFile(data), save=True)
            avatars[keyword] = avatar

        # Now update groups and friends, so we can knit the posts together right.
        # group_objs maps the export's numeric group id to its TrustGroup.
        group_objs = dict()
        for group in tree.findall('/friends/group'):
            groupid = int(group.findtext('id'))
            name = group.findtext('name')

            tag = '%sgroup:%d' % (atomid_prefix, groupid)
            group_obj, created = bee.models.TrustGroup.objects.get_or_create(user=post_author, tag=tag,
                defaults={'display_name': name})
            group_objs[groupid] = group_obj

        # One extra group that every imported friend belongs to.
        all_friends_tag = '%sfriends' % atomid_prefix
        all_friends_group, created = bee.models.TrustGroup.objects.get_or_create(
            user=post_author, tag=all_friends_tag, defaults={'display_name': 'LiveJournal friends'})

        for friend in tree.findall('/friends/friend'):
            friendname = friend.findtext('username')
            openid = openid_for(friendname)

            ident_person = self.person_for_openid(openid, friend.findtext('fullname'))

            # Update their groups.
            group_ids = tuple(int(groupnode.text) for groupnode in friend.findall('groups/group'))
            logging.debug("Setting %s's groups to %r", friendname, group_ids)
            ident_person.groups = [all_friends_group] + [group_objs[groupid] for groupid in group_ids]

        # Import the posts.
        for event in tree.findall('/events/event'):
            ditemid = event.get('ditemid')
            logging.debug('Parsing event %s', ditemid)
            atom_id = '%s%s' % (atomid_prefix, ditemid)

            # Reuse the post from an earlier import when there is one.
            try:
                post = Post.objects.get(atom_id=atom_id)
            except Post.DoesNotExist:
                post = Post(atom_id=atom_id)

            # Flatten the event's <prop name=... value=...> children.
            event_props = {}
            for prop in event.findall('props/prop'):
                key = prop.get('name')
                val = prop.get('value')
                event_props[key] = val

            subject = event.findtext('subject')
            post.title = striptags(subject) if subject else ''
            post.author = post_author

            publ = event.findtext('date')
            assert publ, 'event has no date :('
            publ_dt = datetime.strptime(publ, '%Y-%m-%d %H:%M:%S')
            # TODO: is this in the account's timezone or what?
            post.published = publ_dt

            content_root = BeautifulSoup(event.findtext('event'))
            # Add line breaks to the post if it's not preformatted.
            if not int(event_props.get('opt_preformatted', 0)):
                self.format_soup(content_root)
            # Unwrap tags whose name matches lj-raw or lj-cut: the tag itself
            # goes away but its contents stay in place.
            for el in content_root.findAll(re.compile(r'lj-(?:raw|cut)')):
                # Replace it with its children.
                el_parent = el.parent
                el_index = el_parent.contents.index(el)
                el.extract()
                # Inserting in reverse at a fixed index keeps the children
                # in their original order.  Iterate over a copy; presumably
                # re-parenting a child mutates el.contents.
                for child in reversed(list(el.contents)):
                    el_parent.insert(el_index, child)
            # TODO: handle opt_nocomments prop
            # TODO: put music and mood in the post content
            # TODO: handle taglist prop
            post.html = str(content_root)

            pic_keyword = event_props.get('picture_keyword')
            if pic_keyword and pic_keyword in avatars:
                post.avatar = avatars[pic_keyword]

            if not post.slug:
                def gunk_slugs():
                    # Endless stream of random 7-character strings.  Digits
                    # appear twice in the alphabet, so each digit is drawn
                    # twice as often as each letter.
                    chars = string.letters + string.digits + string.digits
                    while True:
                        gunk = ''.join(random.choice(chars) for i in range(7))
                        yield gunk

                def possible_slugs():
                    # Prefer the title, then the first seven words of the
                    # tag-stripped body.
                    slug_source = post.title
                    if not slug_source:
                        post_text = striptags(post.html)
                        slug_source = truncate_words(post_text, 7, end_text='')
                    if not slug_source:
                        # Nothing to derive a slug from: hand out random
                        # strings forever (this loop never finishes, so the
                        # code below is not reached in this case).
                        for gunk in gunk_slugs():
                            yield gunk
                    yield slugify(slug_source)
                    # Source slug already taken: keep trying it with a
                    # random suffix.
                    for gunk in gunk_slugs():
                        possible_unique = u'%s %s' % (slug_source, gunk)
                        yield slugify(possible_unique)

                # A slug only has to be unique among this author's other posts.
                other_posts = post.author.posts_authored.all()
                if post.id:
                    other_posts = other_posts.exclude(id=post.id)
                def is_slug_used(slug):
                    return other_posts.filter(slug=slug).exists()

                unused_slugs = ifilterfalse(is_slug_used, possible_slugs())
                post.slug = unused_slugs.next()  # only need the first that's not used

            # Pre-save the post in case we want to assign trust groups.
            post.save()

            security = event.get('security')
            if security == 'private':
                logging.debug('Oh ho post %s is all fancy private', ditemid)
                post.private = True
            elif security == 'usemask':
                mask = int(event.get('allowmask'))
                logging.debug('Post %s has mask %s?', ditemid, binary_str(mask))

                # NOTE(review): only a mask of exactly 1 maps to the
                # all-friends group; a mask that sets bit 0 together with
                # group bits gets the custom groups only -- confirm that is
                # intended.
                if mask == 1:
                    mask_groups = [all_friends_group]
                    # Plus all the other bits are 0, so we'll add no other groups.
                else:
                    mask_groups = list()

                # Bit 0 of the allowmask means "all friends" and bits 1
                # through 30 each stand for one custom friend group, so all
                # thirty group bits have to be examined.
                for i in range(1, 31):
                    mask = mask >> 1
                    if mask == 0:
                        break
                    logging.debug('    Remaining mask %s', binary_str(mask))
                    if mask & 0x01:
                        logging.debug('    Yay %s has group %d!', ditemid, i)
                        if i in group_objs:
                            logging.debug('    And group %d exists woohoo!!', i)
                            mask_groups.append(group_objs[i])

                logging.debug('So post %s gets %d groups', ditemid, len(mask_groups))
                post.private = True
                post.private_to = mask_groups
            else:
                # Public!
                post.private = False
                post.private_to = []

            post.save()
            logging.info('Saved new post %s (%s) as #%d', ditemid, post.title, post.pk)

            # Import the comments.
            for comment in event.findall('comments/comment'):
                self.import_comment(comment, post, openid_for)