Пример #1
0
    def create_tags(self, tags):
        """
        Create a Tag row for every name in ``tags`` that does not exist yet.

        Each name is normalised with ``u_slugify``; names that normalise to an
        empty value are skipped.  New tags are created with
        ``locked=self.preloaded``; tags that already exist are updated to
        ``locked=True``.

        Failures of the final bulk insert are logged and swallowed
        (best-effort), so callers never see an exception from it.
        """
        logger.info("Creating tags")
        with transaction.commit_on_success():
            # Normalise first and drop repeats: two different inputs may map to
            # the same slug, and inserting the same text twice in one
            # bulk_create would create duplicate rows (or fail outright if the
            # column is unique).
            wanted = []
            seen = set()
            for tag in tags:
                tag_clean = u_slugify(tag)
                if tag_clean and tag_clean not in seen:  # skip empty slugs
                    seen.add(tag_clean)
                    wanted.append(tag_clean)

            # One query for the existing texts instead of one exists() per tag.
            existing = set(Tag.objects.values_list('text', flat=True))

            tag_obj_list = []
            for tag_clean in wanted:
                if tag_clean in existing:
                    # NOTE(review): an older inline comment said "set locked if
                    # preloaded", but the code has always locked existing tags
                    # unconditionally; that behaviour is kept here - confirm.
                    Tag.objects.filter(text=tag_clean).update(locked=True)
                else:
                    tag_obj_list.append(
                        Tag(text=tag_clean, locked=self.preloaded))

            try:
                if tag_obj_list:
                    Tag.objects.bulk_create(tag_obj_list)
            except Exception as e:
                # Still best-effort, but keep the traceback and let
                # KeyboardInterrupt/SystemExit propagate.
                logger.info("Error during tag bulk create")
                logger.exception(e)
            else:
                logger.info("Complete")
Пример #2
0
    def create_tag_relationships(self, tag_to_ci_map, id_list=False):
        """
        Bulk-create content-item <-> tag relationships.

        ``tag_to_ci_map`` is a list of pairings like
        ``[ [ ci_id, [ tag, ... ] ], ... ]``; a pairing whose tag list is
        ``None`` is skipped.

        When ``id_list`` is false (the default) each tag is a name: it is
        slugified with ``u_slugify`` and resolved to an id through a lookup of
        all existing Tag rows; names with no matching Tag are ignored.  When
        ``id_list`` is true the tags are taken to be Tag ids already.

        Errors raised by the bulk insert are logged and swallowed.
        """
        TagThroughModel = ContentItem.tags.through

        # text -> id lookup, only needed when we are given tag names.
        all_tags_lookup = {}
        if not id_list:
            for tag in Tag.objects.values('id', 'text'):
                all_tags_lookup[tag['text']] = tag['id']

        tags_to_add = []
        # A set gives O(1) duplicate detection; the (ci_id, tag_id) pairs are
        # plain ids and therefore hashable.
        seen_pairs = set()

        # create tag memberships
        for pairings in tag_to_ci_map:
            ci_id = pairings[0]
            tags = pairings[1]
            if tags is None:
                continue

            for tag in tags:
                if id_list:
                    tag_id = tag
                else:
                    tag_id = all_tags_lookup.get(u_slugify(tag))

                # A missing id would violate the NOT NULL constraint.
                if tag_id is None:
                    continue

                # A repeated pair would trigger a duplicate IntegrityError.
                id_pair = (ci_id, tag_id)
                if id_pair in seen_pairs:
                    continue

                seen_pairs.add(id_pair)
                tags_to_add.append(
                    TagThroughModel(contentitem_id=ci_id, tag_id=tag_id))

        logger.info("Adding tag relationships to database")
        try:
            TagThroughModel.objects.bulk_create(tags_to_add)
        except Exception as e:
            logger.info("Error during tag relationship bulk create")
            logger.exception(e)
        else:
            logger.info("Complete")
Пример #3
0
    def parse_manifests(self, manifest_xml):
        """
        Parse manifest XML into package, resource, tag and category data.

        ``manifest_xml`` is handed to BeautifulSoup with the 'xml' parser.
        Every <manifest> element produces one entry in ``'manifests'``; every
        <resource> with an ``href`` produces one entry in that manifest's
        ``'resources'`` (when an organization is present, only resources
        referenced by one of its <item> elements are kept).

        Metadata is resolved per resource, most specific source first:
        <item>, then <organization> (keywords/categories only), then
        <resource>, then <manifest> (keywords/categories only).

        Returns a dict with the keys:
          'manifests'  - list of {'package': {...}, 'resources': [...]}
          'tags'       - unique slugified keywords of all resources (unordered)
          'categories' - sorted, de-duplicated category lists of all resources
        """
        soup = BeautifulSoup(manifest_xml, 'xml')

        data = {'manifests': [], 'tags': [], 'categories': []}
        unique_tags = set()

        for manifest in soup.find_all('manifest'):
            m_title, m_description, m_keywords, m_categories = self.pull_metadata(
                manifest.metadata)

            m = {
                'package': {
                    'title': m_title,
                    'description': m_description,
                    'identifier': manifest['identifier'],
                    # Tag.get() reads the XML attribute.  `'version' in
                    # manifest` tested the element's children instead, so the
                    # real version was never picked up.
                    'version': manifest.get('version', '0.0'),
                    'categories': m_categories,
                },
                'resources': [],
            }

            # Pick the default organization when one is declared, otherwise
            # the first one.  `organization` is always bound, and a manifest
            # without <organizations> simply has none.
            organization = None
            organizations = manifest.find('organizations')
            if organizations is not None:
                o_default = organizations.get('default')
                if o_default is not None:
                    organization = organizations.find(
                        'organization', identifier=o_default)
                if organization is None:
                    organization = organizations.find('organization')

            for resource in manifest.find_all('resource'):
                # Resources without an href are skipped; .get() returns None
                # where resource['href'] would raise KeyError.
                r_href = resource.get('href')
                if r_href is None:
                    continue

                r_identifier = resource['identifier']

                # Reset per resource so nothing leaks over from the previous
                # resource of the same manifest.
                title = None
                description = None
                keywords = None
                categories = None
                position = None

                if organization is not None:
                    # check if this resource is in org
                    item = organization.find('item',
                                             identifierref=r_identifier)
                    if item is None:
                        continue

                    title, description, keywords, categories = self.pull_metadata(
                        item.metadata)
                    position = len(item.find_previous_siblings('item'))

                    # Use the <title> tag if there was no md
                    if title is None and item.title is not None:
                        title = item.title.string

                    # <organization> only backs up keywords and categories.
                    if keywords is None or categories is None:
                        _, _, o_keywords, o_categories = self.pull_metadata(
                            organization.metadata)
                        if keywords is None:
                            keywords = o_keywords
                        if categories is None:
                            categories = o_categories

                # If anything is still missing, fall back to <resource>'s md
                if (title is None or description is None
                        or keywords is None or categories is None):
                    r_title, r_description, r_keywords, r_categories = self.pull_metadata(
                        resource.metadata)
                    if title is None:
                        title = r_title
                    if description is None:
                        description = r_description
                    if keywords is None:
                        keywords = r_keywords
                    if categories is None:
                        categories = r_categories

                # Manifest is the source of last resort for keywords and categories
                if keywords is None:
                    keywords = m_keywords
                if categories is None:
                    categories = m_categories

                # Limit size of categories
                # NOTE(review): categories is used below as a mapping with a
                # 'list' key; if that is its real shape this length check
                # counts keys and the slice would fail - confirm against
                # pull_metadata before relying on this limit.
                if categories is not None and len(categories) > 3:
                    categories = categories[:3]

                # slugify tags
                if keywords is not None:
                    slug_keywords = []
                    for tag in keywords['list']:
                        logger.info('--------TAG--------')
                        logger.info(tag)
                        slugged_tag = u_slugify(tag)
                        logger.info(slugged_tag)
                        slug_keywords.append(slugged_tag)

                    keywords = {
                        'ids': False,
                        'list': slug_keywords,
                    }
                    unique_tags.update(slug_keywords)

                m['resources'].append({
                    'identifier': r_identifier,
                    'path': r_href,
                    'title': title,
                    'description': description,
                    'tags': keywords,
                    'categories': categories,
                    'position': position,
                })

                # Record the category list unless it is missing or all-None.
                if categories is not None and 'list' in categories:
                    if any(x is not None for x in categories['list']):
                        data['categories'].append(categories['list'])

            data['manifests'].append(m)

        data['tags'] = list(unique_tags)

        # Remove duplicate categories: after sorting, equal lists are adjacent.
        cat_clean = []
        for cat in sorted(data['categories']):
            if not cat_clean or cat != cat_clean[-1]:
                cat_clean.append(cat)
        data['categories'] = cat_clean

        return data