def create_tags(self, tags):
    """
    Accept a list of tag names and create the ones that don't exist yet.

    Every name is normalised with u_slugify before use; names that slugify
    to an empty value are skipped. Tags that are missing from the database
    are created in one bulk_create with locked=self.preloaded. Tags that
    already exist get locked=True.

    Errors raised by the bulk create are logged and swallowed (best effort),
    so the caller never sees an exception from that step.
    """
    logger.info("Creating tags")

    # Slugify once and drop empties and repeats. Two inputs that slugify to
    # the same text would otherwise both be queued for creation and a single
    # duplicate could make the whole bulk_create fail.
    wanted = []
    seen = set()
    for tag in tags:
        tag_clean = u_slugify(tag)
        if tag_clean and tag_clean not in seen:
            seen.add(tag_clean)
            wanted.append(tag_clean)

    with transaction.commit_on_success():
        # One query for the existing texts instead of one exists() per tag.
        existing_texts = set(Tag.objects.values_list('text', flat=True))

        tag_obj_list = []
        for tag_clean in wanted:
            if tag_clean in existing_texts:
                # NOTE(review): an older comment said "set locked if
                # preloaded", but the code has always locked existing tags
                # unconditionally; that behaviour is kept here.
                Tag.objects.filter(text=tag_clean).update(locked=True)
            else:
                tag_obj_list.append(
                    Tag(text=tag_clean, locked=self.preloaded))

        try:
            if tag_obj_list:
                Tag.objects.bulk_create(tag_obj_list)
        except Exception as e:
            # Same reporting style as create_tag_relationships: keep the
            # info line and add the traceback.
            logger.info("Error during tag bulk create")
            logger.exception(e)
        else:
            logger.info("Complete")
def create_tag_relationships(self, tag_to_ci_map, id_list=False):
    """
    Bulk-create content item <-> tag relationships.

    tag_to_ci_map is a list of content item to tag mappings like:
        [ [ ci_id, [ tag, ... ] ], ... ]
    A mapping whose tag list is None is skipped.

    id_list being true means each tag entry is already a tag id. Otherwise
    each entry is a tag name, which is slugified and looked up among the
    existing Tag rows; names with no matching Tag are ignored.

    Errors raised by the bulk create are logged and swallowed.
    """
    TagThroughModel = ContentItem.tags.through

    # Name -> id lookup is only needed when the caller passes tag names.
    all_tags_lookup = {}
    if not id_list:
        for tag in Tag.objects.values('id', 'text'):
            all_tags_lookup[tag['text']] = tag['id']

    tags_to_add = []
    # A set keeps the duplicate check O(1); a list here made the whole loop
    # quadratic in the number of relationships.
    tag_pairs_added = set()

    # create tag memberships
    for pairings in tag_to_ci_map:
        ci_id = pairings[0]
        tags = pairings[1]
        if tags is None:
            continue
        for tag in tags:
            if id_list:
                tag_id = tag
            else:
                tag_id = all_tags_lookup.get(u_slugify(tag), None)
            if tag_id is None:
                # Avoid a NULL tag_id integrity error
                continue
            id_pair = (ci_id, tag_id)
            if id_pair in tag_pairs_added:
                # Avoid triggering a duplication IntegrityError
                continue
            tag_pairs_added.add(id_pair)
            tags_to_add.append(
                TagThroughModel(contentitem_id=ci_id, tag_id=tag_id))

    logger.info("Adding tag relationships to database")
    try:
        TagThroughModel.objects.bulk_create(tags_to_add)
    except Exception as e:
        logger.info("Error during tag relationship bulk create")
        logger.exception(e)
    else:
        logger.info("Complete")
def parse_manifests(self, manifest_xml):
    """
    Parse manifest XML and collect package, resource, tag and category data.

    manifest_xml is handed to BeautifulSoup with the 'xml' parser.

    Returns a dict with three keys:
        'manifests':  one entry per <manifest>, each holding a 'package'
                      dict (title, description, identifier, version,
                      categories) and a 'resources' list
        'tags':       the unique slugified keywords over all resources
        'categories': the sorted, de-duplicated category lists

    Metadata for a resource is resolved in this order: the matching <item>
    in the chosen <organization>, the <organization> itself (keywords and
    categories only), the <resource>, and finally the <manifest> (keywords
    and categories only). Resources without an href are skipped, as are
    resources that have no <item> in the chosen organization.
    """
    soup = BeautifulSoup(manifest_xml, 'xml')
    data = {'manifests': [], 'tags': [], 'categories': []}
    # Collected as a set and turned into a list once at the end, instead of
    # rebuilding list(set(...)) for every resource.
    unique_tags = set()

    for manifest in soup.find_all('manifest'):
        m_title, m_description, m_keywords, m_categories = self.pull_metadata(
            manifest.metadata)

        m = {
            'package': {
                'title': m_title,
                'description': m_description,
                'identifier': manifest['identifier'],
                # `'version' in manifest` tests the tag's children, not its
                # attributes, so it never matched; read the attribute.
                'version': manifest.get('version', '0.0'),
                'categories': m_categories,
            },
            'resources': [],
        }

        # Pick the organization: the one named by the `default` attribute
        # when present and resolvable, otherwise the first <organization>.
        # A manifest without <organizations> leaves this as None.
        organization = None
        organizations = manifest.find('organizations')
        if organizations is not None:
            o_default = organizations.get('default')
            if o_default is not None:
                organization = organizations.find(identifier=o_default)
            if organization is None:
                organization = organizations.find('organization')

        for resource in manifest.find_all('resource'):
            # Reset per resource so values resolved for one resource can
            # never leak into the next one.
            title = None
            description = None
            keywords = None
            categories = None

            # check for href! (.get returns None instead of raising)
            r_href = resource.get('href')
            if r_href is None:
                continue

            r_identifier = resource['identifier']
            position = None

            if organization is not None:
                # check if this resource is in org
                item = organization.find('item', identifierref=r_identifier)
                if item is None:
                    continue

                title, description, keywords, categories = self.pull_metadata(
                    item.metadata)
                position = len(item.find_previous_siblings('item'))

                # Use the <title> tag if there was no md
                if title is None and item.title is not None:
                    title = item.title.string

                # If there was no <item> metadata, then use <organization>
                if not all([title, description, keywords]):
                    o_title, o_description, o_keywords, o_categories = self.pull_metadata(
                        organization.metadata)
                    if keywords is None:
                        keywords = o_keywords
                    if categories is None:
                        categories = o_categories

            if not all([title, description, keywords]):
                r_title, r_description, r_keywords, r_categories = self.pull_metadata(
                    resource.metadata)
                # If there was no md yet, then use <resource>'s
                if title is None:
                    title = r_title
                if description is None:
                    description = r_description
                if keywords is None:
                    keywords = r_keywords
                if categories is None:
                    categories = r_categories

            # Manifest is the source of last resort for keywords and categories
            if keywords is None:
                keywords = m_keywords
            if categories is None:
                categories = m_categories

            # Limit size of categories
            # NOTE(review): the code below indexes categories['list'], so
            # categories is presumably a dict; in that case len() counts its
            # keys and this limit likely never applies. Confirm against
            # pull_metadata before changing it to truncate the list.
            if categories is not None and len(categories) > 3:
                categories = categories[:3]

            # slugify tags
            if keywords is not None:
                slug_keywords = []
                for tag in keywords['list']:
                    logger.info('--------TAG--------')
                    logger.info(tag)
                    slugged_tag = u_slugify(tag)
                    logger.info(slugged_tag)
                    slug_keywords.append(slugged_tag)
                # A fresh dict, so the metadata the keywords came from is
                # not modified.
                keywords = {
                    'ids': False,
                    'list': slug_keywords,
                }

            m['resources'].append({
                'identifier': r_identifier,
                'path': r_href,
                'title': title,
                'description': description,
                'tags': keywords,
                'categories': categories,
                'position': position,
            })

            if keywords is not None:
                unique_tags.update(keywords['list'])

            if categories is not None and 'list' in categories:
                if not all(x is None for x in categories['list']):
                    data['categories'].append(categories['list'])

        data['manifests'].append(m)

    data['tags'] = list(unique_tags)

    # Remove duplicate categories: after sorting, equal lists are adjacent
    cat_clean = []
    for cat in sorted(data['categories']):
        if not cat_clean or cat != cat_clean[-1]:
            cat_clean.append(cat)
    data['categories'] = cat_clean

    return data