def articles_by_tag(user, tag):
    '''Build a Solr query for the articles a user has bookmarked with a tag.

    Looks up the bookmarked pids with :meth:`pids_by_tag` and ORs them
    together into a single Solr filter.

    :param user: :class:`~django.contrib.auth.models.User` whose
        :class:`~openemory.accounts.models.Bookmark` s are searched
    :param tag: tag to look up
    :returns: an empty list when no pids are found; otherwise an
        *unexecuted* Solr query (so the caller can paginate it), limited
        to ``ARTICLE_VIEW_FIELDS`` and sorted by ``-last_modified``
    '''
    solr = solr_interface()
    tagged_pids = pids_by_tag(user, tag)
    if not tagged_pids:
        return []

    # combine into Q(pid=pid1) | Q(pid=pid2) | Q(pid=pid3) ...
    pid_query = None
    for pid in tagged_pids:
        clause = solr.Q(pid=pid)
        if pid_query is None:
            pid_query = clause
        else:
            pid_query |= clause

    query = solr.query(pid_query)
    query = query.field_limit(ARTICLE_VIEW_FIELDS)
    # sorting by last modified: best option?
    return query.sort_by('-last_modified')
def statistics(request):
    '''`Template context processor
    <https://docs.djangoproject.com/en/dev/ref/settings/#template-context-processors>`_
    that adds publication statistics to the page context under the name
    ARTICLE_STATISTICS.

    The value is a dict with five keys: ``total_articles``,
    ``year_views``, ``year_downloads``, ``total_views``, and
    ``total_downloads``.
    '''
    # only numFound is read, so request zero rows
    count_query = solr_interface().query().filter(
        content_model=Publication.ARTICLE_CONTENT_MODEL, state='A')
    response = count_query.paginate(rows=0).execute()
    stats = {'total_articles': response.result.numFound}

    # all-time totals
    all_time = ArticleStatistics.objects.all().aggregate(
        total_views=Sum('num_views'),
        total_downloads=Sum('num_downloads'))
    stats.update(all_time)

    # totals restricted to the current calendar year
    this_year = ArticleStatistics.objects.filter(
        year=date.today().year).aggregate(
        year_views=Sum('num_views'),
        year_downloads=Sum('num_downloads'))
    stats.update(this_year)

    return {'ARTICLE_STATISTICS': stats}
def articles_by_tag(user, tag):
    """Build a Solr query for the articles a user has bookmarked with a tag.

    Looks up the bookmarked pids with :meth:`pids_by_tag` and ORs them
    together into a single Solr filter.

    :param user: :class:`~django.contrib.auth.models.User` whose
        :class:`~openemory.accounts.models.Bookmark` s are searched
    :param tag: tag to look up
    :returns: an empty list when no pids are found; otherwise an
        *unexecuted* Solr query (so the caller can paginate it), limited
        to ``PUBLICATION_VIEW_FIELDS`` and sorted by ``-last_modified``
    """
    solr = solr_interface()
    tagged_pids = pids_by_tag(user, tag)
    if not tagged_pids:
        return []

    # combine into Q(pid=pid1) | Q(pid=pid2) | Q(pid=pid3) ...
    pid_query = None
    for pid in tagged_pids:
        clause = solr.Q(pid=pid)
        if pid_query is None:
            pid_query = clause
        else:
            pid_query |= clause

    query = solr.query(pid_query)
    query = query.field_limit(PUBLICATION_VIEW_FIELDS)
    # sorting by last modified: best option?
    return query.sort_by("-last_modified")
def statistics(request):
    '''`Template context processor
    <https://docs.djangoproject.com/en/dev/ref/settings/#template-context-processors>`_
    that adds publication statistics to the page context under the name
    ARTICLE_STATISTICS.

    The value is a dict with five keys: ``total_articles``,
    ``year_views``, ``year_downloads``, ``total_views``, and
    ``total_downloads``.
    '''
    # only numFound is read, so request zero rows
    count_query = solr_interface().query().filter(
        content_model=Article.ARTICLE_CONTENT_MODEL, state='A')
    response = count_query.paginate(rows=0).execute()
    stats = {'total_articles': response.result.numFound}

    # all-time totals
    all_time = ArticleStatistics.objects.all().aggregate(
        total_views=Sum('num_views'),
        total_downloads=Sum('num_downloads'))
    stats.update(all_time)

    # totals restricted to the current calendar year
    this_year = ArticleStatistics.objects.filter(
        year=date.today().year).aggregate(
        year_views=Sum('num_views'),
        year_downloads=Sum('num_downloads'))
    stats.update(this_year)

    return {'ARTICLE_STATISTICS': stats}
def _find_articles(self):
    '''Build the base Solr query for articles owned by this user.

    Internal helper holding the functionality shared by
    :meth:`recent_articles` and :meth:`unpublished_articles`.

    :returns: an unexecuted Solr query filtered by owner
        (``self.user.username``) and article content model, with fields
        limited to ``ARTICLE_VIEW_FIELDS``
    '''
    owned = solr_interface().query(owner=self.user.username)
    articles = owned.filter(content_model=Article.ARTICLE_CONTENT_MODEL)
    return articles.field_limit(ARTICLE_VIEW_FIELDS)
def view_department(request, id):
    '''View a list of faculty (or non-faculty users with profiles) in a
    single department.

    :param id: department id
    :raises Http404: when Solr returns no people and no
        :class:`EsdPerson` exists for the department id
    '''
    # people in this department, looked up by department code
    person_query = solr_interface().query(department_id=id) \
        .filter(record_type=EsdPerson.record_type) \
        .sort_by('last_name')
    people = person_query.paginate(rows=150).execute()

    if len(people):
        first = people[0]
        division = first['division_name']
        # department_name is a list since an article can have 0..n. An
        # EsdPerson, though, only has one, so just grab the first.
        names = first['department_name']
        dept = names[0] if names else ''
        # division name already gives context, so keep only the text
        # after the last colon for display
        if ':' in dept:
            dept = dept.rsplit(':', 1)[1].strip()
    else:
        # no profile users found (unlikely with real data): fall back
        # to the database for division & department names
        matches = EsdPerson.objects.filter(department_id=id) \
            .only('department_name', 'division_name').distinct()
        # no department found for that id - 404
        if not matches:
            raise Http404
        record = matches[0]
        division = record.division_name
        dept = record.department_shortname

    context = {
        'esdpeople': people,
        'department': dept,
        'division': division,
        'dep_id': id,
    }
    return render(request, 'accounts/department.html', context)
def statistics(request):
    '''`Template context processor
    <https://docs.djangoproject.com/en/dev/ref/settings/#template-context-processors>`_
    that adds account and session statistics to the page context under
    the name ACCOUNT_STATISTICS.

    The value currently has a single key: ``total_users``.
    '''
    # only numFound is read, so request zero rows
    person_query = solr_interface().query().filter(
        record_type=EsdPerson.record_type)
    response = person_query.paginate(rows=0).execute()
    return {'ACCOUNT_STATISTICS': {'total_users': response.result.numFound}}
def statistics(request):
    '''`Template context processor
    <https://docs.djangoproject.com/en/dev/ref/settings/#template-context-processors>`_
    that adds account and session statistics to the page context under
    the name ACCOUNT_STATISTICS.

    The value currently has a single key: ``total_users``.
    '''
    # only numFound is read, so request zero rows
    person_query = solr_interface().query().filter(
        record_type=EsdPerson.record_type)
    response = person_query.paginate(rows=0).execute()
    return {'ACCOUNT_STATISTICS': {'total_users': response.result.numFound}}
def view_department(request, id):
    '''View a list of faculty (or non-faculty users with profiles) in a
    single department.

    :param id: department id
    :raises Http404: when Solr returns no people and no
        :class:`EsdPerson` exists for the department id
    '''
    # people in this department, looked up by department code
    person_query = solr_interface().query(department_id=id) \
        .filter(record_type=EsdPerson.record_type) \
        .sort_by('last_name')
    people = person_query.paginate(rows=150).execute()

    if len(people):
        first = people[0]
        division = first['division_name']
        # department_name is a list since an article can have 0..n. An
        # EsdPerson, though, only has one, so just grab the first.
        names = first['department_name']
        dept = names[0] if names else ''
        # division name already gives context, so keep only the text
        # after the last colon for display
        if ':' in dept:
            dept = dept.rsplit(':', 1)[1].strip()
    else:
        # no profile users found (unlikely with real data): fall back
        # to the database for division & department names
        matches = EsdPerson.objects.filter(department_id=id) \
            .only('department_name', 'division_name').distinct()
        # no department found for that id - 404
        if not matches:
            raise Http404
        record = matches[0]
        division = record.division_name
        dept = record.department_shortname

    context = {
        'esdpeople': people,
        'department': dept,
        'division': division,
    }
    return render(request, 'accounts/department.html', context)
def faculty_autocomplete(request):
    '''Autocomplete view for faculty names.

    Reads the search string from the ``term`` GET parameter, ORs an
    exact and a prefix match on ``ad_name`` for every search word, and
    returns up to 10 matching people as a JSON list of dicts with the
    keys ``label``, ``description``, ``username``, ``first_name``,
    ``last_name`` and ``affiliation``.
    '''
    term = request.GET.get('term', '')
    # handle multiple terms and strip off commas
    # e.g., if user searches for "lastname, firstname".
    # Filter *after* stripping so a lone "," does not turn into an
    # empty term (and a bare "*" wildcard) in the query below.
    stripped = (word.strip(',') for word in term.lower().split())
    terms = [t for t in stripped if t]
    # TODO: consider using eulcommon.searchutil here

    solr = solr_interface()
    # do an OR search for partial or exact matches in the full name
    term_filter = solr.Q()
    for t in terms:
        # exact match or partial match (exact word with * does not match)
        term_filter |= solr.Q(ad_name=t) | solr.Q(ad_name='%s*' % t)
    r = solr.query(term_filter).filter(record_type=EsdPerson.record_type) \
        .field_limit(['username', 'first_name', 'last_name',
                      'department_name', 'ad_name'], score=True) \
        .sort_by('-score').sort_by('ad_name_sort') \
        .paginate(rows=10).execute()

    # NOTE: may want to cut off based on some relevance score,
    # (e.g., if score is below 0.5 and there is at least one good match,
    # omit the less relevant items)
    suggestions = [
        {
            # directory name in lastname, firstname format
            'label': u['ad_name'],
            # may be suppressed
            'description': u.get('department_name', ''),
            'username': u['username'],
            # first name is missing in some cases-- don't error if it's
            # not present
            # NOTE: if first name is missing, name may be listed/filled
            # in wrong
            'first_name': u.get('first_name', ''),
            'last_name': u['last_name'],
            'affiliation': 'Emory University',
        }
        for u in r
    ]
    return HttpResponse(json_serializer.encode(suggestions),
                        content_type='application/json')
def handle(self, verbosity=1, *args, **options):
    '''Index ESD faculty data in Solr.

    :param verbosity: output level; the progress line is printed when it
        is at least ``self.v_normal``
    :param options: may contain ``index_url``, which is passed to
        :meth:`solr_interface`
    :raises CommandError: if connecting to Solr fails with a socket
        error, or if :meth:`update_faculty_index` raises a ``SolrError``
    '''
    self.verbosity = int(verbosity)
    if self.verbosity >= self.v_normal:
        # single-argument call form: valid as a Python 2 print
        # statement and as a Python 3 function call
        print('Indexing ESD data for %d faculty members in Solr' %
              (EsdPerson.faculty.all().count(),))

    try:
        solr_url = options.get('index_url', None)
        self.solr = solr_interface(solr_url)
    except socket.error as se:
        raise CommandError('Failed to connect to Solr (%s)' % se)

    try:
        self.update_faculty_index()
    except SolrError as se:
        # give a more helpful hint for the schema-mismatch case
        if 'unknown field' in str(se):
            raise CommandError(
                'Solr unknown field error ' +
                '(check that local schema matches running instance)')
        raise CommandError('Solr error (%s)' % se)
def departments(request):
    '''List department names based on Faculty information in ESD,
    grouped by division name.

    Renders ``accounts/departments.html`` with ``departments``: a list
    of dicts produced by :meth:`EsdPerson.split_department`, each with
    an added ``total`` holding the facet count.
    '''
    solr = solr_interface()
    facet_field = 'division_dept_id'
    # facet only; no documents are needed (rows=0)
    facet_query = solr.query(record_type=EsdPerson.record_type) \
        .facet_by(facet_field, limit=-1, sort='index')
    response = facet_query.paginate(rows=0).execute()

    # division_dept_id field is indexed in Solr as
    # division_name|division_code|department_shortname|department_id
    # split out and convert to list of dict
    depts = []
    for value, count in response.facet_counts.facet_fields[facet_field]:
        info = EsdPerson.split_department(value)
        info['total'] = count
        depts.append(info)

    return render(request, 'accounts/departments.html',
                  {'departments': depts})
def handle(self, verbosity=1, *args, **options):
    '''Index ESD faculty data in Solr.

    :param verbosity: output level; the progress line is printed when it
        is at least ``self.v_normal``
    :param options: may contain ``index_url``, which is passed to
        :meth:`solr_interface`
    :raises CommandError: if connecting to Solr fails with a socket
        error, or if :meth:`update_faculty_index` raises a ``SolrError``
    '''
    self.verbosity = int(verbosity)
    if self.verbosity >= self.v_normal:
        # single-argument call form: valid as a Python 2 print
        # statement and as a Python 3 function call
        print('Indexing ESD data for %d faculty members in Solr' %
              (EsdPerson.faculty.all().count(),))

    try:
        solr_url = options.get('index_url', None)
        self.solr = solr_interface(solr_url)
    except socket.error as se:
        raise CommandError('Failed to connect to Solr (%s)' % se)

    try:
        self.update_faculty_index()
    except SolrError as se:
        # give a more helpful hint for the schema-mismatch case
        if 'unknown field' in str(se):
            raise CommandError(
                'Solr unknown field error ' +
                '(check that local schema matches running instance)')
        raise CommandError('Solr error (%s)' % se)
def view_department(request, id):
    '''View a list of faculty (or non-faculty users with profiles) in a
    single department.

    :param id: department id
    :raises Http404: when Solr returns no people and no
        :class:`EsdPerson` exists for the department id
    '''
    # people in this department, looked up by department code
    person_query = solr_interface().query(department_id=id) \
        .filter(record_type=EsdPerson.record_type) \
        .sort_by('last_name')
    people = person_query.paginate(rows=150).execute()

    if len(people):
        first = people[0]
        division = first['division_name']
        # department_name is a list since an article can have 0..n. An
        # EsdPerson, though, only has one, so just grab the first.
        names = first['department_name']
        dept = names[0] if names else ''
        # division name already gives context, so keep only the text
        # after the last colon for display
        if ':' in dept:
            dept = dept.rsplit(':', 1)[1].strip()
    else:
        # no profile users found (unlikely with real data): fall back
        # to the database for division & department names
        matches = EsdPerson.objects.filter(department_id=id) \
            .only('department_name', 'division_name').distinct()
        # no department found for that id - 404
        if not matches:
            raise Http404
        record = matches[0]
        division = record.division_name
        dept = record.department_shortname

    context = {
        'esdpeople': people,
        'department': dept,
        'division': division,
    }
    return render(request, 'accounts/department.html', context)
def faculty_autocomplete(request):
    '''Autocomplete view for faculty names.

    Reads the search string from the ``term`` GET parameter, ORs an
    exact and a prefix match on ``ad_name`` for every search word, and
    returns up to 10 matching people as a JSON list of dicts with the
    keys ``label``, ``description``, ``username``, ``first_name``,
    ``last_name`` and ``affiliation``.
    '''
    term = request.GET.get('term', '')
    # handle multiple terms and strip off commas
    # e.g., if user searches for "lastname, firstname".
    # Filter *after* stripping so a lone "," does not turn into an
    # empty term (and a bare "*" wildcard) in the query below.
    stripped = (word.strip(',') for word in term.lower().split())
    terms = [t for t in stripped if t]
    # TODO: consider using eulcommon.searchutil here

    solr = solr_interface()
    # do an OR search for partial or exact matches in the full name
    term_filter = solr.Q()
    for t in terms:
        # exact match or partial match (exact word with * does not match)
        term_filter |= solr.Q(ad_name=t) | solr.Q(ad_name='%s*' % t)
    r = solr.query(term_filter).filter(record_type=EsdPerson.record_type) \
        .field_limit(['username', 'first_name', 'last_name',
                      'department_name', 'ad_name'], score=True) \
        .sort_by('-score').sort_by('ad_name_sort') \
        .paginate(rows=10).execute()

    # NOTE: may want to cut off based on some relevance score,
    # (e.g., if score is below 0.5 and there is at least one good match,
    # omit the less relevant items)
    suggestions = [
        {
            # directory name in lastname, firstname format
            'label': u['ad_name'],
            # may be suppressed
            'description': u.get('department_name', ''),
            'username': u['username'],
            # first name is missing in some cases-- don't error if it's
            # not present
            # NOTE: if first name is missing, name may be listed/filled
            # in wrong
            'first_name': u.get('first_name', ''),
            'last_name': u['last_name'],
            'affiliation': 'Emory University',
        }
        for u in r
    ]
    # content_type rather than mimetype: the mimetype keyword was
    # deprecated and later removed from HttpResponse
    return HttpResponse(json_serializer.encode(suggestions),
                        content_type='application/json')
def handle(self, *args, **options):
    '''Index full text in Solr for articles whose embargo has expired.

    Positional ``args`` are treated as an explicit list of pids; with
    none given, Solr is searched for active articles whose
    ``embargo_end`` is before today and which have no ``fulltext``
    indexed. A summary of counts is written to ``self.stdout``.

    :param options: reads ``verbosity`` and ``noact`` (when true,
        nothing is added to Solr)
    :raises CommandError: if the Solr query cannot be built or the
        result set cannot be paginated
    '''
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # connection to repository
    # uses default user / pass configured in localsettings.py
    repo = Repository()

    # connection to solr
    solr = solr_interface()

    # today's date in the same format as embargo_end date
    today = datetime.datetime.now().strftime("%Y-%m-%d")

    if args:
        # pids specified: wrap them in dicts so the pid list has the
        # same shape as the solr results
        pid_set = [{'pid': pid} for pid in args]
    else:
        # search for active Articles with an embargo_end date less than
        # today, and that do not have fulltext field indexed. Only
        # return the pid for each record.
        try:
            pid_set = solr.query() \
                .filter(content_model=Article.ARTICLE_CONTENT_MODEL,
                        state='A', embargo_end__lt=today) \
                .exclude(fulltext__any=True) \
                .field_limit('pid')
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if 'is not a valid field name' in str(e):
                raise CommandError(
                    'Solr unknown field error ' +
                    '(check that local schema matches running instance)')
            raise CommandError('Error (%s)' % e)

    try:
        expired_embargoes = Paginator(pid_set, 20)
        counts['total'] = expired_embargoes.count
    except Exception as e:
        # nothing can be processed without a working paginator; stop
        # here instead of failing on the loop below
        raise CommandError('Error paginating items: %s' % e)

    # process all expired embargoes
    for p in expired_embargoes.page_range:
        try:
            objs = expired_embargoes.page(p).object_list
        except Exception as e:
            # report the error and go on to the next page
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for obj in objs:
            try:
                article = repo.get_object(type=Article, pid=obj['pid'])
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist"
                                % obj['pid'])
                    counts['skipped'] += 1
                    continue

                # do not try to index items without valid fulltext field
                data = article.index_data()
                if 'fulltext' in data and data['fulltext'] is not None \
                        and data['fulltext'].strip():
                    self.output(1, "Processing %s" % article.pid)
                    if not options['noact']:
                        solr.add(data)
                    # NOTE(review): assumes the counter is incremented
                    # in noact mode too - confirm
                    counts['indexed'] += 1
                else:
                    self.output(1, "Skipping %s because fulltext does not exist"
                                % article.pid)
                    counts['skipped'] += 1
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s "
                            % (obj['pid'], e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Indexed: %s\n" % counts['indexed'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
def handle(self, *args, **options):
    '''Send each author a report covering the previous quarter.

    Positional ``args`` are treated as an explicit list of netids; with
    none given, the owners of all active articles are taken from a Solr
    facet on ``owner``. For every netid the article data from
    :meth:`get_article_data` is extended with the user's name, email
    and the quarter's start/end dates and handed to :meth:`send_mail`.

    Sets ``self.year`` and ``self.quarter`` to the quarter reported on.

    :param options: reads ``verbosity``; passed through to
        :meth:`send_mail`
    :raises CommandError: if the Solr owner lookup fails
    '''
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # connection to solr
    solr = solr_interface()

    # today's date is used to calculate the previous quarter
    today = datetime.datetime.today()
    current_year = today.year
    current_month = today.month
    current_quarter = year_quarter(current_month)
    self.output(1, "Current month/year: %s/%s quarter: %s"
                % (current_month, current_year, current_quarter))

    # report on the quarter before the current one; the first quarter
    # wraps around to the fourth quarter of the previous year
    if current_quarter == 1:
        self.year = current_year - 1
        self.quarter = 4
    else:
        self.year = current_year
        self.quarter = current_quarter - 1
    self.output(1, "Report will run for year: %s quarter: %s"
                % (self.year, self.quarter))

    # start and end dates by quarter
    start_end = {
        1: ('January 1, %s' % self.year, 'March 31, %s' % self.year),
        2: ('April 1, %s' % self.year, 'June 30, %s' % self.year),
        3: ('July 1, %s' % self.year, 'September 30, %s' % self.year),
        4: ('October 1, %s' % self.year, 'December 31, %s' % self.year),
    }

    if args:
        # netids specified: use that list
        netid_set = list(args)
    else:
        # search for authors with published articles
        try:
            facets = solr.query() \
                .filter(content_model=Article.ARTICLE_CONTENT_MODEL,
                        state='A') \
                .facet_by('owner').paginate(rows=0).execute()
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if 'is not a valid field name' in str(e):
                raise CommandError(
                    'Solr unknown field error ' +
                    '(check that local schema matches running instance)')
            raise CommandError('Error (%s)' % e)
        # keep just the netid and throw away the count
        netid_set = [n[0] for n in facets.facet_counts.facet_fields['owner']]

    # query solr for all articles for each user
    for n in netid_set:
        self.output(1, "Processing user %s" % n)
        try:
            article_query = solr.query().filter(
                content_model=Article.ARTICLE_CONTENT_MODEL,
                state='A', owner=n).field_limit(['pid', 'title'])
            articles = Paginator(article_query, 5)  # change later
            articles = articles.object_list
        except Exception as e:
            # self.output is a method; the former self.output.error(...)
            # raised AttributeError here and hid the real error
            self.output(0, "%s" % e)
            continue

        article_data = self.get_article_data(articles, self.year, self.quarter)

        # add name and email to article data
        # NOTE(review): assumes only the name/email lines were under
        # the ``if``; dates and send_mail run for every netid - confirm
        user = User.objects.filter(username=n)
        if user:
            user = user[0]  # again should only be 1 record
            article_data['first_name'] = user.first_name
            article_data['last_name'] = user.last_name
            article_data['email'] = user.email
        article_data['start'] = start_end[self.quarter][0]
        article_data['end'] = start_end[self.quarter][1]

        # send the email!
        self.send_mail(article_data, options)
def items(self):
    '''Return a Solr query for every article in state ``A``.'''
    return solr_interface().query(
        content_model=Publication.ARTICLE_CONTENT_MODEL, state='A')
def items(self):
    '''Return a Solr query for every article in state ``A``.'''
    return solr_interface().query(
        content_model=Article.ARTICLE_CONTENT_MODEL, state='A')
def handle(self, *args, **options):
    """Move license/copyright info into MODS and add articles to the
    collection.

    Positional ``args`` are treated as an explicit list of pids; with
    none given, all articles are looked up in Solr. For each article:
    access conditions in descMetadata are cleared, an empty
    contentMetadata datastream is purged, license and copyright text
    are copied from contentMetadata into descMetadata, and the article
    is added to the ``oe-collection`` object. A summary of counts is
    written to ``self.stdout``.

    :param options: reads ``verbosity``, ``username``, ``password``
        (prompted for when missing) and ``noact`` (when true, nothing
        is purged or saved)
    :raises CommandError: if no username is given, the Solr query
        cannot be built, or the result set cannot be paginated
    """
    self.verbosity = int(options["verbosity"])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # check required options
    if not options["username"]:
        raise CommandError("Username is required")
    if not options["password"]:
        options["password"] = getpass()

    # connection to repository
    repo = Repository(username=options["username"],
                      password=options["password"])

    # connection to solr
    solr = solr_interface()

    coll = repo.get_object(pid=settings.PID_ALIASES["oe-collection"])

    if args:
        # pids specified: wrap them in dicts so the pid list has the
        # same shape as the solr results
        pid_set = [{"pid": pid} for pid in args]
    else:
        # search for Articles. Only return the pid for each record.
        try:
            pid_set = solr.query() \
                .filter(content_model=Article.ARTICLE_CONTENT_MODEL) \
                .field_limit("pid")
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if "is not a valid field name" in str(e):
                raise CommandError(
                    "Solr unknown field error " +
                    "(check that local schema matches running instance)")
            raise CommandError("Error (%s)" % e)

    try:
        articles = Paginator(pid_set, 20)
        counts["total"] = articles.count
    except Exception as e:
        # nothing can be processed without a working paginator; stop
        # here instead of failing on the loop below
        raise CommandError("Error paginating items: %s" % e)

    # process all Articles
    for p in articles.page_range:
        try:
            objs = articles.page(p).object_list
        except Exception as e:
            # report the error and go on to the next page
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts["errors"] += 1
            continue

        for obj in objs:
            try:
                article = repo.get_object(type=Article, pid=obj["pid"])
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist"
                                % obj["pid"])
                    counts["skipped"] += 1
                    continue
                self.output(0, "Processing %s" % article.pid)

                mods = article.descMetadata.content
                # clear out all access_conditions to prep for license
                # and copyright fields
                mods.access_conditions = []

                # NOTE(review): nesting below is reconstructed; counters
                # are assumed to be updated in noact mode as well, and
                # the copyright copy is assumed to apply only when
                # contentMetadata exists and is not empty - confirm
                if article.contentMetadata.exists and \
                        article.contentMetadata.content.is_empty():
                    # remove contentMetadata if empty
                    if not options["noact"]:
                        article.api.purgeDatastream(
                            article.pid, "contentMetadata",
                            logMessage="Removing empty datastream")
                    self.output(1, "Removing empty contentMetadata datastream %s"
                                % article.pid)
                    counts["removed"] += 1
                elif article.contentMetadata.exists:
                    content = article.contentMetadata.content
                    if content.license:
                        # copy License info if available
                        mods.create_license()
                        mods.license.text = content.license.text
                        mods.license.link = content.license.link
                        self.output(1, "Copying license info to MODS %s"
                                    % article.pid)
                        counts["license"] += 1
                    elif content.copyright and \
                            "creative commons" in content.copyright.lower():
                        # copy License info from copyright section if
                        # available and not in License section
                        mods.create_license()
                        mods.license.text = content.copyright
                        self.output(
                            1, "Copying license info from Copyright section to MODS for %s"
                            % article.pid)
                        counts["copyright_license"] += 1

                    # copy Copyright info if available
                    if content.copyright:
                        mods.create_copyright()
                        mods.copyright.text = content.copyright
                        self.output(1, "Copying copyright info to MODS %s"
                                    % article.pid)
                        counts["copyright"] += 1

                # add to collection
                article.collection = coll
                self.output(1, "Adding %s to collection %s"
                            % (article.pid, coll.pid))
                counts["collection"] += 1

                # save article
                if not options["noact"]:
                    article.save()
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s "
                            % (obj["pid"], e))
                counts["errors"] += 1

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % counts["total"])
    self.stdout.write("Removed contentMetadata: %s\n" % counts["removed"])
    self.stdout.write("Updated License from License section: %s\n" % counts["license"])
    self.stdout.write("Updated License from Copyright section: %s\n" % counts["copyright_license"])
    self.stdout.write("Updated Copyright: %s\n" % counts["copyright"])
    self.stdout.write("Added to collection: %s\n" % counts["collection"])
    self.stdout.write("Skipped: %s\n" % counts["skipped"])
    self.stdout.write("Errors: %s\n" % counts["errors"])
def handle(self, *args, **options):
    '''Move license/copyright info into MODS and add articles to the
    collection.

    Positional ``args`` are treated as an explicit list of pids; with
    none given, all articles are looked up in Solr. For each article:
    access conditions in descMetadata are cleared, an empty
    contentMetadata datastream is purged, license and copyright text
    are copied from contentMetadata into descMetadata, and the article
    is added to the ``oe-collection`` object. A summary of counts is
    written to ``self.stdout``.

    :param options: reads ``verbosity``, ``username``, ``password``
        (prompted for when missing) and ``noact`` (when true, nothing
        is purged or saved)
    :raises CommandError: if no username is given, the Solr query
        cannot be built, or the result set cannot be paginated
    '''
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # check required options
    if not options['username']:
        raise CommandError('Username is required')
    if not options['password']:
        options['password'] = getpass()

    # connection to repository
    repo = Repository(username=options['username'],
                      password=options['password'])

    # connection to solr
    solr = solr_interface()

    coll = repo.get_object(pid=settings.PID_ALIASES['oe-collection'])

    if args:
        # pids specified: wrap them in dicts so the pid list has the
        # same shape as the solr results
        pid_set = [{'pid': pid} for pid in args]
    else:
        # search for Articles. Only return the pid for each record.
        try:
            pid_set = solr.query() \
                .filter(content_model=Article.ARTICLE_CONTENT_MODEL) \
                .field_limit('pid')
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if 'is not a valid field name' in str(e):
                raise CommandError(
                    'Solr unknown field error ' +
                    '(check that local schema matches running instance)')
            raise CommandError('Error (%s)' % e)

    try:
        articles = Paginator(pid_set, 20)
        counts['total'] = articles.count
    except Exception as e:
        # nothing can be processed without a working paginator; stop
        # here instead of failing on the loop below
        raise CommandError('Error paginating items: %s' % e)

    # process all Articles
    for p in articles.page_range:
        try:
            objs = articles.page(p).object_list
        except Exception as e:
            # report the error and go on to the next page
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for obj in objs:
            try:
                article = repo.get_object(type=Article, pid=obj['pid'])
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist"
                                % obj['pid'])
                    counts['skipped'] += 1
                    continue
                self.output(0, "Processing %s" % article.pid)

                mods = article.descMetadata.content
                # clear out all access_conditions to prep for license
                # and copyright fields
                mods.access_conditions = []

                # NOTE(review): nesting below is reconstructed; counters
                # are assumed to be updated in noact mode as well, and
                # the copyright copy is assumed to apply only when
                # contentMetadata exists and is not empty - confirm
                if article.contentMetadata.exists and \
                        article.contentMetadata.content.is_empty():
                    # remove contentMetadata if empty
                    if not options['noact']:
                        article.api.purgeDatastream(
                            article.pid, 'contentMetadata',
                            logMessage='Removing empty datastream')
                    self.output(1, "Removing empty contentMetadata datastream %s"
                                % article.pid)
                    counts['removed'] += 1
                elif article.contentMetadata.exists:
                    content = article.contentMetadata.content
                    if content.license:
                        # copy License info if available
                        mods.create_license()
                        mods.license.text = content.license.text
                        mods.license.link = content.license.link
                        self.output(1, "Copying license info to MODS %s"
                                    % article.pid)
                        counts['license'] += 1
                    elif content.copyright and \
                            'creative commons' in content.copyright.lower():
                        # copy License info from copyright section if
                        # available and not in License section
                        mods.create_license()
                        mods.license.text = content.copyright
                        self.output(
                            1, "Copying license info from Copyright section to MODS for %s"
                            % article.pid)
                        counts['copyright_license'] += 1

                    # copy Copyright info if available
                    if content.copyright:
                        mods.create_copyright()
                        mods.copyright.text = content.copyright
                        self.output(1, "Copying copyright info to MODS %s"
                                    % article.pid)
                        counts['copyright'] += 1

                # add to collection
                article.collection = coll
                self.output(1, "Adding %s to collection %s"
                            % (article.pid, coll.pid))
                counts['collection'] += 1

                # save article
                if not options['noact']:
                    article.save()
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s "
                            % (obj['pid'], e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("\n\n")
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Removed contentMetadata: %s\n" % counts['removed'])
    self.stdout.write("Updated License from License section: %s\n" % counts['license'])
    self.stdout.write("Updated License from Copyright section: %s\n" % counts['copyright_license'])
    self.stdout.write("Updated Copyright: %s\n" % counts['copyright'])
    self.stdout.write("Added to collection: %s\n" % counts['collection'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
def handle(self, *args, **options):
    '''Index full text in Solr for articles whose embargo has expired.

    Positional ``args`` are treated as an explicit list of pids; with
    none given, Solr is searched for active articles whose
    ``embargo_end`` is before today and which have no ``fulltext``
    indexed. A summary of counts is written to ``self.stdout``.

    :param options: reads ``verbosity`` and ``noact`` (when true,
        nothing is added to Solr)
    :raises CommandError: if the Solr query cannot be built or the
        result set cannot be paginated
    '''
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # counters
    counts = defaultdict(int)

    # connection to repository
    # uses default user / pass configured in localsettings.py
    repo = Repository()

    # connection to solr
    solr = solr_interface()

    # today's date in the same format as embargo_end date
    today = datetime.datetime.now().strftime("%Y-%m-%d")

    if args:
        # pids specified: wrap them in dicts so the pid list has the
        # same shape as the solr results
        pid_set = [{'pid': pid} for pid in args]
    else:
        # search for active Articles with an embargo_end date less than
        # today, and that do not have fulltext field indexed. Only
        # return the pid for each record.
        try:
            pid_set = solr.query() \
                .filter(content_model=Article.ARTICLE_CONTENT_MODEL,
                        state='A', embargo_end__lt=today) \
                .exclude(fulltext__any=True) \
                .field_limit('pid')
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if 'is not a valid field name' in str(e):
                raise CommandError(
                    'Solr unknown field error ' +
                    '(check that local schema matches running instance)')
            raise CommandError('Error (%s)' % e)

    try:
        expired_embargoes = Paginator(pid_set, 20)
        counts['total'] = expired_embargoes.count
    except Exception as e:
        # nothing can be processed without a working paginator; stop
        # here instead of failing on the loop below
        raise CommandError('Error paginating items: %s' % e)

    # process all expired embargoes
    for p in expired_embargoes.page_range:
        try:
            objs = expired_embargoes.page(p).object_list
        except Exception as e:
            # report the error and go on to the next page
            self.output(0, "Error getting page: %s : %s " % (p, e))
            counts['errors'] += 1
            continue

        for obj in objs:
            try:
                article = repo.get_object(type=Article, pid=obj['pid'])
                if not article.exists:
                    self.output(1, "Skipping %s because pid does not exist"
                                % obj['pid'])
                    counts['skipped'] += 1
                    continue

                # do not try to index items without valid fulltext field
                data = article.index_data()
                if 'fulltext' in data and data['fulltext'] is not None \
                        and data['fulltext'].strip():
                    self.output(1, "Processing %s" % article.pid)
                    if not options['noact']:
                        solr.add(data)
                    # NOTE(review): assumes the counter is incremented
                    # in noact mode too - confirm
                    counts['indexed'] += 1
                else:
                    self.output(1, "Skipping %s because fulltext does not exist"
                                % article.pid)
                    counts['skipped'] += 1
            except Exception as e:
                self.output(0, "Error processing pid: %s : %s "
                            % (obj['pid'], e))
                counts['errors'] += 1

    # summarize what was done
    self.stdout.write("Total number selected: %s\n" % counts['total'])
    self.stdout.write("Indexed: %s\n" % counts['indexed'])
    self.stdout.write("Skipped: %s\n" % counts['skipped'])
    self.stdout.write("Errors: %s\n" % counts['errors'])
def handle(self, *args, **options):
    '''Send each author a report covering the previous quarter.

    Positional ``args`` are treated as an explicit list of netids; with
    none given, the owners of all active articles are taken from a Solr
    facet on ``owner``. For every netid the article data from
    :meth:`get_article_data` is extended with the user's name, email
    and the quarter's start/end dates and handed to :meth:`send_mail`.

    Sets ``self.year`` and ``self.quarter`` to the quarter reported on.

    :param options: reads ``verbosity``; passed through to
        :meth:`send_mail`
    :raises CommandError: if the Solr owner lookup fails
    '''
    self.verbosity = int(options['verbosity'])  # 1 = normal, 0 = minimal, 2 = all
    self.v_normal = 1

    # connection to solr
    solr = solr_interface()

    # today's date is used to calculate the previous quarter
    today = datetime.datetime.today()
    current_year = today.year
    current_month = today.month
    current_quarter = year_quarter(current_month)
    self.output(1, "Current month/year: %s/%s quarter: %s"
                % (current_month, current_year, current_quarter))

    # report on the quarter before the current one; the first quarter
    # wraps around to the fourth quarter of the previous year
    if current_quarter == 1:
        self.year = current_year - 1
        self.quarter = 4
    else:
        self.year = current_year
        self.quarter = current_quarter - 1
    self.output(1, "Report will run for year: %s quarter: %s"
                % (self.year, self.quarter))

    # start and end dates by quarter
    start_end = {
        1: ('January 1, %s' % self.year, 'March 31, %s' % self.year),
        2: ('April 1, %s' % self.year, 'June 30, %s' % self.year),
        3: ('July 1, %s' % self.year, 'September 30, %s' % self.year),
        4: ('October 1, %s' % self.year, 'December 31, %s' % self.year),
    }

    if args:
        # netids specified: use that list
        netid_set = list(args)
    else:
        # search for authors with published articles
        try:
            facets = solr.query() \
                .filter(content_model=Publication.ARTICLE_CONTENT_MODEL,
                        state='A') \
                .facet_by('owner').paginate(rows=0).execute()
        except Exception as e:
            # str(e) rather than e.message, which is deprecated and
            # absent on Python 3
            if 'is not a valid field name' in str(e):
                raise CommandError(
                    'Solr unknown field error ' +
                    '(check that local schema matches running instance)')
            raise CommandError('Error (%s)' % e)
        # keep just the netid and throw away the count
        netid_set = [n[0] for n in facets.facet_counts.facet_fields['owner']]

    # query solr for all articles for each user
    for n in netid_set:
        self.output(1, "Processing user %s" % n)
        try:
            article_query = solr.query().filter(
                content_model=Publication.ARTICLE_CONTENT_MODEL,
                state='A', owner=n).field_limit(['pid', 'title'])
            articles = Paginator(article_query, 5)  # change later
            articles = articles.object_list
        except Exception as e:
            # self.output is a method; the former self.output.error(...)
            # raised AttributeError here and hid the real error
            self.output(0, "%s" % e)
            continue

        article_data = self.get_article_data(articles, self.year, self.quarter)

        # add name and email to article data
        # NOTE(review): assumes only the name/email lines were under
        # the ``if``; dates and send_mail run for every netid - confirm
        user = User.objects.filter(username=n)
        if user:
            user = user[0]  # again should only be 1 record
            article_data['first_name'] = user.first_name
            article_data['last_name'] = user.last_name
            article_data['email'] = user.email
        article_data['start'] = start_end[self.quarter][0]
        article_data['end'] = start_end[self.quarter][1]

        # send the email!
        self.send_mail(article_data, options)