def create_from_subjects_acceptable(custom_provider, add_missing=False, missing=None):
    """Map the subjects allowed by ``custom_provider.subjects_acceptable`` onto the provider.

    The rules are expanded with ``rules_to_subjects`` and each resulting subject
    is passed to ``map_custom_subject``. Mapping runs in repeated passes: a
    subject is dropped from the pending list once ``map_custom_subject`` returns
    a truthy value, otherwise it stays pending for the next pass.

    :param custom_provider: provider whose ``subjects_acceptable`` rules are copied.
    :param add_missing: when true, ``missing`` is queued too, and the parent of a
        subject that could not be mapped is queued if it is not already pending.
    :param missing: optional list of additional subjects; only used when
        ``add_missing`` is also true.
    :raises RuntimeError: when the tenth pass starts with subjects still pending,
        or when a pass finishes without removing any subject from the pending list.
    """
    tries = 0
    subjects_to_copy = list(rules_to_subjects(custom_provider.subjects_acceptable))
    if missing and add_missing:
        subjects_to_copy = subjects_to_copy + missing

    while subjects_to_copy:
        previous_len = len(subjects_to_copy)
        tries += 1
        if tries == 10:
            raise RuntimeError('Unable to map subjects acceptable with 10 iterations -- subjects remaining: {}'.format(subjects_to_copy))

        # Iterate over a snapshot: the pending list is mutated inside the loop.
        for subj in list(subjects_to_copy):
            parent_text = subj.parent.text if subj.parent else None
            if map_custom_subject(custom_provider, subj.text, parent_text, subj.text):
                subjects_to_copy.remove(subj)
            elif add_missing and subj.parent and subj.parent not in subjects_to_copy:
                # Dirty: queue the parent for a later pass and bump the baseline so
                # the addition does not hide a removal in the progress check below.
                subjects_to_copy.append(subj.parent)
                previous_len += 1
            else:
                logger.warning('Failed. Retrying next iteration')

        # NOTE(review): a pass that only queues parents and removes nothing also
        # ends up here with equal lengths -- confirm that raising is intended.
        new_len = len(subjects_to_copy)
        if new_len == previous_len:
            raise RuntimeError('Unable to map any custom subjects on iteration -- subjects remaining: {}'.format(subjects_to_copy))
def test_rules_to_subjects(self):
    """``rules_to_subjects`` returns the same subjects as filtering by the rule's two ids."""
    parent = self.parent_one
    child = self.child_one_1
    rules = [[[parent._id, child._id], False]]
    expected_subjects = Subject.objects.filter(Q(id=parent.id) | Q(id=child.id))

    actual_subjects = rules_to_subjects(rules)

    self.assertItemsEqual(expected_subjects, actual_subjects)
def test_rules_to_subjects(self):
    # One rule: the [parent, child] id path paired with a False flag.
    id_path = [self.parent_one._id, self.child_one_1._id]
    rules = [[id_path, False]]

    # The subjects the rule is expected to resolve to, looked up directly by pk.
    wanted = Q(id=self.parent_one.id) | Q(id=self.child_one_1.id)
    subject_queryset_ideal = Subject.objects.filter(wanted)

    returned_subjects = rules_to_subjects(rules)
    self.assertItemsEqual(subject_queryset_ideal, returned_subjects)
def validate_input(custom_provider, data, provider_type='osf.preprintprovider', copy=False, add_missing=False):
    """Validate a custom-taxonomy specification for ``custom_provider``.

    ``data`` is read through four optional keys:

    * ``include`` -- texts of BePress subjects to include (children are included too).
    * ``exclude`` -- texts of subjects to remove from the included set.
    * ``custom``  -- maps a custom subject text to a dict with a ``bepress`` text
      and an optional ``parent`` text.
    * ``merge``   -- maps the text of a subject to the text it is merged into.

    When ``copy`` is true none of those keys are checked; the included set is
    taken from ``custom_provider.subjects_acceptable`` instead.

    Side effect: (re)binds the module-level ``BEPRESS_PROVIDER``.

    :param custom_provider: provider the taxonomy is being built for.
    :param data: dict holding the specification described above.
    :param provider_type: accepted for interface compatibility; not read by this function.
    :param copy: use the provider's existing acceptable subjects as the included set.
    :param add_missing: when true, subjects used by the provider's preprints but
        absent from the mapping are returned instead of being treated as an error.
    :returns: ``list`` of missing subjects when ``add_missing`` is true, else ``None``.
    :raises AssertionError: when a validation rule is violated. Raised explicitly
        (not via ``assert``) so the checks still run under ``python -O``.
    :raises RuntimeError: when an excluded subject text cannot be found.
    """
    # This function may be run outside of this command (e.g. in the admin app) so we
    # need to make sure that BEPRESS_PROVIDER is set
    global BEPRESS_PROVIDER
    BEPRESS_PROVIDER = AbstractProvider.objects.filter(_id='osf', type='osf.preprintprovider').first()

    logger.info('Validating data')
    includes = data.get('include', [])
    excludes = data.get('exclude', [])
    customs = data.get('custom', {})
    merges = data.get('merge', {})

    if copy:
        included_subjects = rules_to_subjects(custom_provider.subjects_acceptable)
    else:
        if set(includes) & set(excludes):
            raise AssertionError('There must be no overlap between includes and excludes')
        for text in includes:
            if not Subject.objects.filter(provider=BEPRESS_PROVIDER, text=text).exists():
                raise AssertionError('Unable to find included subject with text {}'.format(text))
        included_subjects = Subject.objects.filter(provider=BEPRESS_PROVIDER, text__in=includes).include_children()
        logger.info('Successfully validated `include`')

        for text in excludes:
            try:
                Subject.objects.get(provider=BEPRESS_PROVIDER, text=text)
            except Subject.DoesNotExist:
                raise RuntimeError('Unable to find excluded subject with text {}'.format(text))
            if not included_subjects.filter(text=text).exists():
                raise AssertionError('Excluded subject with text {} was not included'.format(text))
        included_subjects = included_subjects.exclude(text__in=excludes)
        logger.info('Successfully validated `exclude`')

        for cust_name, map_dict in customs.items():
            if included_subjects.filter(text=cust_name).exists():
                raise AssertionError('Custom text {} already exists in mapped set'.format(cust_name))
            if not Subject.objects.filter(provider=BEPRESS_PROVIDER, text=map_dict.get('bepress')).exists():
                raise AssertionError('Unable to find specified BePress subject with text {}'.format(map_dict.get('bepress')))
            if map_dict.get('parent'):  # Null parent possible
                if map_dict['parent'] not in set(customs.keys()) | set(included_subjects.values_list('text', flat=True)):
                    raise AssertionError('Unable to find specified parent with text {} in mapped set'.format(map_dict['parent']))
                # TODO: hierarchy length validation? Probably more trouble than worth here, done on .save
        logger.info('Successfully validated `custom`')
        included_subjects = included_subjects | Subject.objects.filter(text__in=[map_dict['bepress'] for map_dict in customs.values()])

        for merged_from, merged_into in merges.items():
            if included_subjects.filter(text=merged_from).exists():
                raise AssertionError('Cannot merge subject "{}" that will be included'.format(merged_from))
            if merged_into not in set(included_subjects.values_list('text', flat=True)) | set(customs.keys()):
                raise AssertionError('Unable to determine merge target for "{}"'.format(merged_into))
        included_subjects = included_subjects | Subject.objects.filter(text__in=merges.keys())

    # Last (most specific) subject of every hierarchy used by this provider's preprints.
    used_subject_ids = {
        hier[-1].id
        for ps in Preprint.objects.filter(provider=custom_provider)
        for hier in ps.subject_hierarchy
    }
    missing_subjects = Subject.objects.filter(id__in=used_subject_ids).exclude(id__in=included_subjects.values_list('id', flat=True))

    if not add_missing:
        if missing_subjects.exists():
            raise AssertionError('Incomplete mapping -- following subjects in use but not included:\n{}'.format(list(missing_subjects.values_list('text', flat=True))))

    if isinstance(custom_provider, PreprintProvider):
        if custom_provider.share_title in [None, '', 'bepress']:
            raise AssertionError('share title not set; please set the share title on this provider before creating a custom taxonomy.')

    logger.info('Successfully validated mapping completeness')
    return list(missing_subjects) if add_missing else None
def validate_input(custom_provider, data, provider_type='osf.preprintprovider', copy=False, add_missing=False):
    """Check that ``data`` describes a usable custom taxonomy for ``custom_provider``.

    Keys read from ``data`` (all optional):

    * ``include`` -- list of BePress subject texts; their children are included as well.
    * ``exclude`` -- list of subject texts removed from the included set.
    * ``custom``  -- dict of custom subject text -> dict with a ``bepress`` text
      and an optional ``parent`` text.
    * ``merge``   -- dict of merged-from subject text -> merged-into subject text.

    With ``copy`` set, those keys are not checked and the included set comes from
    ``custom_provider.subjects_acceptable``.

    Side effect: (re)binds the module-level ``BEPRESS_PROVIDER``.

    :param custom_provider: provider the taxonomy is being built for.
    :param data: dict holding the specification described above.
    :param provider_type: accepted for interface compatibility; not read by this function.
    :param copy: use the provider's existing acceptable subjects as the included set.
    :param add_missing: when true, in-use subjects that the mapping does not cover
        are returned rather than rejected.
    :returns: ``list`` of missing subjects when ``add_missing`` is true, else ``None``.
    :raises AssertionError: when a validation rule is violated. Raised explicitly
        (not via ``assert``) so the checks survive ``python -O``.
    :raises RuntimeError: when an excluded subject text cannot be found.
    """
    # This function may be run outside of this command (e.g. in the admin app) so we
    # need to make sure that BEPRESS_PROVIDER is set
    global BEPRESS_PROVIDER
    BEPRESS_PROVIDER = AbstractProvider.objects.filter(_id='osf', type='osf.preprintprovider').first()

    logger.info('Validating data')
    includes = data.get('include', [])
    excludes = data.get('exclude', [])
    customs = data.get('custom', {})
    merges = data.get('merge', {})

    if copy:
        included_subjects = rules_to_subjects(custom_provider.subjects_acceptable)
    else:
        if set(includes) & set(excludes):
            raise AssertionError('There must be no overlap between includes and excludes')
        for text in includes:
            if not Subject.objects.filter(provider=BEPRESS_PROVIDER, text=text).exists():
                raise AssertionError('Unable to find included subject with text {}'.format(text))
        included_subjects = Subject.objects.filter(provider=BEPRESS_PROVIDER, text__in=includes).include_children()
        logger.info('Successfully validated `include`')

        for text in excludes:
            try:
                Subject.objects.get(provider=BEPRESS_PROVIDER, text=text)
            except Subject.DoesNotExist:
                raise RuntimeError('Unable to find excluded subject with text {}'.format(text))
            if not included_subjects.filter(text=text).exists():
                raise AssertionError('Excluded subject with text {} was not included'.format(text))
        included_subjects = included_subjects.exclude(text__in=excludes)
        logger.info('Successfully validated `exclude`')

        # .items() rather than .iteritems(): the latter does not exist on Python 3.
        for cust_name, map_dict in customs.items():
            if included_subjects.filter(text=cust_name).exists():
                raise AssertionError('Custom text {} already exists in mapped set'.format(cust_name))
            if not Subject.objects.filter(provider=BEPRESS_PROVIDER, text=map_dict.get('bepress')).exists():
                raise AssertionError('Unable to find specified BePress subject with text {}'.format(map_dict.get('bepress')))
            if map_dict.get('parent'):  # Null parent possible
                if map_dict['parent'] not in set(customs.keys()) | set(included_subjects.values_list('text', flat=True)):
                    raise AssertionError('Unable to find specified parent with text {} in mapped set'.format(map_dict['parent']))
                # TODO: hierarchy length validation? Probably more trouble than worth here, done on .save
        logger.info('Successfully validated `custom`')
        included_subjects = included_subjects | Subject.objects.filter(text__in=[map_dict['bepress'] for map_dict in customs.values()])

        for merged_from, merged_into in merges.items():
            if included_subjects.filter(text=merged_from).exists():
                raise AssertionError('Cannot merge subject "{}" that will be included'.format(merged_from))
            if merged_into not in set(included_subjects.values_list('text', flat=True)) | set(customs.keys()):
                raise AssertionError('Unable to determine merge target for "{}"'.format(merged_into))
        included_subjects = included_subjects | Subject.objects.filter(text__in=merges.keys())

    # Last (most specific) subject of every hierarchy used by this provider's preprints.
    used_subject_ids = {
        hier[-1].id
        for ps in PreprintService.objects.filter(provider=custom_provider)
        for hier in ps.subject_hierarchy
    }
    missing_subjects = Subject.objects.filter(id__in=used_subject_ids).exclude(id__in=included_subjects.values_list('id', flat=True))

    if not add_missing:
        if missing_subjects.exists():
            raise AssertionError('Incomplete mapping -- following subjects in use but not included:\n{}'.format(list(missing_subjects.values_list('text', flat=True))))

    if isinstance(custom_provider, PreprintProvider):
        if custom_provider.share_title in [None, '', 'bepress']:
            raise AssertionError('share title not set; please set the share title on this provider before creating a custom taxonomy.')

    logger.info('Successfully validated mapping completeness')
    return list(missing_subjects) if add_missing else None
def get(self, request, *args, **kwargs):
    """Respond with the ids of the subjects selected by the ``rules`` query parameter.

    ``rules`` is read from the query string as JSON, expanded with
    ``rules_to_subjects``, and the ``id`` of each resulting subject is returned
    under the ``subjects`` key of a JSON response.
    """
    raw_rules = request.GET['rules']
    parsed_rules = json.loads(raw_rules)
    subject_ids = [subject.id for subject in rules_to_subjects(parsed_rules)]
    return JsonResponse({'subjects': subject_ids})