class Company(semantictags.Taggable):
    """A company record, optionally nested under a parent company.

    Instances are ordered by the ``name`` string of their shared-string
    name (see ``Meta.ordering``).
    """

    id = models.AutoField(primary_key=True)
    name = sharedstrings.SharedStringField()
    # name = models.ForeignKey("sharedstrings.Strings",related_name="+",on_delete=models.PROTECT)
    case_sensitive_name = models.BooleanField(
        null=False,
        blank=False,
        default=False,
    )
    # Self-referential link; the reverse accessor on the parent is ``children``.
    parent = models.ForeignKey(
        "self",
        on_delete=models.PROTECT,
        null=True,
        blank=True,
        related_name="children",
    )
    website = models.ForeignKey(
        "retconpeople.Website",
        on_delete=models.PROTECT,
        null=True,
        blank=True,
    )
    # Nullable boolean: None is a permitted third state besides True/False.
    defunct = models.BooleanField(null=True, blank=True)
    external_representations = models.ManyToManyField(
        "remotables.ContentResource",
        related_name="+",
        blank=True,
    )

    class Meta:
        ordering = ['name__name']

    def __str__(self):
        """Return the text of the shared-string name."""
        shared_name = self.name
        return shared_name.name

    def pull_associated_people(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def pull_associated_works(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()
class Character(models.Model):
    """A named character with a short description and an optional franchise."""

    name = sharedstrings.SharedStringField()
    description = models.CharField(max_length=64)
    # The link is cleared (set to NULL) when the referenced Franchise is deleted.
    franchise = models.ForeignKey(
        "Franchise",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )

    def __str__(self):
        """Return ``<name>:<description>``."""
        parts = (str(self.name), str(self.description))
        return ":".join(parts)
class Genre(models.Model):
    """A genre, optionally nested under a parent genre."""

    name = sharedstrings.SharedStringField()
    # NOTE(review): the field name is spelled "decription" (sic); it is kept
    # as-is because renaming a model field requires a schema migration.
    decription = models.CharField(max_length=128)
    parent = models.ForeignKey(
        "self",
        null=True,
        blank=True,
        on_delete=models.PROTECT,
    )

    class Meta:
        ordering = ('name', )

    def __str__(self):
        """Return the string form of the genre's name."""
        return str(self.name)
class UserName(UserLabel):
    """A textual user name on a specific website, optionally owned by a Person.

    The pair (website, name) is unique.
    """

    id = models.AutoField(primary_key=True)
    website = models.ForeignKey(
        "Website",
        related_name="user_names",
        on_delete=models.PROTECT,
        null=False,
    )
    name = sharedstrings.SharedStringField(null=False)
    tags = models.ManyToManyField("semantictags.Tag", related_name="+")
    # Owning person; reachable from the Person side as ``usernames``.
    belongs_to = models.ForeignKey(
        "Person",
        related_name='usernames',
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )

    class Meta:
        unique_together = ['website', 'name']

    def get_url(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def __str__(self):
        """Return ``<name>@<website>``."""
        site_text = str(self.website)
        return str(self.name) + "@" + site_text
class Website(models.Model):
    """A website identified by a unique domain, optionally under a parent site."""

    # Maximum number of description characters shown by brief().
    BRIEF_TRUNCATE_LENGTH = 100  #TODO get from settings

    id = models.AutoField(primary_key=True)
    parent_site = models.ForeignKey("self",
                                    on_delete=models.DO_NOTHING,
                                    null=True,
                                    blank=True,
                                    related_name="child_sites")
    # A domain should consist only of tld and name, not subdomain.
    domain = models.CharField(max_length=256,
                              help_text="e.g. twitter.com",
                              unique=True)
    name = sharedstrings.SharedStringField()
    tld = sharedstrings.SharedStringField()
    user_id_format_string = models.CharField(max_length=1024,
                                             null=True,
                                             blank=True)
    description = models.CharField(max_length=255)
    tags = models.ManyToManyField("semantictags.Tag",
                                  related_name="+",
                                  blank=True)

    def save(self, *args, **kwargs):
        """Fill in derived values where possible, then save the row.

        * When ``tld`` is None, ``auto_replace_tld()`` is attempted on a
          best-effort basis; any ordinary failure is ignored.
        * When ``name`` holds a plain ``str``, it is swapped for the matching
          ``sharedstrings.Strings`` row (created if missing).
        """
        if self.tld is None:
            try:
                self.auto_replace_tld()
            except Exception:
                # Best effort: tldextract is an optional requirement, so a
                # missing module or an extraction failure must not block the
                # save. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit still propagate.
                pass
        if isinstance(self.name, str):
            value, _created = sharedstrings.Strings.objects.get_or_create(
                name=self.name)
            self.name = value
        super().save(*args, **kwargs)  # Call the "real" save() method.

    def calculate_tld(self):
        """Return the suffix tldextract reports for ``self.domain``."""
        import tldextract  #this import is intentionally here as an optional requirement
        return tldextract.extract(self.domain).suffix

    def auto_replace_tld(self):
        """Set ``tld`` to the shared string for the calculated suffix."""
        suffix = self.calculate_tld()
        value, _created = sharedstrings.Strings.objects.get_or_create(
            name=suffix)
        self.tld = value

    def substitute_username_pattern(self, args):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def substitute_user_number_pattern(self, args):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    @classmethod
    def parse_url(cls, url):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()
        #get patterns test patterns
        #test
        #if is int produce a Usernummber
        #else produce a username
        #note this pair may already exist so get_or_create

    #TODO on save update url pattern cache
    @classmethod
    def reload_pattern_cache(cls):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    @classmethod
    def list_patterns(cls):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def parent_site_name(self):
        '''Used for display in tabular views'''
        return self.parent_site.domain if self.parent_site else None

    def brief(self):
        """Return the description, shortened to BRIEF_TRUNCATE_LENGTH.

        An ellipsis is appended only when characters were actually removed;
        a description of exactly BRIEF_TRUNCATE_LENGTH characters is returned
        unchanged.
        """
        s = self.description
        if len(s) > self.BRIEF_TRUNCATE_LENGTH:
            return s[:self.BRIEF_TRUNCATE_LENGTH] + '…'
        return s

    def __str__(self):
        """Return ``<name> (<domain>)``."""
        return "{} ({})".format(self.name, self.domain)
class Person(models.Model):
    """A person, reachable through names, pseudonyms and per-website labels.

    A duplicate record is folded into another one by pointing its
    ``merged_into`` at the surviving record; ``clean()`` enforces the rules
    for that link and runs on every ``save()``.
    """

    id = models.AutoField(primary_key=True)
    first_name = sharedstrings.SharedStringField(blank=True, null=True)
    last_name = sharedstrings.SharedStringField()
    pseudonyms = models.ManyToManyField("sharedstrings.Strings",
                                        related_name="+",
                                        blank=True)
    description = models.CharField(max_length=512, blank=True)
    # Set on a merged (duplicate) record; the reverse accessor on the
    # surviving record is ``merged_from``.
    merged_into = models.ForeignKey("self",
                                    related_name="merged_from",
                                    on_delete=models.DO_NOTHING,
                                    null=True,
                                    blank=True)
    distinguish_from = models.ManyToManyField(
        "self",
        symmetrical=True,
        blank=True,
        help_text="Indicate people of similar names who should not be confused"
    )
    tags = models.ManyToManyField("semantictags.Tag",
                                  related_name="+",
                                  blank=True)
    ambiguous_tags = models.ManyToManyField("sharedstrings.Strings",
                                            blank=True)
    uuid = models.UUIDField(default=uuid.uuid4, blank=True, unique=True)
    external_representations = models.ManyToManyField(
        "remotables.ContentResource", related_name="+", blank=True)

    # When True, clean() accepts a merged_into that differs from the stored one.
    canonicalize = False

    @property
    def pseudonyms_readonly(self):
        """Return all pseudonym strings joined by commas."""
        return ",".join([x.name for x in self.pseudonyms.all()])

    def get_usernames(self):
        """Placeholder; always raises NotImplementedError.

        TODO: the unfinished draft collected ``usernames`` from every record
        returned by ``_collect_children()``.
        """
        raise NotImplementedError()

    def _collect_children(self):
        """Return every record merged into this one (recursively), then self.

        Each child's own sub-tree is flattened into the result, so the list
        contains Person instances only and each record appears once.
        """
        collected = []
        # ``merged_from`` is the reverse accessor declared on merged_into.
        for child in self.merged_from.all():
            # The child's list already ends with the child itself.
            collected.extend(child._collect_children())
        collected.append(self)
        return collected

    def _merge_into(self, target):
        """Point this record at ``target`` without saving.

        Raises:
            ValueError: if this record is already merged into another one.
        """
        if self.merged_into is not None:
            raise ValueError(
                "Merge cannot be performed because this model is already merged"
            )
        self.merged_into = target

    def merge_with(self, target):
        """Merge this record and ``target``, keeping the one with the lower id.

        The record with the higher id is pointed at the other one and then
        saved, so the link is persisted whichever side was folded.

        Raises:
            ValueError: if the record being folded is already merged.
        """
        if target.id < self.id:
            merged, survivor = self, target
        else:
            merged, survivor = target, self
        merged._merge_into(survivor)
        merged.save(update_fields=['merged_into'])

    def clean(self):
        """Validate the ``merged_into`` link.

        Raises:
            ValidationError: when merging into self, merging into a record
                that is itself merged, changing an existing merge target
                while ``canonicalize`` is False, or when the merge chain
                loops back on itself.
        """
        #Cheap test first, always need to do
        if self.merged_into_id is not None:
            if self.merged_into_id == self.id:
                raise ValidationError(
                    {'merged_into': ('Cannot merge into self')})
            # Inspect the target's raw foreign-key value: dereferencing
            # ``merged_into.merged_into.id`` fails with AttributeError
            # whenever the target is (correctly) not merged.
            if self.merged_into.merged_into_id is not None:
                raise ValidationError(
                    {'merged_into': ('Cannot merge into merged object')})

        if self.pk is not None and self.merged_into is not None:
            #this is an update
            #django normally updates without select, but we need to select
            current = Person.objects.get(id=self.id)
            if current.merged_into_id is not None:
                if not self.canonicalize and (current.merged_into_id !=
                                              self.merged_into_id):
                    raise ValidationError({
                        'merged_into':
                        ('Changing merge target violates integrity.')
                    })
            else:
                # NOTE(review): this branch is taken when the stored row is
                # not merged yet - confirm that is the intended pairing.
                #Do a long scan for the root of the merge chain
                target = self
                visited = []
                while target.merged_into_id is not None:
                    visited.append(target.id)
                    #check the next id before fetching it; the current id was
                    #just recorded, so testing it too would always fire
                    if target.merged_into_id in visited:
                        raise ValidationError({
                            'merged_into':
                            ('Cycle detected:{}'.format(visited))
                        })
                    target = target.merged_into
                self.merged_into_id = target.id

    def initials(self):
        """Return the first element of ``first_name``, or "" when it is unset."""
        return self.first_name[0] if self.first_name else ""

    @property
    def formatted_name(self, shorten=False):
        """Return the best available display name.

        Preference order: ``last, first`` when both are set, the single name
        that is set, the first pseudonym, the first username, and finally
        ``"?"`` when nothing is available or the lookup fails.

        ``shorten`` keeps its default when this is read as an attribute; it
        only takes effect if the getter is invoked directly.
        """
        if self.last_name is not None:
            if self.first_name is None:
                return self.last_name
            if shorten:
                return "{}.{}".format(self.first_name[0].upper(),
                                      self.last_name)
            return "{}, {}".format(self.last_name, self.first_name)

        if self.first_name is not None:
            return self.first_name

        try:
            #This occasionally causes crashes by infinite recursion when the debugger requests formatting
            #This happens for a currently unknown reason
            if self.pseudonyms.count() > 0:
                return self.pseudonyms.all()[0].name
            un = self.usernames
            if un.count() > 0:
                return un.all()[0].name.name
        except Exception:
            # RecursionError is an Exception subclass, so the case described
            # above is still handled here.
            return "?"
        return "?"

    @property
    def brief(self, length=64, include_ellipsis=True):
        """Return the description, shortened when longer than ``length``.

        With ``include_ellipsis`` the first ``length - 2`` characters are kept
        and an ellipsis is appended; otherwise the first ``length - 1``
        characters are kept. Both parameters keep their defaults when this is
        read as an attribute.
        """
        s = self.description
        if len(s) > length:
            if include_ellipsis:
                s = s[0:length - 2] + u"…"
            else:
                s = s[0:length - 1]
        return s

    def __str__(self):
        """Return ``<formatted_name>: <brief>``."""
        return "{}: {}".format(self.formatted_name, self.brief)

    def save(self, *args, **kwargs):
        """Run full_clean() (and therefore clean()) before every save."""
        self.full_clean()
        super().save(*args, **kwargs)

    def pull_associated_works(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def pull_associated_companies(self):
        """Placeholder; always raises NotImplementedError."""
        raise NotImplementedError()

    def wanted_id_count(self):
        """Return how many usernames plus user numbers have ``wanted=True``."""
        return self.usernames.filter(
            wanted=True).count() + self.user_numbers.filter(
                wanted=True).count()

    class DuplicateIdentityError(ValueError):
        """Raised when identifiers resolve to more than one person.

        ``identities`` holds the conflicting persons.
        """

        def __init__(self, identities):
            self.identities = identities

    @classmethod
    def create_from_identifiers(cls,
                                urls=(),
                                user_identifiers=(),
                                fail_on_missing_domain=True):
        '''Find or create the person owning the given identifiers.

        ``user_identifiers`` items are either ``{'name': ..., 'domain': ...}``
        dicts or ``(name, domain)`` pairs, where ``domain`` is a Website or a
        domain string. Names that convert to ``int`` are stored as user
        numbers, all others as user names. Everything runs in one transaction.

        Returns:
            ``(person_created, person_partial, person)``, or ``False`` when no
            identifier could be derived from the arguments. ``person_partial``
            is True when an unknown domain was skipped.

        Raises:
            ValueError: for a dict identifier lacking 'name' or 'domain'.
            ObjectDoesNotExist: for an unknown domain when
                ``fail_on_missing_domain`` is True.
            Person.DuplicateIdentityError: when the identifiers already
                belong to more than one person.
        '''
        person_created = False
        person_partial = False
        with transaction.atomic():
            name_sites = list(cls.urls_to_name_site_pair(urls))
            for x in user_identifiers:
                if isinstance(x, dict):
                    #unpack dictionary form
                    try:
                        name_sites.append((x['name'], x['domain']))
                    except KeyError:
                        raise ValueError('Malformed user_identifiers')
                else:
                    #(name,domain) form
                    name_sites.append(x)

            if not name_sites:
                return False

            #Check if there is an existing person with any of these ids
            found = cls.search_by_identifiers(urls=[],
                                              user_identifiers=name_sites,
                                              expect_single=True)
            if found:
                pid = found[0]
            else:
                #if there isn't create a new one
                person_created = True
                pid = Person()
                pid.save()

            for name, domain in name_sites:
                site = domain
                if isinstance(site, str):
                    try:
                        site = Website.objects.get(domain=site)
                    except ObjectDoesNotExist:
                        if fail_on_missing_domain:
                            raise
                        person_partial = True
                        continue

                # Only the conversion decides the label type; a ValueError
                # raised while storing must not be mistaken for "not a number".
                try:
                    number = int(name)
                except ValueError:
                    ns, _created = sharedstrings.Strings.objects.get_or_create(
                        name=name)
                    ns.save()
                    un, _created = UserName.objects.get_or_create(
                        name=ns, website=site)
                    un.save()
                    pid.usernames.add(un)
                else:
                    un, _created = UserNumber.objects.get_or_create(
                        number=number, website=site)
                    un.save()
                    pid.user_numbers.add(un)

            pid.save()
            return (person_created, person_partial, pid)

    @classmethod
    def urls_to_name_site_pair(cls, urls=()):
        """Yield ``(name, website)`` for every URL matching a UrlPattern.

        ``name`` is group 1 of the pattern match, converted to ``int`` when
        possible and left untouched otherwise. Nothing is queried when
        ``urls`` is empty.
        """
        if not urls:
            return
        #TODO maybe optimal to filter domainname first at scale
        for p in UrlPattern.objects.all():
            #NB match only looks at the start of the string but this is optimal
            #since we're looking at full urls
            regex = re.compile(p.pattern)
            for url in urls:
                out = regex.match(url)
                if out is None:
                    continue
                name = out.group(1)
                # The try covers the conversion only: a handler wrapped
                # around the yield would also intercept GeneratorExit.
                try:
                    name = int(name)
                except (TypeError, ValueError):
                    pass
                yield (name, p.website)

    @staticmethod
    def _find_label(name, **site_filter):
        """Return the UserNumber/UserName for ``name`` on a site, or None.

        ``site_filter`` carries the website lookup (``website=...`` or
        ``website__domain=...``). Names that convert to ``int`` are looked up
        as user numbers, others as case-insensitive user names.
        """
        try:
            number = int(name)
        except ValueError:
            try:
                return UserName.objects.get(name__name__iexact=name,
                                            **site_filter)
            except UserName.DoesNotExist:
                return None
        try:
            return UserNumber.objects.get(number=number, **site_filter)
        except UserNumber.DoesNotExist:
            return None

    @classmethod
    def search_by_identifiers(cls,
                              urls=(),
                              user_identifiers=(),
                              expect_single=False):
        '''Return the persons owning any of the given identifiers.

        ``user_identifiers`` items are ``{'name', 'domain'}`` dicts or
        ``(name, domain)`` pairs; ``urls`` are matched against every
        UrlPattern. Labels that exist but have no owner are ignored.

        Raises:
            Person.DuplicateIdentityError: as soon as more than one person is
                found while ``expect_single`` is True.
        '''
        identities = set()

        def record(label):
            # Unowned labels have belongs_to None, which is not an identity.
            owner = label.belongs_to
            if owner is not None:
                identities.add(owner)
            if expect_single and len(identities) > 1:
                raise Person.DuplicateIdentityError(list(identities))

        for x in user_identifiers:
            if isinstance(x, dict):
                name = x['name']
                domain_name = x['domain']
            else:
                name, domain_name = x

            if isinstance(domain_name, Website):
                label = cls._find_label(name, website=domain_name)
            else:
                label = cls._find_label(name, website__domain=domain_name)
            if label is not None:
                record(label)

        if urls:
            #TODO maybe optimal to filter domainname first at scale
            for p in UrlPattern.objects.all():
                #NB match only looks at the start of the string but this is optimal
                #since we're looking at full urls
                regex = re.compile(p.pattern)
                for url in urls:
                    #do regex match to extract site and identifier.
                    out = regex.match(url)
                    if out is None:
                        continue
                    # Numeric and textual identifiers are both recorded; a
                    # match without a stored label just moves on.
                    label = cls._find_label(out.group(1), website=p.website)
                    if label is not None:
                        record(label)

        #sanity check
        if expect_single and len(identities) > 1:
            raise Person.DuplicateIdentityError(list(identities))
        return list(identities)

    class Meta:
        # ordering=['id']
        pass
class Franchise(models.Model):
    """A franchise with a shared-string name and a short description."""

    name = sharedstrings.SharedStringField()
    description = models.CharField(
        max_length=64,
    )