def get_file_guids(cls, materialized_path, provider, node=None):
    """Collect the GUID ids of the file, or of every file under the folder, at a path.

    The path (with leading/trailing slashes stripped) is used as the key for
    ``cls.load``; when that finds nothing, ``TrashedFileNode.load`` is tried.

    :param str materialized_path: Path identifying the file or folder.
    :param provider: Only forwarded to the recursive calls.
    :param node: Only forwarded to the recursive calls.
    :return: ``_id`` of the first Guid pointing at each file found; empty when
        nothing is stored (live or trashed) under the path.
    :rtype: list
    """
    path = materialized_path.strip('/')
    file_obj = cls.load(path) or TrashedFileNode.load(path)
    if file_obj is None:
        # Neither a live nor a trashed node exists for this path, so no Guid
        # can refer to it. Previously this fell through to ``None.is_file``
        # and raised AttributeError.
        return []

    if file_obj.is_file:
        try:
            # Several Guids may point at one object; the first one is used.
            guid = Guid.find(Q('referent', 'eq', file_obj))[0]
        except IndexError:
            return []
        return [guid._id] if guid else []

    # At this point, file_obj is a folder: either an OsfStorageFolder or a
    # TrashedFileNode. TrashedFileNodes do not have *File and *Folder
    # subclasses, since only osfstorage trashes folders. To search for children
    # of TrashedFileNodes representing ex-OsfStorageFolders, the `children`
    # lookup of the Folder class is reimplemented here.
    if isinstance(file_obj, TrashedFileNode):
        children = TrashedFileNode.find(Q('parent', 'eq', file_obj._id))
    else:
        children = file_obj.children

    guids = []
    for item in children:
        guids.extend(cls.get_file_guids(item.path, provider, node=node))
    return guids
def restore(self, recursive=True, parent=None):
    """Rebuild a live file node from this trashed node's stored data.

    All Guids that referred to this object are re-pointed at the restored
    node, and this object removes itself once everything else succeeded.

    :param bool recursive: Also restore every TrashedFileNode whose parent is
        this object, re-parenting it under the restored node.
    :param parent: Node to restore under; when falsy, the stored parent is used.
    :return: The newly saved, restored node.
    :raises ValueError: If the restored node would have no parent.
    :raises KeyExistsException: (presumably raised by ``save()`` when the key
        is already taken -- confirm against the storage layer)
    """
    payload = self.to_storage()
    # These fields are stripped before the live node is rebuilt.
    for trash_only_field in ('deleted_on', 'deleted_by', 'suspended'):
        payload.pop(trash_only_field)

    if parent:
        payload['parent'] = parent._id
    elif payload['parent']:
        # parent is an AbstractForeignField, so it gets stored as tuple
        payload['parent'] = payload['parent'][0]

    node_class = FileNode.resolve_class(self.provider, int(self.is_file))
    restored = node_class(**payload)
    if not restored.parent:
        raise ValueError('No parent to restore to')
    restored.save()

    # Re-point every Guid that referred to the trashed node.
    for pointer in Guid.find(Q('referent', 'eq', self)):
        pointer.referent = restored
        pointer.save()

    if recursive:
        for trashed_child in TrashedFileNode.find(Q('parent', 'eq', self)):
            trashed_child.restore(recursive=recursive, parent=restored)

    TrashedFileNode.remove_one(self)
    return restored
def _verify_table(modm_model, django_model, modm_fmt, ok_msg, fail_msg, postgres_fmt):
    """Compare the MODM record count of one model with its Postgres counterpart.

    Verification passes only when the MODM count, the number of Postgres rows
    whose ``guid`` is one of the MODM keys, and the total Postgres row count
    are all equal.
    """
    modm_records = modm_model.find()
    modm_count = len(modm_records)
    print(modm_fmt.format(modm_count))

    # ``count()`` lets the database do the counting instead of loading every
    # matching row into memory just to call ``len`` on the result.
    filtered_count = django_model.objects.filter(
        guid__in=modm_records.get_keys()).count()
    total_count = django_model.objects.count()

    if modm_count == filtered_count == total_count:
        print(ok_msg)
    else:
        print(fail_msg)
    # Report the very total that was compared above rather than re-querying.
    print(postgres_fmt.format(total_count))


def main():
    """Verify that Guids and BlacklistGuids were fully migrated from MODM.

    Results are only printed; nothing is returned or raised on a mismatch.
    Single-argument ``print(...)`` is used so the output is the same whether
    ``print`` is a statement (Python 2) or a function.
    """
    _verify_table(
        MODMGuid, Guid,
        'MODM Guids: {}', 'Guids verified!', 'Guids not verified!',
        'Postgres Guids: {}',
    )
    # The helper's locals are gone by now; collect before the second pass to
    # keep peak memory down.
    gc.collect()

    _verify_table(
        MODMBlacklistGuid, BlackListGuid,
        'MODM BlacklistGuids: {}', 'Blacklist Guids Verified!',
        'Blacklist Guids Not Verified!', 'Postgres Blacklist Guids: {}',
    )
    gc.collect()
def main():
    """Check that every MODM Guid and BlacklistGuid exists in Postgres.

    For each model the MODM record count, the number of Postgres rows whose
    ``guid`` is one of the MODM keys, and the total Postgres row count must
    all be equal. The outcome is printed; nothing is returned or raised.
    Single-argument ``print(...)`` is used so the output is the same whether
    ``print`` is a statement (Python 2) or a function.
    """
    modm_guids = MODMGuid.find()
    modm_count = len(modm_guids)
    print('MODM Guids: {}'.format(modm_count))
    # ``count()`` is answered by the database; ``len(queryset)`` would load
    # every matching row into memory first.
    filtered_count = Guid.objects.filter(guid__in=modm_guids.get_keys()).count()
    total_count = Guid.objects.count()
    if modm_count == filtered_count == total_count:
        print('Guids verified!')
    else:
        print('Guids not verified!')
    # Print the total that was actually compared instead of querying again.
    print('Postgres Guids: {}'.format(total_count))

    # Drop the MODM result set before loading the next one.
    modm_guids = None
    gc.collect()

    modm_blacklist_guids = MODMBlacklistGuid.find()
    modm_blacklist_count = len(modm_blacklist_guids)
    print('MODM BlacklistGuids: {}'.format(modm_blacklist_count))
    filtered_count = BlackListGuid.objects.filter(
        guid__in=modm_blacklist_guids.get_keys()).count()
    total_count = BlackListGuid.objects.count()
    if modm_blacklist_count == filtered_count == total_count:
        print('Blacklist Guids Verified!')
    else:
        print('Blacklist Guids Not Verified!')
    print('Postgres Blacklist Guids: {}'.format(total_count))

    modm_blacklist_guids = None
    gc.collect()
def get_guid(self):
    """Look up a Guid whose referent is this object.

    :return: The first matching Guid, or None when no Guid points here.
    :rtype: Guid or None
    """
    try:
        # More than one Guid may point at the same object; the first match
        # is the one handed back.
        first_match = Guid.find(Q('referent', 'eq', self))[0]
    except IndexError:
        first_match = None
    return first_match
def get_guid(self, create=False):
    """Look up a Guid whose referent is this object, optionally creating one.

    :param bool create: When no Guid exists, generate one via
        ``Guid.generate`` instead of returning None.
    :return: The first matching Guid, a newly generated one, or None when
        nothing matched and ``create`` is falsy.
    :rtype: Guid or None
    """
    try:
        # More than one Guid may point at the same object; the first match
        # is the one handed back.
        return Guid.find(Q('referent', 'eq', self))[0]
    except IndexError:
        return Guid.generate(self) if create else None
def get_targets():
    """Find GUIDs with no referents and GUIDs with referents that no longer exist."""
    # A Python-side loop is used because querying MODM with
    # Guid.find(Q('referent', 'eq', None)) only catches the first case.
    #
    # NodeFiles were once a GuidStored object and are no longer used. They
    # still exist in the production database, so they are skipped for now;
    # they will probably need to be removed in the future.
    # There were also 10 osfguidfile objects that lived in a corrupt repo and
    # were not migrated to OSF storage; those are skipped as well.
    # /sloria /jmcarp
    skipped_kinds = ['nodefile', 'osfguidfile']
    orphans = []
    for guid in Guid.find(Q('referent.1', 'nin', skipped_kinds)):
        if guid.referent is not None:
            continue
        logger.info('GUID {} has no referent.'.format(guid._id))
        orphans.append(guid)
    return orphans
def get_guid(self, create=False):
    """Return a Guid that refers to this object, generating one on request.

    :param Boolean create: Generate a Guid when none exists yet? Default: False
    :rtype: Guid or None
    """
    try:
        # An object can end up with several Guids; the first one found wins.
        return Guid.find(Q("referent", "eq", self))[0]
    except IndexError:
        if create:
            return Guid.generate(self)
        return None
def get_file_guids(cls, materialized_path, provider, node=None, guids=None):
    """Collect the GUID ids of the file, or of every file under the folder, at a path.

    The path (with leading/trailing slashes stripped) is used as the key for
    ``cls.load``; when that finds nothing, ``TrashedFileNode.load`` is tried.

    :param str materialized_path: Path identifying the file or folder.
    :param provider: Only forwarded to the recursive calls.
    :param node: Only forwarded to the recursive calls.
    :param list guids: Accumulator that found ids are appended to; a new list
        is created when omitted.
    :return: The accumulator, holding the ``_id`` of the first Guid pointing
        at each file found.
    :rtype: list
    """
    # Test against None explicitly: ``guids or []`` swapped the (still empty)
    # accumulator for a fresh list in every recursive call, and since the
    # recursive return value is ignored, ids found below a folder were lost.
    if guids is None:
        guids = []

    path = materialized_path.strip('/')
    file_obj = cls.load(path)
    if not file_obj:
        file_obj = TrashedFileNode.load(path)
    if file_obj is None:
        # Nothing is stored (live or trashed) under this path, so there is
        # nothing to add; previously this raised AttributeError below.
        return guids

    if not file_obj.is_file:
        for item in file_obj.children:
            cls.get_file_guids(item.path, provider, node=node, guids=guids)
    else:
        try:
            # Several Guids may point at one object; the first one is used.
            guid = Guid.find(Q('referent', 'eq', file_obj))[0]
        except IndexError:
            guid = None
        if guid:
            guids.append(guid._id)
    return guids
def main():
    """Create a Django ``Guid`` for every MODM Guid key in one bulk insert.

    Progress is printed every 10000 keys while the objects are built in
    memory; the final line prints the Django row count next to the MODM count.
    """
    modm_guids = MODMGuid.find()
    total = len(modm_guids)
    progress_interval = 10000
    print('Migrating {} Guids'.format(total))

    django_guids = []
    for seen, key in enumerate(modm_guids.get_keys(), 1):
        django_guids.append(Guid(guid=key))
        if seen % progress_interval == 0:
            print(seen)

    print('Saving {} Guids'.format(len(django_guids)))
    Guid.objects.bulk_create(django_guids)

    django_total = Guid.objects.all().count()
    print('Django Guids {}\nMODM Guids {}'.format(django_total, total))
def _repoint_guids(self, updated):
    """Make every Guid that refers to this object refer to ``updated`` instead.

    :param updated: The object the Guids should point at from now on.
    """
    pointing_here = Guid.find(Q('referent', 'eq', self))
    for pointer in pointing_here:
        pointer.referent = updated
        pointer.save()
def get_targets():
    """Find GUIDs with no referents and GUIDs with referents that no longer exist."""
    # Every Guid is inspected in Python because querying MODM with
    # Guid.find(Q('referent', 'eq', None)) only catches the first case.
    orphans = []
    for guid in Guid.find():
        if guid.referent is None:
            orphans.append(guid)
    return orphans
def _repoint_guids(self, updated):
    """Re-point all Guids whose referent is this object at ``updated``.

    Each affected Guid is saved individually after its referent is changed.
    """
    query = Q("referent", "eq", self)
    for stale_guid in Guid.find(query):
        stale_guid.referent = updated
        stale_guid.save()
def get_targets():
    """Return the Guids matched by the module-level ``QUERY``."""
    targets = Guid.find(QUERY)
    return targets