def setUp(self):
    """Build a legacy NodeFile plus an osfstorage record for date-migration tests."""
    super(TestMigrateDates, self).setUp()
    self.path = 'old-pizza'
    self.project = ProjectFactory()
    self.node_settings = self.project.get_addon('osfstorage')
    # Legacy file whose modification date should survive the migration.
    self.node_file = NodeFile(path=self.path)
    self.node_file.save()
    self.date = self.node_file.date_modified
    self.project.files_versions['old_pizza'] = [self.node_file._id]
    self.project.save()
    # New-style version stamped "now"; migration is expected to rewrite it.
    self.version = FileVersionFactory(date_modified=datetime.datetime.now())
    self.record = OsfStorageFileRecord.get_or_create(
        self.node_file.path,
        self.node_settings,
    )
    self.record.versions = [self.version]
    self.record.save()
def migrate_node(node, dry_run=True):
    """Migrate legacy files for a node.

    If the git repo for the node is corrupt, attempt to use its source
    node (registration or fork) instead.
    """
    logger.info('Migrating node {0}'.format(node._id))
    node_settings = node.get_or_add_addon('osfstorage', auth=None, log=False)
    source_node = None
    if not check_node(node):
        # Repo is damaged; try to migrate from the node this one was
        # registered/forked from instead.
        logger.warn(
            'Original node {0} is corrupt; attempting to recover'.format(node._id)
        )
        source_node = get_source_node(node)
        if source_node is None:
            logger.error(
                'Could not identify source node for recovery on node {0}'.format(node._id)
            )
    for path, versions in node.files_versions.iteritems():
        for idx, version in enumerate(versions):
            try:
                migrate_version(
                    idx,
                    NodeFile.load(version),
                    node_settings,
                    node=source_node,
                    dry_run=dry_run,
                )
            except Exception as error:
                # Log and abandon the remaining versions of this file.
                logger.error(
                    'Could not migrate object {0} on node {1}'.format(version, node._id)
                )
                logger.exception(error)
                break
def list_file_paths(**kwargs):
    """Return the paths of all current legacy files on the target node/project."""
    target = kwargs['node'] or kwargs['project']
    paths = []
    for fid in target.files_current.values():
        paths.append(NodeFile.load(fid).path)
    return {'files': paths}
def test_migrate_incomplete(self):
    """Migration should tolerate a record left with a pending (incomplete) version."""
    node_settings = self.project.get_or_add_addon('osfstorage', auth=None, log=False)
    record = model.OsfStorageFileRecord.get_or_create('pizza.md', node_settings)
    node_file = NodeFile.load(self.project.files_versions['pizza_md'][0])
    content, _ = self.project.read_file_object(node_file)
    file_pointer = StringIO(content)
    # Leave the record mid-upload: a pending version with the content hash.
    digest = scripts_settings.UPLOAD_PRIMARY_HASH(content).hexdigest()
    record.create_pending_version(node_file.uploader, digest)
    main(dry_run=False)
def file_info(auth, fid, **kwargs):
    # Build the version-history payload for a legacy file: one entry per
    # stored version (newest first) plus URLs for the files page and the
    # latest-version download. Raises HTTPError(404) if the file is unknown.
    versions = []
    node = kwargs['node'] or kwargs['project']
    file_name = fid
    # files_versions is keyed by a sanitized form of the filename.
    file_name_clean = urlsafe_filename(file_name)
    files_page_url = node.web_url_for('collect_file_trees')
    latest_download_url = None
    api_url = None
    # Hide committer identities when the node is viewed via an anonymous link.
    anonymous = has_anonymous_link(node, auth)
    try:
        files_versions = node.files_versions[file_name_clean]
    except KeyError:
        raise HTTPError(http.NOT_FOUND)
    # NOTE(review): the +1 suggests get_latest_version_number is zero-based
    # while display numbers below are one-based — confirm against the helper.
    latest_version_number = get_latest_version_number(file_name_clean, node) + 1
    for idx, version in enumerate(list(reversed(files_versions))):
        node_file = NodeFile.load(version)
        # One-based display number counting down from the newest version.
        number = len(files_versions) - idx
        unique, total = get_basic_counters('download:{}:{}:{}'.format(
            node._primary_key, file_name_clean, number,
        ))
        download_url = node_file.download_url(node)
        api_url = node_file.api_url(node)
        versions.append({
            'file_name': file_name,
            'download_url': download_url,
            'version_number': number,
            # Newest row (idx == 0) is labeled 'current' instead of its number.
            'display_number': number if idx > 0 else 'current',
            'modified_date': node_file.date_uploaded.strftime('%Y/%m/%d %I:%M %p'),
            'downloads': total if total else 0,
            'committer_name': privacy_info_handle(
                node_file.uploader.fullname, anonymous, name=True
            ),
            'committer_url': privacy_info_handle(node_file.uploader.url, anonymous),
        })
        if number == latest_version_number:
            latest_download_url = download_url
    return {
        'node_title': node.title,
        'file_name': file_name,
        'versions': versions,
        'registered': node.is_registration,
        'urls': {
            # NOTE(review): api_url is whatever the LAST loop iteration set,
            # i.e. the oldest version's API URL — verify this is intended.
            'api': api_url,
            'files': files_page_url,
            'latest': {
                'download': latest_download_url,
            },
        }
    }
class TestMigrateDates(OsfTestCase):
    """Verify the migration copies legacy modification dates onto new versions."""

    def setUp(self):
        super(TestMigrateDates, self).setUp()
        self.path = 'old-pizza'
        self.project = ProjectFactory()
        self.node_settings = self.project.get_addon('osfstorage')
        # Legacy file record whose date must win after migration.
        self.node_file = NodeFile(path=self.path)
        self.node_file.save()
        self.date = self.node_file.date_modified
        self.project.files_versions['old_pizza'] = [self.node_file._id]
        self.project.save()
        # Fresh version carrying the wrong ("now") timestamp.
        self.version = FileVersionFactory(date_modified=datetime.datetime.now())
        self.record = OsfStorageFileRecord.get_or_create(
            self.node_file.path,
            self.node_settings,
        )
        self.record.versions = [self.version]
        self.record.save()

    def test_migrate_dates(self):
        # Before: dates differ; after running the migration they must match.
        assert_not_equal(self.version.date_modified, self.date)
        main(dry_run=False)
        assert_equal(self.version.date_modified, self.date)
class TestMigrateDates(OsfTestCase):
    """Date-migration regression test: legacy dates overwrite version dates."""

    def setUp(self):
        super(TestMigrateDates, self).setUp()
        self.path = 'old-pizza'
        self.project = ProjectFactory()
        self.node_settings = self.project.get_addon('osfstorage')
        self.node_file = NodeFile(path=self.path)
        self.node_file.save()
        # Remember the legacy date the migration should propagate.
        self.date = self.node_file.date_modified
        self.project.files_versions['old_pizza'] = [self.node_file._id]
        self.project.save()
        self.version = FileVersionFactory(
            date_modified=datetime.datetime.now(),
        )
        self.record = OsfStorageFileRecord.get_or_create(
            self.node_file.path,
            self.node_settings,
        )
        self.record.versions = [self.version]
        self.record.save()

    def test_migrate_dates(self):
        assert_not_equal(self.version.date_modified, self.date)
        main(dry_run=False)
        # Migration copied the legacy date onto the osfstorage version.
        assert_equal(self.version.date_modified, self.date)
def setUp(self):
    """Prepare a legacy file and an osfstorage record with mismatched dates."""
    super(TestMigrateDates, self).setUp()
    self.path = 'old-pizza'
    self.project = ProjectFactory()
    self.node_settings = self.project.get_addon('osfstorage')
    self.node_file = NodeFile(path=self.path)
    self.node_file.save()
    # The legacy date that migration should carry over.
    self.date = self.node_file.date_modified
    self.project.files_versions['old_pizza'] = [self.node_file._id]
    self.project.save()
    self.version = FileVersionFactory(
        date_modified=datetime.datetime.now(),
    )
    self.record = OsfStorageFileRecord.get_or_create(
        self.node_file.path, self.node_settings,
    )
    self.record.versions = [self.version]
    self.record.save()
def migrate_node(node, dry_run=True):
    """Migrate every legacy file version on `node` into its osfstorage addon."""
    node_settings = node.get_addon('osfstorage')
    for path, versions in node.files_versions.iteritems():
        for idx, version in enumerate(versions):
            logger.info('Migrating file {0}, version {1} on node {2}'.format(path, idx, node._id))
            if dry_run:
                continue
            try:
                node_file = NodeFile.load(version)
                record = OsfStorageFileRecord.find_by_path(node_file.path, node_settings)
                migrate_version(idx, node_file, record)
            except Exception as error:
                # Log the failure and skip this file's remaining versions.
                logger.error('Could not migrate object {0} on node {1}'.format(version, node._id))
                logger.exception(error)
                break
def test_view_creates_guid(self):
    """Viewing a file mints exactly one GUID, reused on subsequent views."""
    guid_fid = 'unique'
    guid_content = 'snowflake'
    self._upload_file(guid_fid, guid_content)
    node_file = NodeFile.load(self.project.files_current[guid_fid])
    guid_count = OsfGuidFile.find().count()

    # First view: should create a GUID and redirect to it.
    url = node_file.url(self.project)
    res = self.app.get(url, auth=self.user.auth).follow(auth=self.user.auth)

    guid = OsfGuidFile.find_one(
        Q('node', 'eq', self.project) &
        Q('name', 'eq', guid_fid)
    )

    # GUID count has been incremented by one
    assert_equal(OsfGuidFile.find().count(), guid_count + 1)

    # Client has been redirected to GUID
    assert_equal(res.request.path.strip('/'), guid._id)

    # Second view: existing GUID is reused, so the count is unchanged.
    self.app.get(url, auth=self.user.auth).follow(auth=self.user.auth)
    assert_equal(OsfGuidFile.find().count(), guid_count + 1)
def migrate_node(node, dry_run=True):
    """Migrate each legacy file version on `node` to osfstorage records."""
    node_settings = node.get_addon('osfstorage')
    for path, versions in node.files_versions.iteritems():
        for idx, version in enumerate(versions):
            logger.info('Migrating file {0}, version {1} on node {2}'.format(
                path, idx, node._id))
            if dry_run:
                # Dry runs only log what would happen.
                continue
            try:
                node_file = NodeFile.load(version)
                record = OsfStorageFileRecord.find_by_path(
                    node_file.path,
                    node_settings,
                )
                migrate_version(idx, node_file, record)
            except Exception as error:
                logger.error('Could not migrate object {0} on node {1}'.format(
                    version, node._id))
                logger.exception(error)
                break
def get_osffiles_hgrid(node_settings, auth, **kwargs):
    """Build HGrid-format metadata for each current legacy file on the node."""
    node = node_settings.owner
    if not node.can_view(auth):
        # Viewer has no read access: empty listing.
        return []
    can_edit = node.can_edit(auth) and not node.is_registration
    info = []
    for name, fid in node.files_current.iteritems():
        fobj = NodeFile.load(fid)
        info.append({
            rubeus.KIND: rubeus.FILE,
            'name': _clean_file_name(fobj.path),
            'urls': {
                'view': fobj.url(node),
                'download': fobj.download_url(node),
                'delete': fobj.api_url(node),
            },
            'permissions': {
                'view': True,
                'edit': can_edit,
            },
            'downloads': fobj.download_count(node),
            # Size as [raw bytes, human-readable string].
            'size': [
                float(fobj.size),
                rubeus.format_filesize(fobj.size),
            ],
            'dates': {
                # Modified as [epoch seconds, display string].
                'modified': [
                    time.mktime(fobj.date_modified.timetuple()),
                    fobj.date_modified.strftime('%Y/%m/%d %I:%M %p')
                ],
            },
        })
    return info
def get_osffiles(auth, **kwargs):
    """Summarize each current legacy file on the node for API consumers."""
    node_settings = kwargs['node_addon']
    node = node_settings.owner
    if not node.can_view(auth):
        # No read permission: return an empty listing.
        return []
    info = []
    for name, fid in node.files_current.iteritems():
        fobj = NodeFile.load(fid)
        info.append({
            'name': _clean_file_name(fobj.path),
            'download': fobj.download_url(node),
            'size': rubeus.format_filesize(fobj.size),
            'date_modified': fobj.date_modified.strftime('%Y/%m/%d %I:%M %p'),
            'versions': node.files_versions[name],
        })
    return info
def migrate_node(node, dry_run=True):
    """Migrate legacy files for a node.

    If the git repo for the node is corrupt, attempt to use its source
    node (registration or fork) instead.
    """
    logger.info('Migrating node {0}'.format(node._id))
    node_settings = node.get_or_add_addon('osfstorage', auth=None, log=False)
    repo_intact = check_node(node)
    source_node = None
    if not repo_intact:
        logger.warn('Original node {0} is corrupt; attempting to recover'.format(node._id))
        # Fall back to the registration/fork source for file contents.
        source_node = get_source_node(node)
        if source_node is None:
            logger.error('Could not identify source node for recovery on node {0}'.format(node._id))
    for path, versions in node.files_versions.iteritems():
        for idx, version in enumerate(versions):
            try:
                node_file = NodeFile.load(version)
                migrate_version(
                    idx,
                    node_file,
                    node_settings,
                    node=source_node,
                    dry_run=dry_run,
                )
            except Exception as error:
                # Record the failure and give up on this file's versions.
                logger.error('Could not migrate object {0} on node {1}'.format(version, node._id))
                logger.exception(error)
                break
def test_view_creates_guid(self):
    """A file view creates one GUID and redirects; repeat views reuse it."""
    guid_fid = 'unique'
    guid_content = 'snowflake'
    self._upload_file(guid_fid, guid_content)
    node_file = NodeFile.load(self.project.files_current[guid_fid])
    guid_count = OsfGuidFile.find().count()

    # View file for the first time
    url = node_file.url(self.project)
    first_response = self.app.get(url, auth=self.user.auth)
    res = first_response.follow(auth=self.user.auth)

    query = Q('node', 'eq', self.project) & Q('name', 'eq', guid_fid)
    guid = OsfGuidFile.find_one(query)

    # GUID count has been incremented by one
    assert_equal(OsfGuidFile.find().count(), guid_count + 1)

    # Client has been redirected to GUID
    assert_equal(
        res.request.path.strip('/'),
        guid._id,
    )

    # View file for the second time
    second_response = self.app.get(url, auth=self.user.auth)
    second_response.follow(auth=self.user.auth)

    # GUID count has not been incremented
    assert_equal(OsfGuidFile.find().count(), guid_count + 1)
def test_guid_url_returns_404(self):
    """A raw NodeFile id is not a registered GUID, so resolving it 404s."""
    node_file = NodeFile()
    node_file.save()
    res = self.app.get('/{}/'.format(node_file._id), expect_errors=True)
    assert_equal(res.status_code, 404)
def view_file(auth, **kwargs):
    """Render the legacy file-view page for one file on a node.

    Resolves (or lazily creates) the file's GUID, locates the newest
    version on disk, fetches or starts a cached render, and returns the
    template context.

    :raises HTTPError: 404 when the file is absent from ``files_versions``
        or missing on disk.
    """
    node_settings = kwargs['node_addon']
    node = kwargs['node'] or kwargs['project']
    file_name = kwargs['fid']
    # files_versions keys use '_' in place of '.'.
    file_name_clean = file_name.replace('.', '_')

    # Fix: narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; any lookup failure still creates the GUID.
    try:
        guid = OsfGuidFile.find_one(
            Q('node', 'eq', node) &
            Q('name', 'eq', file_name)
        )
    except Exception:
        guid = OsfGuidFile(
            node=node,
            name=file_name,
        )
        guid.save()

    redirect_url = check_file_guid(guid)
    if redirect_url:
        return redirect(redirect_url)

    # Throw 404 and log error if file not found in files_versions
    try:
        file_id = node.files_versions[file_name_clean][-1]
    except KeyError:
        logger.error('File {} not found in files_versions of component {}.'.format(
            file_name_clean, node._id
        ))
        raise HTTPError(http.NOT_FOUND)

    file_object = NodeFile.load(file_id)

    # Ensure NodeFile is attached to Node; should be fixed by actions or
    # improved data modeling in future
    if not file_object.node:
        file_object.node = node
        file_object.save()

    download_url = file_object.download_url(node)
    render_url = file_object.render_url(node)
    info_url = file_object.info_url(node)

    file_path = os.path.join(
        settings.UPLOADS_PATH,
        node._primary_key,
        file_name
    )
    # Throw 404 and log error if file not found on disk
    if not os.path.isfile(file_path):
        logger.error('File {} not found on disk.'.format(file_path))
        raise HTTPError(http.NOT_FOUND)

    # (Removed unused splitext/file_ext computation — result was never read.)

    # Get or create rendered file
    cache_file = get_cache_file(
        file_object.filename,
        file_object.latest_version_number(node)
    )
    rendered = get_cache_content(
        node_settings,
        cache_file,
        start_render=True,
        file_path=file_path,
        file_content=None,
        download_path=download_url,
    )

    rv = {
        'file_name': file_name,
        'render_url': render_url,
        'rendered': rendered,
        'info_url': info_url,
    }
    rv.update(_view_project(node, auth))
    return rv
if not person: continue if person._id not in contrib: contrib[person._id] = [] for neighbor in project.contributors: if not neighbor: continue if neighbor._id not in contrib[person._id]: contrib[person._id].append(neighbor._id) unique, total = get_basic_counters('node:' + str(project._id)) if total: number_views_total += total number_views_unique += unique for k,v in project.files_versions.iteritems(): for i, f in enumerate(v): fi = NodeFile.load(f) unique, total = get_basic_counters('download:' + str(project._id) + ':' + fi.path.replace('.', '_')) if total: number_downloads_total += total number_downloads_unique += unique print "number_users" , number_users print "number_projects" , number_projects print "number_projects_public" , number_projects_public print "number_projects_forked" , number_projects_forked print "number_projects_registered", number_projects_registered print "number_downloads_total" , number_downloads_total print "number_downloads_unique" , number_downloads_unique print "number_views_total" , number_views_total print "number_views_unique" , number_views_unique
if not person: continue if person._id not in contrib: contrib[person._id] = [] for neighbor in project.contributors: if not neighbor: continue if neighbor._id not in contrib[person._id]: contrib[person._id].append(neighbor._id) unique, total = get_basic_counters('node:' + str(project._id)) if total: number_views_total += total number_views_unique += unique for k, v in project.files_versions.iteritems(): for i, f in enumerate(v): fi = NodeFile.load(f) unique, total = get_basic_counters('download:' + str(project._id) + ':' + fi.path.replace('.', '_')) if total: number_downloads_total += total number_downloads_unique += unique print "number_users", number_users print "number_projects", number_projects print "number_projects_public", number_projects_public print "number_projects_forked", number_projects_forked print "number_projects_registered", number_projects_registered print "number_downloads_total", number_downloads_total print "number_downloads_unique", number_downloads_unique print "number_views_total", number_views_total print "number_views_unique", number_views_unique
def view_file(auth, **kwargs):
    """Render the legacy file-view page for one file on a node.

    Resolves (or lazily creates) the file's GUID, locates the newest
    version on disk, fetches or starts a cached render, and returns the
    template context.

    :raises HTTPError: 404 when the file is absent from ``files_versions``
        or missing on disk.
    """
    node_settings = kwargs['node_addon']
    node = kwargs['node'] or kwargs['project']
    file_name = kwargs['fid']
    # files_versions keys use '_' in place of '.'.
    file_name_clean = file_name.replace('.', '_')

    # Fix: narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; any lookup failure still creates the GUID.
    try:
        guid = OsfGuidFile.find_one(
            Q('node', 'eq', node) &
            Q('name', 'eq', file_name)
        )
    except Exception:
        guid = OsfGuidFile(
            node=node,
            name=file_name,
        )
        guid.save()

    redirect_url = check_file_guid(guid)
    if redirect_url:
        return redirect(redirect_url)

    # Throw 404 and log error if file not found in files_versions
    try:
        file_id = node.files_versions[file_name_clean][-1]
    except KeyError:
        logger.error('File {} not found in files_versions of component {}.'.format(
            file_name_clean, node._id
        ))
        raise HTTPError(http.NOT_FOUND)

    file_object = NodeFile.load(file_id)

    # Ensure NodeFile is attached to Node; should be fixed by actions or
    # improved data modeling in future
    if not file_object.node:
        file_object.node = node
        file_object.save()

    download_url = file_object.download_url(node)
    render_url = file_object.render_url(node)
    info_url = file_object.info_url(node)

    file_path = os.path.join(
        settings.UPLOADS_PATH,
        node._primary_key,
        file_name
    )
    # Throw 404 and log error if file not found on disk
    if not os.path.isfile(file_path):
        logger.error('File {} not found on disk.'.format(file_path))
        raise HTTPError(http.NOT_FOUND)

    # (Removed unused splitext/file_ext computation — result was never read.)

    # Get or create rendered file
    cache_file = get_cache_file(
        file_object.filename,
        file_object.latest_version_number(node)
    )
    rendered = get_cache_content(
        node_settings,
        cache_file,
        start_render=True,
        file_path=file_path,
        file_content=None,
        download_path=download_url,
    )

    ret = {
        'file_name': file_name,
        'render_url': render_url,
        'rendered': rendered,
        'info_url': info_url,
    }
    ret.update(_view_project(node, auth))
    return ret