def main(self):
    """Write the email of every Student in the requested course to a file.

    Reads self.args.batch_size and self.args.path (this job's parsed
    arguments) and self.etl_args.course_url_prefix (etl.py's parsed
    arguments). One email is written per line to self.args.path.

    Exits through sys.exit() when the batch size is below 1, when the
    destination path already exists, or when no course context is found for
    the given URL prefix.
    """
    # By the time main() is invoked, arguments are parsed and available as
    # self.args. If you need more complicated argument validation than
    # argparse gives you, do it here:
    if self.args.batch_size < 1:
        sys.exit('--batch size must be positive')
    if os.path.exists(self.args.path):
        sys.exit('Cannot download to %s; file exists' % self.args.path)

    # Arguments passed to etl.py are also parsed and available as
    # self.etl_args. Here we use them to figure out the requested course's
    # namespace. get_context() can come back empty for an unknown prefix, so
    # fail with a readable message instead of an AttributeError.
    context = etl_lib.get_context(self.etl_args.course_url_prefix)
    if not context:
        sys.exit(
            'No course found with course_url_prefix %s' %
            self.etl_args.course_url_prefix)
    namespace = context.get_namespace_name()

    # Because our models are namespaced, we need to change to the requested
    # course's namespace when doing datastore reads.
    with common_utils.Namespace(namespace):
        # This base query can be modified to add whatever filters you need.
        query = models.Student.all()
        students = common_utils.iter_all(query, self.args.batch_size)

        # Write the results while still inside the namespace block.
        # NOTE(review): iter_all presumably fetches lazily in batches, in
        # which case the datastore reads happen during this loop -- confirm.
        with open(self.args.path, 'w') as f:
            for student in students:
                f.write(student.email)
                f.write('\n')
def main(self):
    """Write the key names of up to 1000 Student entities to a file.

    Reads self.args.batch_size and self.args.path (this job's parsed
    arguments) and self.etl_args.course_url_prefix (etl.py's parsed
    arguments). One key name is written per line to self.args.path.

    Exits through sys.exit() when the batch size is below 1, when the
    destination path already exists, or when no course context is found for
    the given URL prefix.
    """
    # By the time main() is invoked, arguments are parsed and available as
    # self.args. If you need more complicated argument validation than
    # argparse gives you, do it here:
    if self.args.batch_size < 1:
        sys.exit('--batch size must be positive')
    if os.path.exists(self.args.path):
        sys.exit('Cannot download to %s; file exists' % self.args.path)

    # Arguments passed to etl.py are also parsed and available as
    # self.etl_args. Here we use them to figure out the requested course's
    # namespace. get_context() can come back empty for an unknown prefix, so
    # fail with a readable message instead of an AttributeError.
    context = etl_lib.get_context(self.etl_args.course_url_prefix)
    if not context:
        sys.exit(
            'No course found with course_url_prefix %s' %
            self.etl_args.course_url_prefix)
    namespace = context.get_namespace_name()

    # Because our models are namespaced, we need to change to the requested
    # course's namespace before doing datastore reads or we won't find its
    # data. Get the current namespace so we can change back when we're done.
    old_namespace = namespace_manager.get_namespace()
    try:
        namespace_manager.set_namespace(namespace)
        # For this example, we'll only process the first 1000 results. Can
        # do a keys_only query because the student's email is key.name().
        keys = models.Student.all(keys_only=True).fetch(1000)
    finally:
        # The current namespace is global state. We must change it back to
        # the old value no matter what to prevent corrupting datastore
        # operations that run after us.
        namespace_manager.set_namespace(old_namespace)

    # Write the results. Done!
    with open(self.args.path, 'w') as f:
        for key in keys:
            f.write(str(key.name() + '\n'))
def _upload(upload_type, archive_path, course_url_prefix, force_overwrite):
    """Route an upload request to the handler matching upload_type.

    Args:
        upload_type: compared against _TYPE_COURSE and _TYPE_DATASTORE; any
            other value results in no upload being performed.
        archive_path: forwarded to the course upload handler.
        course_url_prefix: used to look up the course context.
        force_overwrite: forwarded to the course upload handler.
    """
    _LOG.info(
        "Processing course with URL prefix %s from archive path %s",
        course_url_prefix, archive_path)

    app_context = etl_lib.get_context(course_url_prefix)
    if not app_context:
        # NOTE(review): _die presumably terminates the process; if it ever
        # returns, execution continues below with the falsy context.
        failure = "No course found with course_url_prefix %s" % course_url_prefix
        _die(failure)

    if upload_type == _TYPE_COURSE:
        _upload_course(
            app_context, archive_path, course_url_prefix, force_overwrite)
        return
    if upload_type == _TYPE_DATASTORE:
        _upload_datastore()
def _upload(upload_type, archive_path, course_url_prefix):
    """Look up the course for course_url_prefix and run the requested upload.

    A course upload is performed when upload_type equals _TYPE_COURSE and a
    datastore upload when it equals _TYPE_DATASTORE; other values fall
    through without uploading anything.
    """
    _LOG.info(
        'Processing course with URL prefix %s from archive path %s',
        course_url_prefix,
        archive_path)

    course_context = etl_lib.get_context(course_url_prefix)
    if not course_context:
        # NOTE(review): relies on _die not returning -- confirm.
        error_text = (
            'No course found with course_url_prefix %s' % course_url_prefix)
        _die(error_text)

    if upload_type == _TYPE_COURSE:
        _upload_course(course_context, archive_path, course_url_prefix)
        return
    if upload_type == _TYPE_DATASTORE:
        _upload_datastore()
def main(self):
    """Export each programming-assignment unit of a course to its own file.

    For every unit whose type is 'PA', the content returned by
    ProgAssignmentBaseHandler.get_content() is serialized with
    transforms.dumps() and written to
    <self.args.path>/<namespace>-problem-<unit_id>.

    Exits through sys.exit() when the batch size is below 1, when
    self.args.path is not a directory, or when no course context is found
    for the given URL prefix.
    """
    # By the time main() is invoked, arguments are parsed and available as
    # self.args. If you need more complicated argument validation than
    # argparse gives you, do it here:
    if self.args.batch_size < 1:
        sys.exit('--batch size must be positive')
    if not os.path.isdir(self.args.path):
        sys.exit('Cannot download to %s; Its not a directory' % self.args.path)

    # Arguments passed to etl.py are also parsed and available as
    # self.etl_args. Here we use them to figure out the requested course's
    # namespace. get_context() can come back empty for an unknown prefix, so
    # fail with a readable message instead of an AttributeError.
    context = etl_lib.get_context(self.etl_args.course_url_prefix)
    if not context:
        sys.exit(
            'No course found with course_url_prefix %s' %
            self.etl_args.course_url_prefix)
    namespace = context.get_namespace_name()

    # Because our models are namespaced, we need to change to the requested
    # course's namespace before doing datastore reads or we won't find its
    # data. Get the current namespace so we can change back when we're done.
    old_namespace = namespace_manager.get_namespace()
    try:
        namespace_manager.set_namespace(namespace)
        app_context = sites.get_app_context_for_namespace(namespace)
        course = courses.Course(None, app_context=app_context)
        if not course:
            return

        for unit in course.get_units():
            if unit.type != 'PA':
                continue
            content = prog_assignment.ProgAssignmentBaseHandler.get_content(
                course, unit)
            out_path = os.path.join(
                self.args.path,
                namespace + '-problem-' + str(unit.unit_id))
            # Use a context manager so each file is flushed and closed
            # before moving on to the next unit.
            with open(out_path, 'w') as f:
                f.write(transforms.dumps(content))
    finally:
        # The current namespace is global state. We must change it back to
        # the old value no matter what to prevent corrupting datastore
        # operations that run after us.
        namespace_manager.set_namespace(old_namespace)
def main(self):
    """Export every Submission in a course, grouped into one file per unit.

    Each submission becomes one line in
    <self.args.path>/<namespace>-<unit_id>, holding the transforms.dumps()
    serialization of a dict with the keys 'unit_id', 'user' and 'code'. The
    submission's key name is also printed as it is processed.

    Exits through sys.exit() when the batch size is below 1, when
    self.args.path is not a directory, or when no course context is found
    for the given URL prefix.
    """
    # By the time main() is invoked, arguments are parsed and available as
    # self.args. If you need more complicated argument validation than
    # argparse gives you, do it here:
    if self.args.batch_size < 1:
        sys.exit('--batch size must be positive')
    if not os.path.isdir(self.args.path):
        sys.exit('Cannot download to %s; Its not a directory' % self.args.path)

    # Arguments passed to etl.py are also parsed and available as
    # self.etl_args. Here we use them to figure out the requested course's
    # namespace. get_context() can come back empty for an unknown prefix, so
    # fail with a readable message instead of an AttributeError.
    context = etl_lib.get_context(self.etl_args.course_url_prefix)
    if not context:
        sys.exit(
            'No course found with course_url_prefix %s' %
            self.etl_args.course_url_prefix)
    namespace = context.get_namespace_name()

    # Maps unit_id -> open output file for that unit.
    file_dict = {}

    # Because our models are namespaced, we need to change to the requested
    # course's namespace before doing datastore reads or we won't find its
    # data. Get the current namespace so we can change back when we're done.
    old_namespace = namespace_manager.get_namespace()
    try:
        namespace_manager.set_namespace(namespace)
        for sub in iter_all(student_work.Submission.all()):
            user = sub.key().name()
            # Parenthesized single-argument form prints the same text under
            # the Python 2 print statement and the print() function.
            print(user)
            unit_id = sub.unit_id
            if unit_id not in file_dict:
                # Open each unit's output file lazily, on first use.
                file_dict[unit_id] = open(
                    os.path.join(
                        self.args.path, namespace + '-' + str(unit_id)),
                    'w')
            data = {
                'unit_id': unit_id,
                'user': user,
                'code': sub.contents,
            }
            file_dict[unit_id].write(transforms.dumps(data) + '\n')
    finally:
        # The current namespace is global state. We must change it back to
        # the old value no matter what to prevent corrupting datastore
        # operations that run after us.
        namespace_manager.set_namespace(old_namespace)
        # Close every output file, even on failure, so buffered data is
        # flushed and no handles are leaked.
        for output in file_dict.values():
            output.close()
def main(self):
    """Upload a local file into the requested course's filesystem.

    Reads self.args.path (local source file), self.args.target (destination
    relative to appengine_config.BUNDLE_ROOT) and
    self.etl_args.course_url_prefix (selects the course).

    Exits through sys.exit() when the local path does not exist or when no
    course context is found for the given URL prefix.
    """
    # By the time main() is invoked, arguments are parsed and available as
    # self.args. If you need more complicated argument validation than
    # argparse gives you, do it here:
    if not os.path.exists(self.args.path):
        sys.exit('%s does not exist' % self.args.path)

    # Arguments passed to etl.py are also parsed and available as
    # self.etl_args. Here we use them to figure out the requested course's
    # context. get_context() can come back empty for an unknown prefix, so
    # fail with a readable message instead of an AttributeError on
    # context.fs below.
    context = etl_lib.get_context(self.etl_args.course_url_prefix)
    if not context:
        sys.exit(
            'No course found with course_url_prefix %s' %
            self.etl_args.course_url_prefix)

    # Create the absolute path we'll write to.
    remote_path = os.path.join(
        appengine_config.BUNDLE_ROOT, self.args.target)

    with open(self.args.path) as f:
        # Perform the write using the context's filesystem. In a real
        # program you'd probably want to do additional work (preventing
        # overwrites of existing files, etc.).
        context.fs.impl.put(remote_path, f, is_draft=False)
def _get_app_context_or_die(cls, course_url_prefix):
    """Return the context for course_url_prefix, calling _die if none exists.

    Args:
        course_url_prefix: string URL prefix passed to etl_lib.get_context().

    Returns:
        Whatever etl_lib.get_context() returned for the prefix.
    """
    found_context = etl_lib.get_context(course_url_prefix)
    if found_context:
        return found_context
    # NOTE(review): _die presumably does not return; if it does, the falsy
    # lookup result is handed back to the caller.
    _die('Unable to find course with url prefix ' + course_url_prefix)
    return found_context