def _parse_form_xfer(xfer, user_args, client_id, configuration):
    """Parse xfer request (i.e. copy, move or upload) file/dir entries from
    user_args.
    """
    _logger = configuration.logger
    files, rejected = [], []
    i = 0
    client_dir = client_id_dir(client_id)
    base_dir = os.path.abspath(os.path.join(configuration.user_home,
                                            client_dir)) + os.sep
    xfer_pattern = 'freeze_%s_%%d' % xfer
    for i in xrange(max_freeze_files):
        if user_args.has_key(xfer_pattern % i):
            source_path = user_args[xfer_pattern % i][-1].strip()
            source_path = os.path.normpath(source_path).lstrip(os.sep)
            _logger.debug('found %s entry: %s' % (xfer, source_path))
            if not source_path:
                continue
            try:
                valid_path(source_path)
            except Exception, exc:
                rejected.append('invalid path: %s (%s)' % (source_path, exc))
                continue
            # IMPORTANT: path must be expanded to abs for proper chrooting
            abs_path = os.path.abspath(os.path.join(base_dir, source_path))
            # Prevent out-of-bounds, and restrict some greedy targets
            if not valid_user_path(configuration, abs_path, base_dir, True):
                _logger.error('found illegal directory traversal %s entry: %s'
                              % (xfer, source_path))
                rejected.append('invalid path: %s (%s)' %
                                (source_path, 'illegal path!'))
                continue
            elif os.path.exists(abs_path) and \
                    os.path.samefile(abs_path, base_dir):
                _logger.warning('refusing archival of entire user home %s: %s'
                                % (xfer, source_path))
                rejected.append('invalid path: %s (%s)' %
                                (source_path, 'entire home not allowed!'))
                continue
            elif in_vgrid_share(configuration, abs_path) == source_path:
                _logger.warning(
                    'refusing archival of entire vgrid shared folder %s: %s'
                    % (xfer, source_path))
                rejected.append('invalid path: %s (%s)' %
                                (source_path, 'entire %s share not allowed!' %
                                 configuration.site_vgrid_label))
                continue
            # expand any dirs recursively
            if os.path.isdir(abs_path):
                for (root, dirnames, filenames) in os.walk(abs_path):
                    for subname in filenames:
                        abs_sub = os.path.join(root, subname)
                        sub_base = root.replace(abs_path, source_path)
                        sub_path = os.path.join(sub_base, subname)
                        files.append((abs_sub, sub_path.lstrip(os.sep)))
            else:
                files.append((abs_path, source_path.lstrip(os.sep)))
    return (files, rejected)
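
# For context, the xfer helper above is presumably wrapped by thin per-action
# parsers. A minimal sketch under that assumption (wrapper names are
# illustrative, not confirmed by this excerpt):
def parse_form_copy(user_args, client_id, configuration):
    """Parse copy file/dir entries from user_args"""
    return _parse_form_xfer('copy', user_args, client_id, configuration)


def parse_form_move(user_args, client_id, configuration):
    """Parse move file/dir entries from user_args"""
    return _parse_form_xfer('move', user_args, client_id, configuration)
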
def parse_form_upload(user_args, client_id, configuration, base_dir):
    """Parse upload file and chunk entries from user_args. Chunk limits are
    extracted from content-range http header in environment. Files are
    considered to be inside upload tmp dir inside base_dir.
    """
    files, rejected = [], []
    logger = configuration.logger
    cache_dir = os.path.join(base_dir, upload_tmp_dir) + os.sep

    # TODO: we only support single filename and chunk for now; extend?
    # for name_index in xrange(max_upload_files):
    #     if user_args.has_key(filename_field) and \
    #             len(user_args[filename_field]) > name_index:
    for name_index in [0]:
        if user_args.has_key(filename_field):
            if isinstance(user_args[filename_field], basestring):
                filename = user_args[filename_field]
            else:
                filename = user_args[filename_field][name_index]
            logger.info('found name: %s' % filename)
        else:
            # No more files
            break
        if not filename.strip():
            continue
        try:
            filename = strip_dir(filename)
            valid_path(filename)
        except Exception, exc:
            logger.error('invalid filename: %s' % filename)
            rejected.append((filename, 'invalid filename: %s (%s)' %
                             (filename, exc)))
            continue
        rel_path = os.path.join(upload_tmp_dir, filename)
        real_path = os.path.abspath(os.path.join(base_dir, rel_path))
        if not valid_user_path(real_path, cache_dir, True):
            logger.error('%s tried to access restricted path %s ! (%s)' %
                         (client_id, real_path, cache_dir))
            rejected.append("Invalid path (%s expands to an illegal path)" %
                            filename)
            continue
        # for chunk_index in xrange(max_upload_chunks):
        #     if user_args.has_key(files_field) and \
        #             len(user_args[files_field]) > chunk_index:
        for chunk_index in [0]:
            if user_args.has_key(files_field):
                chunk = user_args[files_field][chunk_index]
            else:
                break
            configuration.logger.debug('find chunk range: %s' % filename)
            (chunk_first, chunk_last) = extract_chunk_region(configuration)
            if len(chunk) > upload_block_size:
                configuration.logger.error('skip bigger than allowed chunk')
                continue
            elif chunk_last < 0:
                chunk_last = len(chunk) - 1
            files.append((rel_path, (chunk, chunk_first, chunk_last)))
    return (files, rejected)
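
# The extract_chunk_region helper is called above but not shown here. A
# minimal sketch of the assumed behavior: read first/last byte offsets from
# the Content-Range header in the CGI environment, and signal 'use the whole
# chunk' with a negative last offset so the caller falls back to
# len(chunk) - 1 as seen above. Names prefixed _sketch_ are hypothetical.
import os
import re


def _sketch_extract_chunk_region(configuration):
    """Hypothetical stand-in for extract_chunk_region: parse a header like
    'bytes 0-524287/2097152' from the environment into (first, last).
    """
    content_range = os.environ.get('HTTP_CONTENT_RANGE', '').strip()
    match = re.match(r'bytes (\d+)-(\d+)/(\d+)$', content_range)
    if not match:
        # Missing or malformed header: let caller use the entire chunk
        return (0, -1)
    return (int(match.group(1)), int(match.group(2)))
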
def do_GET(self):
    """Handle all HTTP GET requests"""
    try:
        self.parsed_uri = urlparse(self.path)
        self.query = {}
        for (key, val) in cgi.parse_qsl(self.parsed_uri[4]):
            # print "DEBUG: checking input arg %s: '%s'" % (key, val)
            validate_helper = self.validators.get(key, invalid_argument)
            # Let validation errors pass to general exception handler below
            validate_helper(val)
            self.query[key] = val

        self.setUser()

        # print "DEBUG: checking path '%s'" % self.parsed_uri[2]
        valid_path(self.parsed_uri[2])
        path = self.parsed_uri[2]

        # Strip server_base before testing location
        path = path.replace("%s/" % self.server.server_base, '', 1)

        if path == '/':
            self.showMainPage()
        elif path.startswith('/ping'):
            self.showPingPage()
        elif path == '/openidserver':
            self.serverEndPoint(self.query)
        elif path == '/login':
            self.showLoginPage('/%s/' % self.server.server_base,
                               '/%s/' % self.server.server_base)
        elif path == '/loginsubmit':
            self.doLogin()
        elif path.startswith('/id/'):
            self.showIdPage(path)
        elif path.startswith('/yadis/'):
            self.showYadis(path[7:])
        elif path == '/serveryadis':
            self.showServerYadis()
        else:
            self.send_response(404)
            self.end_headers()
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        self.send_response(500)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(cgitb.html(sys.exc_info(), context=10))
        print "ERROR: %s" % cgitb.html(sys.exc_info(), context=10)
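
# The validators map used above is assumed to hold one check function per
# known query key, each raising on bad values, with invalid_argument as the
# catch-all for unknown keys. A hedged sketch of such a catch-all:
def invalid_argument(arg):
    """Catch-all validator: reject any unexpected query argument"""
    raise ValueError('Unexpected query variable: %s' % repr(arg))
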
def get_fs_path(configuration, abs_path, root, chroot_exceptions):
    """Internal helper to translate path with chroot and invisible files
    in mind. Also assures general path character restrictions are applied.
    Automatically expands to abs path to avoid traversal issues with e.g.
    MYVGRID/../bla that would expand to vgrid_files_home/bla instead of bla
    in user home if left as is.
    """
    try:
        valid_path(abs_path)
    except:
        raise ValueError("Invalid path characters")
    if not valid_user_path(configuration, abs_path, root, True,
                           chroot_exceptions):
        raise ValueError("Illegal path access attempt")
    return abs_path
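
# Illustrative call pattern for get_fs_path (sketch only; user_chroot,
# rel_path, chroot_exceptions and the enclosing daemon loop are assumed,
# not taken from this excerpt):
# try:
#     abs_path = get_fs_path(configuration,
#                            os.path.abspath(os.path.join(user_chroot,
#                                                         rel_path)),
#                            user_chroot, chroot_exceptions)
# except ValueError, vae:
#     logger.warning('rejected path %s: %s' % (rel_path, vae))
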
def parse_form_upload(user_args, client_id, configuration):
    """Parse upload file entries from user_args"""
    files, rejected = [], []
    i = 0
    client_dir = client_id_dir(client_id)
    for i in xrange(max_freeze_files):
        if user_args.has_key('freeze_upload_%d' % i):
            file_item = user_args['freeze_upload_%d' % i]
            filename = user_args.get('freeze_upload_%dfilename' % i, '')
            if not filename.strip():
                continue
            filename = strip_dir(filename)
            try:
                valid_path(filename)
            except Exception, exc:
                rejected.append('invalid filename: %s (%s)' % (filename, exc))
                continue
            files.append((filename, file_item[0]))
    return (files, rejected)
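
# Illustrative user_args shape for the loop above (hypothetical values): each
# upload field is assumed to carry a list with the raw file content first,
# while the companion *filename field holds the client-side name, e.g.
#     {'freeze_upload_0': ['<raw file bytes>'],
#      'freeze_upload_0filename': 'results.tar.gz'}
# which would yield files == [('results.tar.gz', '<raw file bytes>')].
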
def do_POST(self):
    """Handle all HTTP POST requests"""
    try:
        self.parsed_uri = urlparse(self.path)
        content_length = int(self.headers['Content-Length'])
        post_data = self.rfile.read(content_length)

        self.query = {}
        for (key, val) in cgi.parse_qsl(post_data):
            # print "DEBUG: checking post input arg %s: '%s'" % (key, val)
            validate_helper = self.validators.get(key, invalid_argument)
            # Let validation errors pass to general exception handler below
            validate_helper(val)
            self.query[key] = val

        self.setUser()

        # print "DEBUG: checking path '%s'" % self.parsed_uri[2]
        valid_path(self.parsed_uri[2])
        path = self.parsed_uri[2]

        # Strip server_base before testing location
        path = path.replace("%s/" % self.server.server_base, '', 1)

        if path == '/openidserver':
            self.serverEndPoint(self.query)
        elif path == '/allow':
            self.handleAllow(self.query)
        else:
            self.send_response(404)
            self.end_headers()
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        self.send_response(500)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(cgitb.html(sys.exc_info(), context=10))
        print "ERROR: %s" % cgitb.html(sys.exc_info(), context=10)
def _parse_form_xfer(xfer, user_args, client_id, configuration):
    """Parse xfer request (i.e. copy, move or upload) file/dir entries from
    user_args.
    """
    files, rejected = [], []
    i = 0
    client_dir = client_id_dir(client_id)
    base_dir = os.path.abspath(os.path.join(configuration.user_home,
                                            client_dir)) + os.sep
    xfer_pattern = 'freeze_%s_%%d' % xfer
    for i in xrange(max_freeze_files):
        if user_args.has_key(xfer_pattern % i):
            source_path = user_args[xfer_pattern % i][-1].strip()
            configuration.logger.debug('found %s entry: %s' %
                                       (xfer, source_path))
            if not source_path:
                continue
            try:
                valid_path(source_path)
            except Exception, exc:
                rejected.append('invalid path: %s (%s)' % (source_path, exc))
                continue
            source_path = os.path.normpath(source_path).lstrip(os.sep)
            real_path = os.path.abspath(os.path.join(base_dir, source_path))
            if not valid_user_path(real_path, base_dir, True):
                rejected.append('invalid path: %s (%s)' %
                                (source_path, 'illegal path!'))
                continue
            # expand any dirs recursively
            if os.path.isdir(real_path):
                for (root, dirnames, filenames) in os.walk(real_path):
                    for subname in filenames:
                        real_sub = os.path.join(root, subname)
                        sub_base = root.replace(real_path, source_path)
                        sub_path = os.path.join(sub_base, subname)
                        files.append((real_sub, sub_path))
            else:
                files.append((real_path, source_path))
    return (files, rejected)
def parse(
    localfile_spaces,
    job_id,
    client_id,
    forceddestination,
    outfile='AUTOMATIC',
):
    """Parse job description and optionally write results to parsed mRSL
    file. If outfile is non-empty it is used as destination file, and the
    keyword AUTOMATIC is replaced by the default mrsl dir destination.
    """
    configuration = get_configuration_object()
    logger = configuration.logger
    client_dir = client_id_dir(client_id)

    # return a tuple (bool status, str msg). This is done because cgi-scripts
    # are not allowed to print anything before 'the first two special lines'
    # are printed

    result = parser.parse(localfile_spaces)

    external_dict = mrslkeywords.get_keywords_dict(configuration)

    # The mRSL has the right structure; check if the types are correct too
    # and inline update the default external_dict entries with the ones
    # from the actual job specification

    (status, msg) = parser.check_types(result, external_dict, configuration)
    if not status:
        return (False, 'Parse failed (typecheck) %s' % msg)

    logger.debug('check_types updated job dict to: %s' % external_dict)

    global_dict = {}

    # Insert the parts from mrslkeywords we need in the rest of the MiG system

    for (key, value_dict) in external_dict.iteritems():
        global_dict[key] = value_dict['Value']

    # We do not expand any job variables yet in order to allow any future
    # resubmits to properly expand job ID.

    vgrid_list = global_dict['VGRID']
    vgrid_access = user_vgrid_access(configuration, client_id)

    # Replace any_vgrid keyword with all allowed vgrids (on time of submit!)

    try:
        any_pos = vgrid_list.index(any_vgrid)
        vgrid_list[any_pos:any_pos] = vgrid_access

        # Remove any additional any_vgrid keywords

        while any_vgrid in vgrid_list:
            vgrid_list.remove(any_vgrid)
    except ValueError:
        # No any_vgrid keywords in list - move along
        pass

    # Now validate supplied vgrids

    for vgrid_name in vgrid_list:
        if not vgrid_name in vgrid_access:
            return (False, """Failure: You must be an owner or member of the
'%s' vgrid to submit a job to it!""" % vgrid_name)

    # Fall back to default vgrid if no vgrid was supplied

    if not vgrid_list:
        # Please note that vgrid_list is a ref to global_dict list
        # so we must modify and not replace with a new list!
        vgrid_list.append(default_vgrid)

    # convert specified runtime environments to upper-case and verify they
    # actually exist
    # do not check runtime envs if the job is for ARC (submission will
    # fail later)

    if global_dict.get('JOBTYPE', 'unset') != 'arc' \
            and global_dict.has_key('RUNTIMEENVIRONMENT'):
        re_entries_uppercase = []
        for specified_re in global_dict['RUNTIMEENVIRONMENT']:
            specified_re = specified_re.upper()
            re_entries_uppercase.append(specified_re)
            if not is_runtime_environment(specified_re, configuration):
                return (False, """You have specified a non-existing runtime
environment '%s', therefore the job can not be run on any resources.""" %
                        specified_re)
        if global_dict.get('MOUNT', []) != []:
            if configuration.res_default_mount_re.upper() \
                    not in re_entries_uppercase:
                re_entries_uppercase.append(
                    configuration.res_default_mount_re.upper())
        global_dict['RUNTIMEENVIRONMENT'] = re_entries_uppercase

    if global_dict.get('JOBTYPE', 'unset').lower() == 'interactive':
        # if jobtype is interactive append command to create the notification
        # file .interactivejobfinished that breaks the infinite loop waiting
        # for the interactive job to finish and send output files to the MiG
        # server
        global_dict['EXECUTE'].append('touch .interactivejobfinished')

    # put job id and name of user in the dictionary

    global_dict['JOB_ID'] = job_id
    global_dict['USER_CERT'] = client_id

    # mark job as received

    global_dict['RECEIVED_TIMESTAMP'] = time.gmtime()
    global_dict['STATUS'] = 'PARSE'

    if forceddestination:
        global_dict['FORCEDDESTINATION'] = forceddestination
        if forceddestination.has_key('UNIQUE_RESOURCE_NAME'):
            global_dict["RESOURCE"] = "%(UNIQUE_RESOURCE_NAME)s_*" % \
                forceddestination
        if forceddestination.has_key('RE_NAME'):
            re_name = forceddestination['RE_NAME']

            # verify the verifyfiles entries are not modified (otherwise RE
            # creator can specify multiple ::VERIFYFILES:: keywords and give
            # the entries other names (perhaps overwriting files in the home
            # directories of resource owners executing the testprocedure)

            for verifyfile in global_dict['VERIFYFILES']:
                verifytypes = ['.status', '.stderr', '.stdout']
                found = False
                for verifytype in verifytypes:
                    if verifyfile == 'verify_runtime_env_%s%s' % \
                            (re_name, verifytype):
                        found = True
                if not found:
                    return (False, '''You are not allowed to specify the
::VERIFY:: keyword in a testprocedure, it is done automatically''')

    # normalize any path fields to be taken relative to home

    for field in ('INPUTFILES', 'OUTPUTFILES', 'EXECUTABLES', 'VERIFYFILES'):
        if not global_dict.has_key(field):
            continue
        normalized_field = []
        for line in global_dict[field]:
            normalized_parts = []
            line_parts = line.split(src_dst_sep)
            if len(line_parts) < 1 or len(line_parts) > 2:
                return (False,
                        '%s entries must contain 1 or 2 space-separated items'
                        % field)
            for part in line_parts:
                # deny leading slashes i.e. force absolute to relative paths
                part = part.lstrip('/')
                if part.find('://') != -1:
                    # keep external targets as is - normpath breaks '://'
                    normalized_parts.append(part)
                    check_path = part.split('/')[-1]
                else:
                    # normalize path to avoid e.g. './' which breaks dir
                    # handling on resource
                    check_path = os.path.normpath(part)
                    normalized_parts.append(check_path)
                try:
                    valid_path(check_path)
                except Exception, exc:
                    return (False, 'Invalid %s part in %s: %s' %
                            (field, html_escape(part), exc))
            normalized_field.append(' '.join(normalized_parts))
        global_dict[field] = normalized_field
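
# A worked example of the path normalization above (hypothetical mRSL entry):
# an INPUTFILES line like
#     'sub/./input.txt https://example.org/raw/input.txt'
# keeps the external URL part untouched, since normpath would mangle '://',
# and only its basename is checked with valid_path, while the plain local
# part is normalized, so the stored entry becomes
#     'sub/input.txt https://example.org/raw/input.txt'
# with any leading slashes stripped to force paths relative to the job home.
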
def parse_form_upload(user_args, user_id, configuration, base_dir, dst_dir,
                      reject_write=False):
    """Parse upload file and chunk entries from user_args. Chunk limits are
    extracted from content-range http header in environment. Existing files
    are automatically taken from upload_tmp_dir and uploads go into dst_dir
    inside base_dir. The optional reject_write argument is used for delayed
    refusal if someone tries to upload to a read-only sharelink.
    """
    files, rejected = [], []
    logger = configuration.logger
    rel_dst_dir = dst_dir.replace(base_dir, '')

    # TODO: we only support single filename and chunk for now; extend?
    # for name_index in xrange(max_upload_files):
    #     if user_args.has_key(filename_field) and \
    #             len(user_args[filename_field]) > name_index:
    for name_index in [0]:
        if user_args.has_key(filename_field):
            if isinstance(user_args[filename_field], basestring):
                filename = user_args[filename_field]
            else:
                filename = user_args[filename_field][name_index]
            logger.info('found name: %s' % filename)
        else:
            # No more files
            break
        if not filename.strip():
            continue
        if reject_write:
            rejected.append((filename, 'read-only share: upload refused!'))
            continue
        try:
            filename = strip_dir(filename)
            valid_path(filename)
        except Exception, exc:
            logger.error('invalid filename: %s' % filename)
            rejected.append(
                (filename, 'invalid filename: %s (%s)' % (filename, exc)))
            continue
        rel_path = os.path.join(rel_dst_dir, filename)
        # IMPORTANT: path must be expanded to abs for proper chrooting
        abs_path = os.path.abspath(os.path.join(base_dir, rel_path))
        if not valid_user_path(configuration, abs_path, dst_dir, True):
            logger.error('%s tried to access restricted path %s ! (%s)' %
                         (user_id, abs_path, dst_dir))
            rejected.append("Invalid path (%s expands to an illegal path)" %
                            filename)
            continue
        # for chunk_index in xrange(max_upload_chunks):
        #     if user_args.has_key(files_field) and \
        #             len(user_args[files_field]) > chunk_index:
        for chunk_index in [0]:
            if user_args.has_key(files_field):
                chunk = user_args[files_field][chunk_index]
            else:
                break
            configuration.logger.debug('find chunk range: %s' % filename)
            (chunk_first, chunk_last) = extract_chunk_region(configuration)
            if len(chunk) > upload_block_size:
                configuration.logger.error('skip bigger than allowed chunk')
                continue
            elif chunk_last < 0:
                chunk_last = len(chunk) - 1
            files.append((rel_path, (chunk, chunk_first, chunk_last)))
    return (files, rejected)
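
# Hedged caller sketch for the reject_write variant (names illustrative, not
# from this excerpt): a sharelink upload handler might derive the flag from
# the share access mode and rely on the per-file refusals above, e.g.
#     reject_write = ('write' not in share_mode)
#     (files, rejected) = parse_form_upload(user_args, user_id, configuration,
#                                           base_dir, dst_dir, reject_write)
# so uploads to read-only shares yield per-file 'upload refused' errors
# instead of a blanket HTTP failure.
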
def parse(
    localfile_spaces,
    job_id,
    client_id,
    forceddestination,
    outfile='AUTOMATIC',
):
    """Parse job description and optionally write results to parsed mRSL
    file. If outfile is non-empty it is used as destination file, and the
    keyword AUTOMATIC is replaced by the default mrsl dir destination.
    """
    configuration = get_configuration_object()
    logger = configuration.logger
    client_dir = client_id_dir(client_id)

    # return a tuple (bool status, str msg). This is done because cgi-scripts
    # are not allowed to print anything before 'the first two special lines'
    # are printed

    result = parser.parse(localfile_spaces)

    external_dict = mrslkeywords.get_keywords_dict(configuration)

    # The mRSL has the right structure; check if the types are correct too
    # and inline update the default external_dict entries with the ones
    # from the actual job specification

    (status, msg) = parser.check_types(result, external_dict, configuration)
    if not status:
        return (False, 'Parse failed (typecheck) %s' % msg)

    logger.debug('check_types updated job dict to: %s' % external_dict)

    global_dict = {}

    # Insert the parts from mrslkeywords we need in the rest of the MiG system

    for (key, value_dict) in external_dict.iteritems():
        global_dict[key] = value_dict['Value']

    # We do not expand any job variables yet in order to allow any future
    # resubmits to properly expand job ID.

    vgrid_list = global_dict['VGRID']
    allowed_vgrids = user_allowed_vgrids(configuration, client_id)

    # Replace any_vgrid keyword with all allowed vgrids (on time of submit!)

    try:
        any_pos = vgrid_list.index(any_vgrid)
        vgrid_list[any_pos:any_pos] = allowed_vgrids

        # Remove any additional any_vgrid keywords

        while any_vgrid in vgrid_list:
            vgrid_list.remove(any_vgrid)
    except ValueError:
        # No any_vgrid keywords in list - move along
        pass

    # Now validate supplied vgrids

    for vgrid_name in vgrid_list:
        if not vgrid_name in allowed_vgrids:
            return (False, """Failure: You must be an owner or member of the
'%s' vgrid to submit a job to it!""" % vgrid_name)

    # Fall back to default vgrid if no vgrid was supplied

    if not vgrid_list:
        # Please note that vgrid_list is a ref to global_dict list
        # so we must modify and not replace with a new list!
        vgrid_list.append(default_vgrid)

    # convert specified runtime environments to upper-case and verify they
    # actually exist
    # do not check runtime envs if the job is for ARC (submission will
    # fail later)

    if global_dict.get('JOBTYPE', 'unset') != 'arc' \
            and global_dict.has_key('RUNTIMEENVIRONMENT'):
        re_entries_uppercase = []
        for specified_re in global_dict['RUNTIMEENVIRONMENT']:
            specified_re = specified_re.upper()
            re_entries_uppercase.append(specified_re)
            if not is_runtime_environment(specified_re, configuration):
                return (False, """You have specified a non-existing runtime
environment '%s', therefore the job can not be run on any resources.""" %
                        specified_re)
        if global_dict.get('MOUNT', []) != []:
            re_entries_uppercase.append(
                configuration.res_default_mount_re.upper())
        global_dict['RUNTIMEENVIRONMENT'] = re_entries_uppercase

    if global_dict.get('JOBTYPE', 'unset').lower() == 'interactive':
        # if jobtype is interactive append command to create the notification
        # file .interactivejobfinished that breaks the infinite loop waiting
        # for the interactive job to finish and send output files to the MiG
        # server
        global_dict['EXECUTE'].append('touch .interactivejobfinished')

    # put job id and name of user in the dictionary

    global_dict['JOB_ID'] = job_id
    global_dict['USER_CERT'] = client_id

    # mark job as received

    global_dict['RECEIVED_TIMESTAMP'] = time.gmtime()
    global_dict['STATUS'] = 'PARSE'

    if forceddestination:
        global_dict['FORCEDDESTINATION'] = forceddestination
        if forceddestination.has_key('UNIQUE_RESOURCE_NAME'):
            global_dict["RESOURCE"] = "%(UNIQUE_RESOURCE_NAME)s_*" % \
                forceddestination
        if forceddestination.has_key('RE_NAME'):
            re_name = forceddestination['RE_NAME']

            # verify the verifyfiles entries are not modified (otherwise RE
            # creator can specify multiple ::VERIFYFILES:: keywords and give
            # the entries other names (perhaps overwriting files in the home
            # directories of resource owners executing the testprocedure)

            for verifyfile in global_dict['VERIFYFILES']:
                verifytypes = ['.status', '.stderr', '.stdout']
                found = False
                for verifytype in verifytypes:
                    if verifyfile == 'verify_runtime_env_%s%s' % \
                            (re_name, verifytype):
                        found = True
                if not found:
                    return (False, '''You are not allowed to specify the
::VERIFY:: keyword in a testprocedure, it is done automatically''')

    # normalize any path fields to be taken relative to home

    for field in ('INPUTFILES', 'OUTPUTFILES', 'EXECUTABLES', 'VERIFYFILES'):
        if not global_dict.has_key(field):
            continue
        normalized_field = []
        for line in global_dict[field]:
            normalized_parts = []
            line_parts = line.split()
            if len(line_parts) < 1 or len(line_parts) > 2:
                return (False,
                        '%s entries must contain 1 or 2 space-separated items'
                        % field)
            for part in line_parts:
                # deny leading slashes i.e. force absolute to relative paths
                part = part.lstrip('/')
                if part.find('://') != -1:
                    # keep external targets as is - normpath breaks '://'
                    normalized_parts.append(part)
                    check_path = part.split('/')[-1]
                else:
                    # normalize path to avoid e.g. './' which breaks dir
                    # handling on resource
                    check_path = os.path.normpath(part)
                    normalized_parts.append(check_path)
                try:
                    valid_path(check_path)
                except Exception, exc:
                    return (False, 'Invalid %s part in %s: %s' %
                            (field, html_escape(part), exc))
            normalized_field.append(' '.join(normalized_parts))
        global_dict[field] = normalized_field