def test_urlsplit(self):
    """Check urlsplit handling of http ports and disco-scheme rewriting.

    Uses ``assertEqual`` (``assertEquals`` is a deprecated alias).
    """
    port = self.disco_settings['DISCO_PORT']
    self.assertEqual(urlsplit('http://host/path'),
                     ('http', ('host', ''), 'path'))
    self.assertEqual(urlsplit('http://host:port/path'),
                     ('http', ('host', 'port'), 'path'))
    # disco:// URLs are rewritten to http on the configured DISCO_PORT
    self.assertEqual(urlsplit('disco://master/long/path'),
                     ('http', ('master', '%s' % port), 'long/path'))
def open(url, task=None):
    """Open *url* via comm, resolving disco-specific schemes first.

    When *task* is given, its host and data directories drive the
    resolution; otherwise only a plain split is performed.
    """
    kwargs = {'localhost': None}
    if task:
        kwargs = {'localhost': task.host,
                  'disco_port': task.disco_port,
                  'disco_data': task.disco_data,
                  'ddfs_data': task.ddfs_data}
    parts = util.urlsplit(url, **kwargs)
    return comm.open_url(util.urljoin(parts))
def process_restrict(interface, state, label, inp, task, label_fn, ffuncs,
                     ghfuncs, deffuncs, agg_fn, wide=False, need_agg=False):
    """Restrict stage: force a task-local replica, then aggregate or relabel.

    Raises ``util.DataError`` when no local replica exists (consistent with
    the other stage implementations, which raise DataError rather than a
    bare Exception).
    """
    from disco import util
    empty = ()
    # inp contains a set of replicas, let's force local #HACK
    input_processed = False
    for i, inp_url in inp.input.replicas:
        scheme, (netloc, port), rest = util.urlsplit(inp_url)
        if netloc == task.host:
            input_processed = True
            inp.input = inp_url
            break
    if not input_processed:
        raise util.DataError(
            "Input %s not processed, no LOCAL resource found." % str(inp.input), '')
    # opportunistically aggregate in this stage
    if need_agg and not wide:
        for out_label, key in agg_fn(inp, label_fn, ffuncs, ghfuncs, deffuncs):
            interface.output(out_label).add(key, empty)
    else:
        for key, value in inp:
            out_label = label_fn(key)
            # print "RESTRICT: %s %s" % (key, value)
            interface.output(out_label).add(key, value)
def map_input_stream(stream, size, url, params):
    """Assert the input was NOT scheduled on its own host, then stream it."""
    from disco.func import string_input_stream
    from disco.util import urlsplit
    parts = urlsplit(url)
    netloc = parts[1]
    # scheduler must have placed this input on a foreign node
    assert netloc.host != Task.host
    return string_input_stream(str(netloc), size, url, params)
def request(method, url, data=None, headers=None, sleep=0):
    """Issue an HTTP request, retrying on unavailability and following redirects.

    ``headers`` now defaults to ``None`` instead of a shared mutable ``{}``
    (mutable-default-argument anti-pattern); behavior is otherwise unchanged.
    Retries with exponential backoff up to sleep == 9, then raises CommError.
    """
    headers = {} if headers is None else headers
    scheme, netloc, path = urlsplit(urlresolve(url))
    try:
        conn = HTTPConnection(str(netloc))
        conn.request(method, '/{0}'.format(path), body=data, headers=headers)
        response = conn.getresponse()
        status = response.status
        errmsg = response.reason
    except httplib.HTTPException as e:
        status = None
        errmsg = str(e) or repr(e)
    except (httplib.socket.error, socket.error) as e:
        status = None
        errmsg = e if isinstance(e, basestring) else str(e) or repr(e)
    if not status or isunavailable(status):
        if sleep == 9:
            raise CommError(errmsg, url, status)
        # exponential backoff before retrying
        time.sleep(random.randint(1, 2**sleep))
        return request(method, url, data=data, headers=headers, sleep=sleep + 1)
    elif isredirection(status):
        loc = response.getheader('location')
        return request(method,
                       loc if loc.startswith('http:') else resolveuri(url, loc),
                       data=data, headers=headers, sleep=sleep)
    elif not issuccessful(status):
        raise CommError(response.read(), url, status)
    return response
def process_restrict(interface, state, label, inp, task, label_fn, ffuncs, ghfuncs, deffuncs, agg_fn, wide=False, need_agg=False, distinct=False, limit=sys.maxint): from disco import util from itertools import groupby, islice empty = () # inp contains a set of replicas, let's force local #HACK input_processed = False for i, inp_url in inp.input.replicas: scheme, (netloc, port), rest = util.urlsplit(inp_url) if netloc == task.host: input_processed = True inp.input = inp_url break if not input_processed: raise util.DataError("Input %s not processed, no LOCAL resource found." % str(inp.input), '') # opportunistically aggregate, distinct and limit in this stage if need_agg and not wide: for out_label, key in agg_fn(inp, label_fn, ffuncs, ghfuncs, deffuncs): interface.output(out_label).add(key, empty) else: if distinct: for uniqkey, _ in islice(groupby(inp, lambda (k, v): tuple(k)), 0, limit): label = label_fn(uniqkey) interface.output(label).add(uniqkey, empty) else:
def imp_process(data):
    """Derive site_id from the URL's hostname, dropping a leading 'www.'."""
    from disco.util import urlsplit
    _, (host, _), _ = urlsplit(data["url"])
    site = host[4:] if host.startswith("www.") else host
    data["site_id"] = site
def imp_process(data):
    """Set data['site_id'] to the URL host with any 'www.' prefix removed."""
    from disco.util import urlsplit
    _, (host, _), _ = urlsplit(data['url'])
    prefix = 'www.'
    if host.startswith(prefix):
        host = host[len(prefix):]
    data['site_id'] = host
def stat_input_stream(fd, size, url, params):
    # Yield a single ('', stats-dict) pair for the marble behind *url*.
    # The dict maps '_' -> total row count and each indexed field to the
    # ratio of index entries to rows (presumably a selectivity estimate —
    # TODO confirm against subindexdb.stat semantics).
    from disco import util
    from hustle.core.marble import MarbleStream
    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        print "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise e
    otab = None
    try:
        # print "FLurlG: %s" % url
        # map the URL's path to a local file under the task's data dirs
        fle = util.localize(rest, disco_data=params._task.disco_data,
                            ddfs_data=params._task.ddfs_data)
        # print "FLOGLE: %s" % fle
        otab = MarbleStream(fle)
        rows = otab.number_rows
        frows = float(rows)  # float so the per-field division below isn't integer division
        rval = {'_': rows, }
        for field, (subdb, subindexdb, _, column, _) in otab.dbs.iteritems():
            if subindexdb:
                # only indexed fields contribute a statistic
                rval[field] = subindexdb.stat(otab.txn)['ms_entries'] / frows
        yield '', rval
    except Exception as e:
        print "Gibbers: %s" % e
        raise e
    finally:
        # always release the marble stream, even on error
        if otab:
            otab.close()
def map_input_stream(stream, size, url, params):
    """Verify data locality was preserved, then fetch through the test server."""
    from disco.util import urlsplit
    from disco import comm
    scheme, netloc, path = urlsplit(url)
    # test that scheduler preserved data locality
    msg("NODE %s GOT URL %s" % (Task.netloc, url))
    assert netloc == Task.netloc
    remote = "http://%s/%s" % (path, netloc)
    return comm.open_remote(remote)
def request(method, url, data=None, headers={}, sleep=0): scheme, netloc, path = urlsplit(urlresolve(url)) try: conn = HTTPConnection(str(netloc)) conn.request(method, '/%s' % path, body=data, headers=headers) response = conn.getresponse() except (httplib.HTTPException, httplib.socket.error), e: raise CommError("Request failed: %s" % e, url)
def request(method, url, data=None, headers={}, sleep=0): scheme, netloc, path = urlsplit(urlresolve(url)) try: conn = HTTPConnection(str(netloc)) conn.request(method, '/%s' % path, body=data, headers=headers) response = conn.getresponse() status = response.status errmsg = response.reason except httplib.HTTPException, e: status = None errmsg = str(e) or repr(e)
def input_stream(fd, size, url, params):
    """
    Opens the path on host using an http client and the setting `DISCO_PORT`.

    Local inputs are resolved against DDFS_ROOT or DISCO_DATA and opened
    directly from disk; anything else goes through the remote opener.
    """
    scheme, netloc, rest = urlsplit(url)
    prefix, fname = rest.split('/', 1)
    if netloc[0] != Task.netloc[0]:
        return comm.open_remote('http://%s/%s/%s' % (netloc, prefix, fname))
    root_key = 'DDFS_ROOT' if prefix == 'ddfs' else 'DISCO_DATA'
    root = Task.settings[root_key]
    return comm.open_local(os.path.join(root, fname))
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index, key_names):
    # Stream (row, ()) tuples out of a local marble, columns zipped lazily.
    from disco import util
    from hustle.core.marble import Expr, MarbleStream
    from itertools import izip, repeat
    empty = ()
    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        print "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise e
    # map the URL's path to a local file under the task's data dirs
    fle = util.localize(rest, disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)
    # print "FLOGLE: %s %s" % (url, fle)
    otab = None
    try:
        # import sys
        # sys.path.append('/Library/Python/2.7/site-packages/pycharm-debug.egg')
        # import pydevd
        # pydevd.settrace('localhost', port=12999, stdoutToServer=True, stderrToServer=True)
        otab = MarbleStream(fle)
        bitmaps = {}
        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name == otab.marble._name:
                if type(where) is Expr and not where.is_partition:
                    # evaluate the expression to a bitmap of matching row ids
                    bm = where(otab)
                    bitmaps[index] = (bm, len(bm))
                else:
                    # it is either the table itself, or a partition expression.
                    # Either way, returns the entire table
                    bitmaps[index] = (otab.iter_all(), otab.number_rows)
        for index, (bitmap, blen) in bitmaps.iteritems():
            # optionally prepend the where-clause index as a synthetic column
            prefix_gen = [repeat(index, blen)] if gen_where_index else []
            row_iter = prefix_gen + \
                [otab.mget(col, bitmap) if col is not None else repeat(None, blen)
                 for col in key_names[index]]
            for row in izip(*row_iter):
                yield row, empty
    finally:
        # always release the marble stream, even on error
        if otab:
            otab.close()
def _push(self, source_target, replicas=None, exclude=None, **kwargs):
    """Push a blob to DDFS, retrying on failed hosts.

    On CommError the failing host is added to *exclude* and the push is
    retried. ``exclude`` now defaults to ``None`` instead of a shared
    mutable ``[]`` (mutable-default-argument anti-pattern); passing a list
    explicitly still works unchanged.
    """
    exclude = [] if exclude is None else exclude
    source, target = source_target
    qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                        ('replicas', replicas)) if v])
    urls = self._download('{0}/ddfs/new_blob/{1}?{2}'
                          .format(self.master, target, qs))
    try:
        return [json.loads(bytes_to_str(url))
                for url in self._upload(urls, source, to_master=False, **kwargs)]
    except CommError as e:
        scheme, (host, port), path = urlsplit(e.url)
        # retry, excluding the host that just failed
        return self._push((source, target), replicas=replicas,
                          exclude=exclude + [host], **kwargs)
def input_stream(fd, size, url, params):
    """Load a local discodb and optionally apply a query or method after '!'."""
    scheme, netloc, rest = util.urlsplit(url)
    if netloc[0] != Task.netloc[0]:
        raise core.DiscoError("Scheme 'discodb' can only be used with force_local=True")
    if '!' in rest:
        path, rest = rest.split('!', 1)
    else:
        path, rest = rest, ''
    Task.discodb = DiscoDB.load(open(os.path.join(Task.root, path)))
    if not rest:
        return Task.discodb, size, url
    method, arg = rest.split('/', 1)
    if method == 'query':
        if hasattr(params, 'discodb_query'):
            return Task.discodb.query(params.discodb_query), size, url
        return Task.discodb.query(Q.urlscan(arg)), size, url
    return getattr(Task.discodb, method)(), size, url
def process_stat(interface, state, label, inp, task):
    """Pass-through stat stage, forcing a replica local to the task host.

    Raises ``util.DataError`` (instead of a bare Exception, for consistency
    with the other stages) when no local replica exists.
    """
    from disco import util
    # inp contains a set of replicas, let's force local #HACK
    input_processed = False
    for i, inp_url in inp.input.replicas:
        scheme, (netloc, port), rest = util.urlsplit(inp_url)
        if netloc == task.host:
            input_processed = True
            inp.input = inp_url
            break
    if not input_processed:
        raise util.DataError(
            "Input %s not processed, no LOCAL resource found." % str(inp.input), '')
    for key, value in inp:
        interface.output(0).add(key, value)
def _push(self, source_target, replicas=None, forceon=None, exclude=None, **kwargs):
    """Push a blob to DDFS; on failure rewind the source and retry.

    ``forceon`` and ``exclude`` now default to ``None`` instead of shared
    mutable ``[]`` defaults (mutable-default-argument anti-pattern);
    explicit list arguments behave exactly as before.
    """
    forceon = [] if forceon is None else forceon
    exclude = [] if exclude is None else exclude
    source, target = source_target
    qs = urlencode(
        [
            (k, v)
            for k, v in (("exclude", ",".join(exclude)),
                         ("include", ",".join(forceon)),
                         ("replicas", replicas))
            if v
        ]
    )
    urls = self._download("{0}/ddfs/new_blob/{1}?{2}".format(self.master, target, qs))
    try:
        return [json.loads(bytes_to_str(url))
                for url in self._upload(urls, source, to_master=False, **kwargs)]
    except CommError as e:
        scheme, (host, port), path = urlsplit(e.url)
        if hasattr(source, "seek"):
            source.seek(0)  # source will be read again; seek to the beginning
        else:
            print("{0} is not seekable, retrying".format(source))
        # retry, excluding the host that just failed
        return self._push((source, target), replicas=replicas, forceon=forceon,
                          exclude=exclude + [host], **kwargs)
def read(interface, state, label, inp):
    """Read CSV inputs whose first row names the table; hash-partition the rest.

    The netloc encodes "<filename>?<join-column>"; each data row is routed
    to one of 160 buckets by the MD5 of its join-column value.
    """
    from disco import util
    for e in inp:
        scheme, netloc, _ = util.urlsplit(e)
        file_name, join_column = str(netloc).split("?")
        col = int(join_column)
        reader = csv.reader(open(PREFIX + file_name, "r"))
        rows = iter(reader)
        header = next(rows, None)
        if header is None:
            continue  # empty file: nothing to emit
        table_name = header[0]
        for row in rows:
            full_name = table_name + "?" + str(col)
            bucket = int(hashlib.md5(str_to_bytes(row[col])).hexdigest(), 16) % 160
            interface.output(bucket).add(full_name, row)
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index, key_names):
    # Yield (record-tuple, ()) pairs from a local marble, one row at a time.
    from disco import util
    from hustle.core.marble import Expr, MarbleStream
    empty = ()
    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        print "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise e
    # map the URL's path to a local file under the task's data dirs
    fle = util.localize(rest, disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)
    # print "FLOGLE: %s %s" % (url, fle)
    otab = None
    try:
        # import sys
        # sys.path.append('/Library/Python/2.7/site-packages/pycharm-debug.egg')
        # import pydevd
        # pydevd.settrace('localhost', port=12999, stdoutToServer=True, stderrToServer=True)
        otab = MarbleStream(fle)
        bitmaps = {}
        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name == otab.marble._name:
                if type(where) is Expr and not where.is_partition:
                    bitmaps[index] = where(otab)
                else:
                    # it is either the table itself, or a partition expression. either way,
                    # return the entire table
                    bitmaps[index] = otab.iter_all()
        for index, bitmap in bitmaps.iteritems():
            # optionally prepend the where-clause index as a synthetic column
            prefix = [index] if gen_where_index else []
            for row_id in bitmap:
                record = [otab.get(col, row_id) if col else None
                          for col in key_names[index]]
                # print "Gibbled: %s" % repr(record)
                record[0:0] = prefix  # this looks odd, but is faster than 'prefix + record'
                yield tuple(record), empty
    finally:
        # always release the marble stream, even on error
        if otab:
            otab.close()
def process_restrict(interface, state, label, inp, task, label_fn):
    """Relabel each (key, value) via *label_fn*, forcing a task-local replica.

    Raises ``util.DataError`` (instead of a bare Exception, for consistency
    with the other stages) when no local replica exists.
    """
    from disco import util
    # inp contains a set of replicas, let's force local #HACK
    input_processed = False
    for i, inp_url in inp.input.replicas:
        scheme, (netloc, port), rest = util.urlsplit(inp_url)
        if netloc == task.host:
            input_processed = True
            inp.input = inp_url
            break
    if not input_processed:
        raise util.DataError(
            "Input %s not processed, no LOCAL resource found." % str(inp.input), '')
    for key, value in inp:
        out_label = label_fn(key)
        # print "RESTRICT: %s %s" % (key, value)
        interface.output(out_label).add(key, value)
def test_urlsplit(self):
    """Check urlsplit for http, disco (localhost-localized) and tag URLs.

    Uses ``assertEqual`` (``assertEquals`` is a deprecated alias).
    """
    port = self.disco_settings['DISCO_PORT']
    ddfs = self.disco_settings['DDFS_ROOT']
    data = self.disco_settings['DISCO_DATA']
    self.assertEqual(urlsplit('http://host/path'),
                     ('http', ('host', ''), 'path'))
    self.assertEqual(urlsplit('http://host:port/path'),
                     ('http', ('host', 'port'), 'path'))
    self.assertEqual(urlsplit('disco://master/long/path'),
                     ('http', ('master', '%s' % port), 'long/path'))
    # disco:// URLs on the local host resolve to local file paths
    self.assertEqual(urlsplit('disco://localhost/ddfs/path', localhost='localhost'),
                     ('file', ('localhost', ''), os.path.join(ddfs, 'path')))
    self.assertEqual(urlsplit('disco://localhost/data/path', localhost='localhost'),
                     ('file', ('localhost', ''), os.path.join(data, 'path')))
    self.assertEqual(urlsplit('tag://tag', ''),
                     ('tag', ('', ''), 'tag'))
    self.assertEqual(urlsplit('tag://host/tag', ''),
                     ('tag', ('host', ''), 'tag'))
    self.assertEqual(urlsplit('tag://host:port/tag', ''),
                     ('tag', ('host', 'port'), 'tag'))
def input_stream(fd, size, url, params):
    """Load a discodb locally or via download, then apply an optional method.

    Fixes a crash: when the trailing method has no '/'-separated argument,
    ``arg`` is ``None`` and ``filter(None, None)`` raises TypeError; the
    method is now called with no arguments in that case.
    """
    import os
    from disco import util
    from disco.comm import download
    from discodb import DiscoDB, Q
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    if netloc[0] == Task.netloc[0]:
        # local replica: load straight from disk
        discodb = DiscoDB.load(open(os.path.join(Task.root, path)))
    else:
        discodb = DiscoDB.loads(download('disco://%s/%s' % (netloc, path)))
    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg)), size, url
        # guard against arg=None (no argument after the method name)
        return method(*filter(None, arg or ())), size, url
    return discodb, size, url
def Open(url, task=None):
    """Open a discodb from *url*, optionally applying a method/query after '!'.

    Fixes a crash: when the trailing method has no '/'-separated argument,
    ``arg`` is ``None`` and ``filter(None, None)`` raises TypeError; the
    method is now called with no arguments in that case.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    discodb = DiscoDB.load(
        open(util.localize(path, disco_data=disco_data, ddfs_data=ddfs_data)))
    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg))
        # guard against arg=None (no argument after the method name)
        return method(*filter(None, arg or ()))
    return discodb
def filename_input_stream(fd, size, url, params):
    """Yield (url, path) where path is the local disk file for this map job."""
    from disco import util
    from disco.worker.classic import worker
    try:
        scheme, netloc, rest = util.urlsplit(url)
        if netloc[1]:
            netloc = "%s:%s" % netloc
        else:
            netloc = netloc[0]
    except Exception as e:
        msg = "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise util.DataError(msg, url)
    if scheme == 'file':
        yield url, "/%s" % rest
    else:
        # print url, rest
        local_path = util.localize(rest,
                                   disco_data=worker.Task.disco_data,
                                   ddfs_data=worker.Task.ddfs_data)
        yield url, local_path
def request(method, url, data=None, headers={}, sleep=0): scheme, netloc, path = urlsplit(urlresolve(url)) # This fixes a problem with Unicode errors in Python 2.7 # works in Python 2.6 as well, but not earlier versions try: if data is not None: data = bytearray(data) except NameError: # In Python < 2.6, bytearray doesn't exist pass try: conn = HTTPConnection(str(netloc)) conn.request(method, '/%s' % path, body=data, headers=headers) response = conn.getresponse() status = response.status errmsg = response.reason except httplib.HTTPException, e: status = None errmsg = str(e) or repr(e)
def Open(url, task=None):
    """Open a discodb from *url*, optionally applying a method/query after '!'.

    Fixes a crash: when the trailing method has no '/'-separated argument,
    ``arg`` is ``None`` and ``filter(None, None)`` raises TypeError; the
    method is now called with no arguments in that case.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']
    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    discodb = DiscoDB.load(open(util.localize(path,
                                              disco_data=disco_data,
                                              ddfs_data=ddfs_data)))
    if rest:
        method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
        method = getattr(discodb, method_name)
        if method_name in ('metaquery', 'query'):
            return method(Q.urlscan(arg))
        # guard against arg=None (no argument after the method name)
        return method(*filter(None, arg or ()))
    return discodb
def test_urlsplit(self):
    """Check urlsplit for http, disco (with localized data paths) and tag URLs.

    Uses ``assertEqual`` (``assertEquals`` is a deprecated alias).
    """
    port = self.settings['DISCO_PORT']
    ddfs = self.settings['DDFS_DATA']
    data = self.settings['DISCO_DATA']
    self.assertEqual(urlsplit('http://host/path'),
                     ('http', ('host', ''), 'path'))
    self.assertEqual(urlsplit('http://host:port/path'),
                     ('http', ('host', 'port'), 'path'))
    self.assertEqual(urlsplit('disco://master/long/path'),
                     ('http', ('master', '{0}'.format(port)), 'long/path'))
    # disco:// URLs on the local host resolve to local file paths
    self.assertEqual(urlsplit('disco://localhost/ddfs/path',
                              localhost='localhost', ddfs_data=ddfs),
                     ('file', ('', ''), os.path.join(ddfs, 'path')))
    self.assertEqual(urlsplit('disco://localhost/data/path',
                              localhost='localhost', disco_data=data),
                     ('file', ('', ''), os.path.join(data, 'path')))
    self.assertEqual(urlsplit('tag://tag', ''),
                     ('tag', ('', ''), 'tag'))
    self.assertEqual(urlsplit('tag://host/tag', ''),
                     ('tag', ('host', ''), 'tag'))
    self.assertEqual(urlsplit('tag://host:port/tag', ''),
                     ('tag', ('host', 'port'), 'tag'))
def process_restrict(interface, state, label, inp, task, label_fn, ffuncs,
                     ghfuncs, deffuncs, agg_fn, wide=False, need_agg=False):
    """Restrict stage: force a task-local replica, then aggregate or relabel."""
    from disco import util
    empty = ()
    # inp contains a set of replicas, let's force local #HACK
    local_url = None
    for i, inp_url in inp.input.replicas:
        scheme, (netloc, port), rest = util.urlsplit(inp_url)
        if netloc == task.host:
            local_url = inp_url
            break
    if local_url is None:
        raise util.DataError(
            "Input %s not processed, no LOCAL resource found." % str(inp.input), '')
    inp.input = local_url
    # opportunistically aggregate in this stage
    if need_agg and not wide:
        for out_label, key in agg_fn(inp, label_fn, ffuncs, ghfuncs, deffuncs):
            interface.output(out_label).add(key, empty)
    else:
        for key, value in inp:
            # print "RESTRICT: %s %s" % (key, value)
            interface.output(label_fn(key)).add(key, value)
def download(url, data=None, redir=False, offset=0, method=None, sleep=0, header=None): header = header if header != None else {} from disco.util import urlsplit try: scheme, netloc, path = urlsplit(url) http = httplib.HTTPConnection(str(netloc)) h = {} if offset: if type(offset) == tuple: offs = 'bytes=%d-%d' % offset else: offs = 'bytes=%d-' % offset h = {'Range': offs} if not method: method = 'POST' if data != None else 'GET' http.request(method, '/%s' % path, data, headers = h) fd = http.getresponse() if fd.status == 302: loc = fd.getheader('location') if loc.startswith('http://'): url = loc elif loc.startswith('/'): url = 'http://%s%s' % (netloc, loc) else: url = '%s/%s' % (url, loc) return download(url, data, redir, offset, method, sleep, header) header.update(fd.getheaders()) return fd.status, fd.read() except (httplib.HTTPException, httplib.socket.error), e: raise CommError("Transfer %s failed: %s" % (url, e), url)
def _push(self, source_target, replicas=None, forceon=None, exclude=None, **kwargs):
    """Push a blob to DDFS, retrying on failed hosts.

    ``forceon`` and ``exclude`` now default to ``None`` instead of shared
    mutable ``[]`` defaults (mutable-default-argument anti-pattern);
    explicit list arguments behave exactly as before.
    """
    forceon = [] if forceon is None else forceon
    exclude = [] if exclude is None else exclude
    source, target = source_target
    qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                        ('include', ','.join(forceon)),
                                        ('replicas', replicas)) if v])
    urls = self._download('{0}/ddfs/new_blob/{1}?{2}'.format(
        self.master, target, qs))
    try:
        return [
            json.loads(bytes_to_str(url))
            for url in self._upload(urls, source, to_master=False, **kwargs)
        ]
    except CommError as e:
        scheme, (host, port), path = urlsplit(e.url)
        # retry, excluding the host that just failed
        return self._push((source, target), replicas=replicas, forceon=forceon,
                          exclude=exclude + [host], **kwargs)
def request(method, url, data=None, headers=None, sleep=0):
    """Issue an HTTP request, retrying on unavailability and following redirects.

    ``headers`` now defaults to ``None`` instead of a shared mutable ``{}``
    (mutable-default-argument anti-pattern); behavior is otherwise unchanged.
    Retries with exponential backoff up to sleep == 9, then raises CommError.
    """
    headers = {} if headers is None else headers
    scheme, netloc, path = urlsplit(urlresolve(url))
    try:
        conn = HTTPConnection(str(netloc))
        conn.request(method, '/{0}'.format(path), body=data, headers=headers)
        response = conn.getresponse()
        status = response.status
        errmsg = response.reason
    except httplib.HTTPException as e:
        status = None
        errmsg = str(e) or repr(e)
    except (httplib.socket.error, socket.error) as e:
        status = None
        errmsg = e if isinstance(e, basestring) else str(e) or repr(e)
    if not status or isunavailable(status):
        if sleep == 9:
            raise CommError(errmsg, url, status)
        # exponential backoff before retrying
        time.sleep(random.randint(1, 2**sleep))
        return request(method, url, data=data, headers=headers, sleep=sleep + 1)
    elif isredirection(status):
        loc = response.getheader('location')
        return request(
            method,
            loc if loc.startswith('http:') else resolveuri(url, loc),
            data=data, headers=headers, sleep=sleep)
    elif not issuccessful(status):
        raise CommError(response.read(), url, status)
    return response
def relativizetag(tag, parent):
    """Resolve *tag* against *parent*: inherit the parent's netloc when absent."""
    _scheme, netloc, name = urlsplit(canonizetag(tag))
    _scheme, parentloc, _ = urlsplit(canonizetag(parent))
    host = netloc or parentloc
    return urljoin(('tag', host, name))
def tagname(tag):
    """Return only the name component of the canonical form of *tag*."""
    parts = urlsplit(canonizetag(tag))
    return parts[2]
def answers(self):
    """Yield a (netloc, '') pair for every input URL of this job."""
    for url in self.input:
        _scheme, netloc, _path = urlsplit(url)
        yield str(netloc), ''
def resolveuri(baseuri, uri):
    """Resolve *uri* against *baseuri*: absolute paths keep base scheme+host."""
    if not uri.startswith('/'):
        return '%s/%s' % (baseuri, uri)
    scheme, netloc, _path = urlsplit(baseuri)
    return '%s://%s%s' % (scheme, netloc, uri)
def assertResults(self, job, input):
    """Assert the job's results are the (netloc, '') pairs of each input URL."""
    expected = sorted((str(urlsplit(i)[1]), '') for i in input)
    self.assertAllEqual(sorted(self.results(job)), expected)
def hustle_input_stream(fd, size, url, params, wheres, gen_where_index,
                        key_names, limit):
    # Stream (row, ()) tuples from a local marble, truncating each
    # where-clause's bitmap to at most *limit* rows (sys.maxint == no limit).
    from disco import util
    from hustle.core.marble import Expr, MarbleStream
    from itertools import izip, repeat, islice, imap
    from sys import maxint
    from pyebset import BitSet
    empty = ()
    try:
        scheme, netloc, rest = util.urlsplit(url)
    except Exception as e:
        msg = "Error handling hustle_input_stream for %s. %s" % (url, e)
        raise util.DataError(msg, url)
    # map the URL's path to a local file under the task's data dirs
    fle = util.localize(rest, disco_data=params._task.disco_data,
                        ddfs_data=params._task.ddfs_data)
    otab = None
    try:
        otab = MarbleStream(fle)
        bitmaps = {}
        for index, where in enumerate(wheres):
            # do not process where clauses that have nothing to do with this marble
            if where._name == otab.marble._name:
                if type(where) is Expr and not where.is_partition:
                    bm = where(otab)
                    if limit != maxint:
                        # materialize only the first *limit* row ids
                        bs = BitSet()
                        for i in islice(bm, 0, limit):
                            bs.set(i)
                        bitmaps[index] = (bs, len(bs))
                    else:
                        bitmaps[index] = (bm, len(bm))
                else:
                    # it is either the table itself, or a partition expression.
                    # Either way, returns the entire table
                    if limit != maxint:
                        bs = BitSet()
                        for i in islice(otab.iter_all(), 0, limit):
                            bs.set(i)
                        bitmaps[index] = (bs, len(bs))
                    else:
                        bitmaps[index] = (otab.iter_all(), otab.number_rows)
        for index, (bitmap, blen) in bitmaps.iteritems():
            # optionally prepend the where-clause index as a synthetic column
            prefix_gen = [repeat(index, blen)] if gen_where_index else []
            # row_iter = prefix_gen + \
            #     [otab.mget(col, bitmap) if col is not None else repeat(None, blen)
            #      for col in key_names[index]]
            row_creators = []
            for col, column_fn in key_names[index]:
                if col is not None:
                    if column_fn is None:
                        row_creators.append(otab.mget(col, bitmap))
                    else:
                        # lazily apply the per-column transform
                        row_creators.append(
                            imap(column_fn, otab.mget(col, bitmap)))
                else:
                    row_creators.append(repeat(None, blen))
            row_iter = prefix_gen + row_creators
            for row in izip(*row_iter):
                yield row, empty
    finally:
        # always release the marble stream, even on error
        if otab:
            otab.close()
return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path) return url def _push(self, (source, target), replicas=None, exclude=[], **kwargs): qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)), ('replicas', replicas)) if v]) urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master, target, qs)) try: return [ json.loads(url) for url in self._upload(urls, source, **kwargs) ] except CommError, e: scheme, (host, port), path = urlsplit(e.url) return self._push((source, target), replicas=replicas, exclude=exclude + [host], **kwargs) def _tagattr(self, tag, attr): return '%s/%s' % (self._resolve(canonizetag(tag)), attr) def _token(self, token, method): if token is None: if method == 'GET': return self.settings['DDFS_READ_TOKEN'] return self.settings['DDFS_WRITE_TOKEN'] return token
def map_input_stream(stream, size, url, params):
    """Check the scheduler honored the blacklist before opening the input."""
    scheme, (host, port), test_server = urlsplit(url)
    # test that scheduler observed the blacklist
    print("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host <= host
    remote = "http://{0}/{1}".format(test_server, host)
    return open_remote(remote)
def _maybe_proxy(self, url, method='GET'):
    """Rewrite *url* through the configured proxy, if one is set."""
    if not self.proxy:
        return url
    scheme, (host, port), path = urlsplit(url)
    return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
def map_input_stream(stream, size, url, params):
    """Assert the input is local to this node, then stream it as strings."""
    parts = urlsplit(url)
    netloc = parts[1]
    # the input must have been scheduled on its own host
    assert netloc.host == Task.host
    return string_input_stream(str(netloc), size, url, params)
def map_input_stream(stream, size, url, params):
    """Verify data locality, then read the input through the test server."""
    scheme, (host, port), test_server = urlsplit(url)
    # test that scheduler preserved data locality
    msg("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host == host
    target = "http://{0}/{1}".format(test_server, host)
    return open_remote(target)
def map_input_stream(stream, size, url, params):
    """Confirm the blacklist kept this input away, then open it remotely."""
    scheme, (host, port), test_server = urlsplit(url)
    # test that scheduler observed the blacklist
    msg("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host <= host
    target = "http://{0}/{1}".format(test_server, host)
    return open_remote(target)
def open_url(self, url):
    """Open *url* from local disk when it resolves to a file, remotely otherwise."""
    scheme, netloc, rest = util.urlsplit(url, localhost=self.host)
    if scheme and scheme != 'file':
        return comm.open_remote('%s://%s/%s' % (scheme, netloc, rest))
    return comm.open_local(rest)
def resolveuri(baseuri, uri):
    """Join *uri* onto *baseuri*; absolute paths keep baseuri's scheme and host."""
    if not uri.startswith('/'):
        return '{0}/{1}'.format(baseuri, uri)
    scheme, netloc, _path = urlsplit(baseuri)
    return '{0}://{1}{2}'.format(scheme, netloc, uri)
def relativizetag(tag, parent):
    """Build a tag URL for *tag*, inheriting *parent*'s netloc when absent."""
    _, tagloc, name = urlsplit(canonizetag(tag))
    _, parentloc, _ = urlsplit(canonizetag(parent))
    loc = tagloc if tagloc else parentloc
    return urljoin(("tag", loc, name))