def test_badinfo(self):
    """Check that a jobpack with an empty jobdict is refused.

    The pack is posted to ``/disco/job/new``; the decoded reply must carry
    the status ``'error'`` and a message containing ``"missing key"``.
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    jobdict = {}  # deliberately empty: none of the job keys are present
    jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata)
    status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(status, 'error')
    # Pass the response as the failure message so a mismatch is diagnosable.
    self.assertTrue("missing key" in response, response)
Пример #2
0
 def test_badinfo(self):
     """Check that a jobpack with an empty jobdict is refused.

     The pack is posted to ``/disco/job/new``; the decoded reply must carry
     the status ``'error'`` and a message containing ``"missing key"``.
     """
     jobenvs, jobzip, jobdata = {}, b'', b''
     jobdict = {}  # deliberately empty: none of the job keys are present
     jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
     status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
     # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
     self.assertEqual(status, 'error')
     # Pass the response as the failure message so a mismatch is diagnosable.
     self.assertTrue("missing key" in response, response)
Пример #3
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def make_jobzip(*paths):
        # Add every given path to a DiscoZipFile and close it before use.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # "@path" means "take the job data from that file"; any other value
        # is used verbatim.
        if data.startswith('@'):
            # The context manager closes the handle right after reading;
            # a bare open(...).read() left that to the garbage collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        'input': program.input(*inputs),
        'worker': worker,
        'map?': program.options.has_map,
        'reduce?': program.options.has_reduce,
        'nr_reduces': program.options.nr_reduces,
        'prefix': prefix(program.options.prefix),
        'scheduler': program.scheduler,
        'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']
    }
    jobenvs = dict(program.options.env)
    # The worker itself always goes into the job home, ahead of extra files.
    jobzip = make_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # Either show the serialized pack or submit it and show the reply.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Пример #4
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def make_jobzip(*paths):
        # Add every given path to a DiscoZipFile and close it before use.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # "@path" means "take the job data from that file"; any other value
        # is used verbatim.
        if data.startswith("@"):
            # The context manager closes the handle right after reading;
            # a bare open(...).read() left that to the garbage collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        "input": program.input(*inputs),
        "worker": worker,
        "map?": program.options.has_map,
        "reduce?": program.options.has_reduce,
        "nr_reduces": program.options.nr_reduces,
        "prefix": prefix(program.options.prefix),
        "scheduler": program.scheduler,
        "owner": program.options.owner or program.settings["DISCO_JOB_OWNER"],
    }
    jobenvs = dict(program.options.env)
    # The worker itself always goes into the job home, ahead of extra files.
    jobzip = make_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # Either show the serialized pack or submit it and show the reply.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Пример #5
0
    def test_badprefix(self):
        """Check that the prefixes ``'a/b'`` and ``'a.b'`` are refused.

        Each jobpack is posted to ``/disco/job/new``; the decoded reply must
        carry the status ``'error'`` and mention ``"invalid prefix"``.
        """
        jobenvs, jobzip, jobdata = {}, b'', b''
        # Both cases differ only in the prefix, so build them in one loop.
        for prefix in ('a/b', 'a.b'):
            jobdict = {'prefix': prefix, 'scheduler': {}, 'input': [],
                       'worker': "w", 'owner': "o", 'nr_reduces': "2"}
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(
                self.disco.request('/disco/job/new', jobpack.dumps()))
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(status, 'error')
            # The response doubles as the failure message for diagnosis.
            self.assertTrue("invalid prefix" in response, response)
Пример #6
0
Файл: task.py Проект: caox/disco
 def __init__(self,
              host='',
              jobfile='',
              jobname='',
              master=None,
              disco_port=None,
              put_port=None,
              ddfs_data='',
              disco_data='',
              mode=None,
              taskid=-1):
     """Store the task parameters and load the jobpack from *jobfile*.

     *jobfile* is opened in binary mode and parsed with ``JobPack.load``;
     the pack's ``jobdata`` is then decoded with ``dPickle.loads`` into
     ``self.jobobjs``.  The remaining arguments are stored unchanged as
     attributes of the same name.  ``self.uid`` combines the mode, the task
     id, a hash of the current time and the process id.
     """
     from disco.job import JobPack
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # Close the job file as soon as the pack is loaded instead of leaking
     # the handle.  Assumes JobPack.load reads everything before returning.
     with open(jobfile, 'rb') as packfile:
         self.jobpack = JobPack.load(packfile)
     # NOTE(review): dPickle.loads presumably unpickles the job data, which
     # can execute arbitrary code; only load job files from trusted sources.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.mode = mode
     self.taskid = taskid
     self.outputs = {}
     self.uid = '{0}:{1}-{2}-{3}'.format(mode,
                                         taskid,
                                         hexhash(str((time.time())).encode()),
                                         os.getpid())
Пример #7
0
 def __init__(self,
              host='',
              jobfile='',
              jobname='',
              master=None,
              disco_port=None,
              put_port=None,
              ddfs_data='',
              disco_data='',
              stage=None,
              group=None,
              grouping=None,
              taskid=-1):
     """Store the task parameters and load the jobpack from *jobfile*.

     *jobfile* is opened in binary mode and parsed with ``JobPack.load``;
     the pack's ``jobdata`` is then decoded with ``dPickle.loads`` into
     ``self.jobobjs``.

     *group* must be a two-item sequence despite its ``None`` default: it is
     both indexed (to build ``self.group``) and unpacked into
     ``self.group_label`` and ``self.group_host``.

     ``self.uid`` combines the stage, the sanitized group name, the task id,
     a hash of the current time and the process id.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # Close the job file as soon as the pack is loaded instead of leaking
     # the handle.  Assumes JobPack.load reads everything before returning.
     with open(jobfile, 'rb') as packfile:
         self.jobpack = JobPack.load(packfile)
     # NOTE(review): dPickle.loads presumably unpickles the job data, which
     # can execute arbitrary code; only load job files from trusted sources.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.stage = stage
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(
         self.stage, DDFS.safe_name(self.group), self.taskid,
         hexhash(str((time.time())).encode()), os.getpid())
Пример #8
0
def pack(jobname, jobdict, worker, workdir):
    """Serialize a jobpack: header, JSON jobdict, JSON env, zip, job data.

    The zip holds either the single *worker* file (when *workdir* is falsy)
    or every directory and entry found while walking *workdir*.  The job
    environment is always an empty JSON object and the job data an empty
    string.  *jobname* is accepted but not used.

    Returns the header built by ``JobPack.header`` from the four segment
    offsets, followed by the concatenated segments.
    """
    import cStringIO
    buf = cStringIO.StringIO()
    archive = zipfile.ZipFile(buf, "w")
    # FIXME: Strip off leading pathname elements from zipped paths
    if workdir:
        def add_directory(_arg, dirname, names):
            # Called once per directory: store it, then each of its entries.
            archive.write(dirname)
            for entry in names:
                archive.write(os.path.join(dirname, entry))
        os.path.walk(workdir, add_directory, None)
    else:
        archive.write(worker)
    archive.close()

    segments = (json.dumps(jobdict), json.dumps({}), buf.getvalue(), '')

    # Each segment starts where the previous one ended; the first one starts
    # right after the fixed-size header.
    offsets = []
    position = JobPack.HEADER_SIZE
    for segment in segments:
        offsets.append(position)
        position += len(segment)

    return JobPack.header(tuple(offsets)) + ''.join(segments)
Пример #9
0
 def __init__(self,
              host='',
              jobfile='',
              jobname='',
              master=None,
              disco_port=None,
              put_port=None,
              ddfs_data='',
              disco_data='',
              mode=None,
              taskid=-1):
     """Store the task parameters and load the jobpack from *jobfile*.

     *jobfile* is parsed with ``JobPack.load``; the pack's ``jobdata`` is
     then decoded with ``dPickle.loads`` into ``self.jobobjs``.  The
     remaining arguments are stored unchanged as attributes of the same
     name.  ``self.uid`` combines the mode, the task id, a hash of the
     current time and the process id in hexadecimal.
     """
     from disco.job import JobPack
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # Open in binary mode (the pack carries a zip archive) and close the
     # handle once loaded instead of leaking it.  Assumes JobPack.load reads
     # everything before returning.
     with open(jobfile, 'rb') as packfile:
         self.jobpack = JobPack.load(packfile)
     # NOTE(review): dPickle.loads presumably unpickles the job data, which
     # can execute arbitrary code; only load job files from trusted sources.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.mode = mode
     self.taskid = taskid
     self.outputs = {}
     self.uid = '%s:%s-%s-%x' % (mode,
                                 taskid,
                                 hexhash(str((time.time()))),
                                 os.getpid())
Пример #10
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def make_jobzip(*paths):
        # Add every given path to a DiscoZipFile and close it before use.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # "@path" means "take the job data from that file"; any other value
        # is used verbatim.
        if data.startswith('@'):
            # The context manager closes the handle right after reading;
            # a bare open(...).read() left that to the garbage collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {'input': program.input(*inputs),
               'worker': worker,
               'map?': program.options.has_map,
               'reduce?': program.options.has_reduce,
               'nr_reduces': program.options.nr_reduces,
               'prefix': prefix(program.options.prefix),
               'scheduler': program.scheduler,
               'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']}
    jobenvs = dict(program.options.env)
    # The worker itself always goes into the job home, ahead of extra files.
    jobzip = make_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    # A parenthesised single-argument print behaves the same whether print
    # is the Python 2 statement or the Python 3 function.
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t%s\t%s" % item for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t%s\t%s" % item for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t%s" % name for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t%s" % line for line in jobdata.splitlines()))
    # Either show the serialized pack or submit it and show the reply.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Пример #11
0
 def jobpack(self, jobname):
     """Fetch and parse the :class:`disco.job.JobPack` of job *jobname*.

     The raw pack is requested as bytes from ``/disco/ctrl/parameters`` and
     handed to ``JobPack.load`` wrapped in a ``BytesIO``.
     """
     from disco.compat import BytesIO
     from disco.job import JobPack
     url = '/disco/ctrl/parameters?name={0}'.format(jobname)
     raw_pack = self.request(url, as_bytes=True)
     return JobPack.load(BytesIO(raw_pack))
 def test_badlength(self):
     """Check that a truncated jobpack is refused with "invalid header".

     A valid pack is serialized and then cut short by the whole job data
     plus one more byte before being posted to ``/disco/job/new``.
     """
     jobenvs, jobzip, jobdata = {}, b'0'*64, b'0'*64
     jobdict = {'prefix':'JobPackBadLength', 'scheduler':{}, 'input':["raw://data"],
                "map?":True, 'worker':"w", 'owner':"o", 'nr_reduces':"2"}
     jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata).dumps()
     # Drop the trailing job data and one extra byte so the pack is shorter
     # than its header says.
     jobpack = jobpack[:(len(jobpack)-len(jobdata)-1)]
     status, response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
     self.assertEqual(status, 'error')
     # The response doubles as the failure message for diagnosis.
     self.assertTrue("invalid header" in response, response)
Пример #13
0
    def test_badheader(self):
        """Check that jobpacks with malformed header offsets are refused.

        Each case pairs four header offsets with the number of payload bytes
        appended after the header; posting the result to ``/disco/job/new``
        must yield the status ``'error'`` every time.
        """
        cases = [
            ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
            ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
        ]
        for offsets, payload_len in cases:
            jobpack = JobPack.header(offsets) + b'0' * payload_len
            status, _response = loads(self.disco.request('/disco/job/new', jobpack))
            # assertEqual replaces the deprecated assertEquals alias; the
            # message identifies which case failed.
            self.assertEqual(status, 'error',
                             'offsets={0!r}'.format(offsets))
Пример #14
0
    def test_badheader(self):
        """Check that jobpacks with malformed header offsets are refused.

        Each case pairs four header offsets with the number of payload bytes
        appended after the header; posting the result to ``/disco/job/new``
        must yield the status ``'error'`` every time.
        """
        cases = [
            ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
            ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
        ]
        for offsets, payload_len in cases:
            jobpack = JobPack.header(offsets) + b'0' * payload_len
            status, _response = loads(self.disco.request('/disco/job/new', jobpack))
            # assertEqual replaces the deprecated assertEquals alias; the
            # message identifies which case failed.
            self.assertEqual(status, 'error',
                             'offsets={0!r}'.format(offsets))
Пример #15
0
    def test_badprefix(self):
        """Check that the prefixes ``'a/b'`` and ``'a.b'`` are refused.

        Each jobpack is posted to ``/disco/job/new``; the decoded reply must
        carry the status ``'error'`` and mention ``"invalid prefix"``.
        """
        jobenvs, jobzip, jobdata = {}, b'', b''
        # Both cases differ only in the prefix, so build them in one loop.
        for prefix in ('a/b', 'a.b'):
            jobdict = {
                'prefix': prefix,
                'scheduler': {},
                'input': [],
                'worker': "w",
                'owner': "o",
                'nr_reduces': "2"
            }
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(
                self.disco.request('/disco/job/new', jobpack.dumps()))
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(status, 'error')
            # The response doubles as the failure message for diagnosis.
            self.assertTrue("invalid prefix" in response, response)
Пример #16
0
    def __init__(self, jobpack):
        """Load *jobpack*, derive the job's name and paths, and prepare it.

        The serialized pack is parsed with ``JobPack.load``.  The job name
        is ``self.prefix`` joined to a timestamp with ``@``; the job home is
        ``<host>/<hash of name>/<name>`` and is extracted beneath the
        directory named by the ``DISCO_DATA`` environment variable.  The
        job file is then saved and the worker made executable before the
        job is marked ``"active"`` with no results.
        """
        self.jobpack = JobPack.load(StringIO(jobpack))
        self.host = "localhost"
        self.data_root = os.environ['DISCO_DATA']

        stamp = leisure.disco.timestamp()
        self.name = "{}@{}".format(self.prefix, stamp)

        name_hash = leisure.disco.hex_hash(self.name)
        self.home = os.path.join(self.host, name_hash, self.name)

        job_root = os.path.join(self.data_root, self.home)
        self.job_dir = extract_jobhome(job_root, self.jobpack.jobhome)

        self.save_jobfile(jobpack)
        self.ensure_worker_executable()
        self.results = []
        self.status = "active"
Пример #17
0
  def __init__(self, jobpack):
    """Load *jobpack*, derive the job's name and paths, and prepare it.

    The serialized pack is parsed with ``JobPack.load``.  The job name is
    ``self.prefix`` joined to a timestamp with ``@``; the job home is
    ``<host>/<hash of name>/<name>`` and is extracted beneath the directory
    named by the ``DISCO_DATA`` environment variable.  The job file is then
    saved and the worker made executable before the job is marked
    ``"active"`` with no results.
    """
    self.jobpack = JobPack.load(StringIO(jobpack))
    self.host = "localhost"
    self.data_root = os.environ['DISCO_DATA']

    stamp = leisure.disco.timestamp()
    self.name = "{}@{}".format(self.prefix, stamp)

    name_hash = leisure.disco.hex_hash(self.name)
    self.home = os.path.join(self.host, name_hash, self.name)

    job_root = os.path.join(self.data_root, self.home)
    self.job_dir = extract_jobhome(job_root, self.jobpack.jobhome)

    self.save_jobfile(jobpack)
    self.ensure_worker_executable()
    self.results = []
    self.status = "active"
Пример #18
0
 def __init__(self,
              host='',
              jobfile='',
              jobname='',
              master=None,
              disco_port=None,
              put_port=None,
              ddfs_data='',
              disco_data='',
              stage=None,
              group=None,
              grouping=None,
              taskid=-1):
     """Store the task parameters and load the jobpack from *jobfile*.

     *jobfile* is opened in binary mode and parsed with ``JobPack.load``;
     the pack's ``jobdata`` is then decoded with ``dPickle.loads`` into
     ``self.jobobjs``.

     *group* must be a two-item sequence despite its ``None`` default: it is
     both indexed (to build ``self.group``) and unpacked into
     ``self.group_label`` and ``self.group_host``.

     ``self.uid`` combines the stage, the sanitized group name, the task id,
     a hash of the current time and the process id.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # Close the job file as soon as the pack is loaded instead of leaking
     # the handle.  Assumes JobPack.load reads everything before returning.
     with open(jobfile, 'rb') as packfile:
         self.jobpack = JobPack.load(packfile)
     # NOTE(review): dPickle.loads presumably unpickles the job data, which
     # can execute arbitrary code; only load job files from trusted sources.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.stage = stage
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(self.stage,
                                             DDFS.safe_name(self.group),
                                             self.taskid,
                                             hexhash(str((time.time())).encode()),
                                             os.getpid())
Пример #19
0
 def test_badmagic(self):
     """Check that a jobpack whose header has magic 0 is refused.

     The header is built with ``magic=0``, followed by four payload bytes,
     and posted to ``/disco/job/new``; the status must be ``'error'``.
     """
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0'*4
     status, _response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
     self.assertEqual(status, 'error')
Пример #20
0
 def jobpack(self, jobname):
     """Fetch and parse the :class:`disco.job.JobPack` of job *jobname*.

     The raw pack is requested from ``/disco/ctrl/parameters`` and handed
     to ``JobPack.load`` wrapped in a ``StringIO``.
     """
     from cStringIO import StringIO
     from disco.job import JobPack
     url = '/disco/ctrl/parameters?name=%s' % jobname
     raw_pack = self.request(url)
     return JobPack.load(StringIO(raw_pack))
Пример #21
0
 def test_badmagic(self):
     """Check that a jobpack whose header has magic 0 is refused.

     The header is built with ``magic=0``, followed by four payload bytes,
     and posted to ``/disco/job/new``; the status must be ``'error'``.
     """
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0' * 4
     status, _response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
     self.assertEqual(status, 'error')
Пример #22
0
Файл: core.py Проект: hmas/disco
 def jobpack(self, jobname):
     """Fetch and parse the :class:`disco.job.JobPack` of job *jobname*.

     The raw pack is requested as bytes from ``/disco/ctrl/parameters`` and
     handed to ``JobPack.load`` wrapped in a ``BytesIO``.
     """
     from disco.compat import BytesIO
     from disco.job import JobPack
     url = '/disco/ctrl/parameters?name={0}'.format(jobname)
     raw_pack = self.request(url, as_bytes=True)
     return JobPack.load(BytesIO(raw_pack))
Пример #23
0
 def jobpack(self, jobname):
     """Fetch and parse the :class:`disco.job.JobPack` of job *jobname*.

     The raw pack is requested from ``/disco/ctrl/parameters`` and handed
     to ``JobPack.load`` wrapped in a ``StringIO``.
     """
     from cStringIO import StringIO
     from disco.job import JobPack
     url = '/disco/ctrl/parameters?name=%s' % jobname
     raw_pack = self.request(url)
     return JobPack.load(StringIO(raw_pack))