Пример #1
0
    def __init__( self, app, appconfig, workdata ):
        """Set up the vertex worker's sockets, broker discovery, reader and surveyor link.

        app, appconfig and workdata are passed unchanged to
        WorkerBase.__init__. workdata must contain "hostname" (host of the
        surveyor to connect to) and "inputfile" (name of the file under
        <remaproot>/data to read vertices from).
        """
        WorkerBase.__init__( self, app, appconfig, workdata )
        self.surveyorname = workdata["hostname"]

        # Publish/subscribe socket pair; the broker address is not known yet.
        self.vsub = nn.Socket( nn.SUB, domain=nn.AF_SP )
        self.vpub = nn.Socket( nn.PUB, domain=nn.AF_SP )
        self.broker_address = None

        # Start service discovery; cb_broker_changed is handed over as the callback.
        self.bonjour = resolver = BonjourResolver( "_vertexremap._tcp", self.cb_broker_changed )
        resolver.start()

        # Vertices come from <remaproot>/data, partitions go to the job's "part" dir.
        source_path = os.path.join( self.remaproot, "data", self.workdata["inputfile"] )
        part_dir = os.path.join( self.remaproot, "job", self.jobid, "part" )

        self.outputdir = part_dir
        self.input = self.app.create_vertex_reader( source_path )
        self.partitions = {}

        self.mode = MODE_IDLE

        # Respondent side of the survey, connected to the surveyor on port 8688.
        self.surveyor = respondent = nn.Socket( nn.RESPONDENT )
        respondent.connect( "tcp://%s:8688" % self.surveyorname )
        # NOTE(review): an earlier comment here said "6 seconds", but the value
        # passed is 50 and nanomsg documents NN_RCVTIMEO in milliseconds --
        # confirm which receive timeout is intended.
        respondent.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, 50 )
        self.vertices = {}

        logger.info("Waiting to get vertex broker host from bonjour")

        # Starts out not ready.
        self.ready = False
Пример #2
0
    def __init__(self, app, appconfig, workdata):
        """Prepare a reducer for one partition and merge its part files.

        app, appconfig and workdata are passed unchanged to
        WorkerBase.__init__. The worker data must contain "partition" (the
        sub-directory of <remaproot>/job/<jobid>/part to read) and
        "outputdir" (the sub-directory of <remaproot>/data to write to).

        Raises whatever os.listdir raises when the partition directory
        cannot be listed. An empty partition directory is accepted: no
        readers are opened and self.merged yields nothing.
        """
        WorkerBase.__init__(self, app, appconfig, workdata)
        self.total_size = 0
        self.prevkey = None

        # This is a reducer operation
        inputdir = os.path.join(self.remaproot, "job", self.jobid, "part",
                                self.workdata["partition"])
        outputdir = os.path.join(self.remaproot, "data",
                                 self.workdata["outputdir"])

        # Sorted so that the part files are always opened in the same order.
        self.reducerfiles = sorted(os.listdir(inputdir))
        self.inputdir = inputdir
        self.numparts = len(self.reducerfiles)
        # Percentage contributed by one part. An empty partition directory
        # used to raise ZeroDivisionError here; with no parts there is no
        # per-part step, so the fraction is simply 0.
        if self.numparts:
            self.fraction = 100.0 / self.numparts
        else:
            self.fraction = 0.0
        self.completedparts = 0
        self.outputdir = outputdir
        self.partition = self.workdata["partition"]
        self.reducerWriter = self.app.create_reducer_writer(
            self.outputdir, self.partition)

        # One reader per part file; total_size accumulates their file sizes.
        self.sources = []
        for filename in self.reducerfiles:
            f = self.app.create_reducer_reader(
                os.path.join(self.inputdir, filename))
            self.sources.append(f)
            self.total_size += f.filesize

        # One lazy stream of (key, list_of_values, recsize) tuples per reader.
        # heapq.merge only interleaves correctly if every stream is already
        # sorted -- presumably the part files are written sorted by key;
        # confirm against the writer.
        decorated = [((key, list_of_values, recsize)
                      for key, list_of_values, recsize in f.read())
                     for f in self.sources]
        self.merged = heapq.merge(*decorated)
Пример #3
0
    def __init__( self, app, appconfig, workdata ):
        """Prepare a reducer for one partition and merge its part files.

        app, appconfig and workdata are passed unchanged to
        WorkerBase.__init__. The worker data must contain "partition" (the
        sub-directory of <remaproot>/job/<jobid>/part to read) and
        "outputdir" (the sub-directory of <remaproot>/data to write to).

        Raises whatever os.listdir raises when the partition directory
        cannot be listed. An empty partition directory is accepted: no
        readers are opened and self.merged yields nothing.
        """
        WorkerBase.__init__( self, app, appconfig, workdata )
        self.total_size = 0
        self.prevkey = None

        # This is a reducer operation
        inputdir = os.path.join( self.remaproot, "job", self.jobid, "part", self.workdata["partition"] )
        outputdir = os.path.join( self.remaproot, "data", self.workdata["outputdir"] )

        # Sorted so that the part files are always opened in the same order.
        self.reducerfiles = sorted(os.listdir( inputdir ))
        self.inputdir = inputdir
        self.numparts = len(self.reducerfiles)
        # Percentage contributed by one part. An empty partition directory
        # used to raise ZeroDivisionError here; with no parts there is no
        # per-part step, so the fraction is simply 0.
        if self.numparts:
            self.fraction = 100.0 / self.numparts
        else:
            self.fraction = 0.0
        self.completedparts = 0
        self.outputdir = outputdir
        self.partition = self.workdata["partition"]
        self.reducerWriter = self.app.create_reducer_writer( self.outputdir, self.partition )

        # One reader per part file; total_size accumulates their file sizes.
        self.sources = []
        for filename in self.reducerfiles:
            f = self.app.create_reducer_reader( os.path.join( self.inputdir, filename ))
            self.sources.append( f )
            self.total_size += f.filesize

        # One lazy stream of (key, list_of_values, recsize) tuples per reader.
        # heapq.merge only interleaves correctly if every stream is already
        # sorted -- presumably the part files are written sorted by key;
        # confirm against the writer.
        decorated = [
            ((key,list_of_values,recsize) for key,list_of_values,recsize in f.read())
            for f in self.sources]
        self.merged = heapq.merge(*decorated)
Пример #4
0
    def __init__( self, app, appconfig, workdata ):
        """Initialise a mapper worker for a single input file.

        app, appconfig and workdata are passed unchanged to
        WorkerBase.__init__. workdata must contain "workid" and the worker
        data must contain "inputfile" (a file name under <remaproot>/data).
        """
        WorkerBase.__init__( self, app, appconfig, workdata )
        self.workerid = workdata["workid"]

        # Resolve both locations first, then publish them on the instance.
        source_path = os.path.join( self.remaproot, "data", self.workdata["inputfile"] )
        part_dir = os.path.join( self.remaproot, "job", self.jobid, "part" )

        self.outputdir = part_dir
        self.input = self.app.create_mapper_reader( source_path )
        # Starts empty.
        self.partitions = {}
Пример #5
0
    def __init__(self, app, appconfig, workdata):
        """Create a mapper worker bound to one input file.

        The three arguments go straight to WorkerBase.__init__. The worker
        id is read from workdata["workid"], the input file name from
        self.workdata["inputfile"].
        """
        WorkerBase.__init__(self, app, appconfig, workdata)
        self.workerid = workdata["workid"]

        # Input lives under <remaproot>/data, output under the job's "part" dir.
        data_file = os.path.join(
            self.remaproot, "data", self.workdata["inputfile"])
        job_part_dir = os.path.join(
            self.remaproot, "job", self.jobid, "part")

        self.partitions = {}
        self.outputdir = job_part_dir
        self.input = self.app.create_mapper_reader(data_file)