def __init__( self, app, appconfig, workdata ):
    """Initialize a vertex worker.

    Sets up pub/sub sockets for vertex traffic, starts Bonjour-based
    discovery of the vertex broker, opens the input vertex file, and
    connects a RESPONDENT socket back to the surveying host so this
    worker can answer coordination polls.
    """
    WorkerBase.__init__( self, app, appconfig, workdata )
    # Host running the surveyor this worker responds to.
    self.surveyorname = workdata["hostname"]
    # Vertex message sockets; not connected yet — the broker address
    # arrives asynchronously via the Bonjour callback below.
    self.vsub = nn.Socket( nn.SUB, domain=nn.AF_SP )
    self.vpub = nn.Socket( nn.PUB, domain=nn.AF_SP )
    self.broker_address = None
    # Resolve "_vertexremap._tcp" over mDNS; cb_broker_changed fires
    # whenever the broker host changes.
    self.bonjour = BonjourResolver( "_vertexremap._tcp", self.cb_broker_changed )
    self.bonjour.start()
    # Input vertices live under the data root; intermediate partition
    # output goes under this job's "part" directory.
    inputfile = os.path.join( self.remaproot, "data", self.workdata["inputfile"] )
    outputdir = os.path.join( self.remaproot, "job", self.jobid, "part" )
    self.input = self.app.create_vertex_reader( inputfile )
    self.outputdir = outputdir
    self.partitions = {}
    self.mode = MODE_IDLE
    # RESPONDENT side of the nanomsg survey pattern: answers the
    # coordinator's periodic surveys on port 8688.
    self.surveyor = nn.Socket( nn.RESPONDENT )
    self.surveyor.connect( "tcp://%s:8688"%(self.surveyorname) )
    # 6 seconds
    # NOTE(review): the comment above says "6 seconds", but nanomsg's
    # RCVTIMEO is specified in milliseconds and the value passed is 50 —
    # confirm whether 50 ms or 6000 ms is intended.
    self.surveyor.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, 50 )
    self.vertices = {}
    logger.info("Waiting to get vertex broker host from bonjour")
    # Not ready until the broker address has been resolved.
    self.ready = False
def __init__(self, app, appconfig, workdata):
    """Set up a reducer worker.

    Opens every sorted mapper fragment belonging to this partition,
    tracks the total input size for progress reporting, and builds a
    single key-ordered record stream by heap-merging the fragments.
    """
    WorkerBase.__init__(self, app, appconfig, workdata)
    self.prevkey = None
    # Fragments for this partition live under the job's part directory;
    # reduced output goes under the data root.
    self.inputdir = os.path.join(
        self.remaproot, "job", self.jobid, "part", self.workdata["partition"])
    self.outputdir = os.path.join(
        self.remaproot, "data", self.workdata["outputdir"])
    # Sort filenames so fragments are processed in a deterministic order.
    self.reducerfiles = sorted(os.listdir(self.inputdir))
    self.numparts = len(self.reducerfiles)
    # Each completed fragment contributes this percentage of progress.
    self.fraction = 100.0 / self.numparts
    self.completedparts = 0
    self.partition = self.workdata["partition"]
    self.reducerWriter = self.app.create_reducer_writer(
        self.outputdir, self.partition)
    # Open one reader per fragment, accumulating the total byte count.
    self.sources = []
    self.total_size = 0
    for fragment in self.reducerfiles:
        reader = self.app.create_reducer_reader(
            os.path.join(self.inputdir, fragment))
        self.sources.append(reader)
        self.total_size += reader.filesize
    # heapq.merge assumes each reader yields (key, values, size) triples
    # already sorted by key, producing one globally key-ordered stream.
    streams = [((key, values, size)
                for key, values, size in reader.read())
               for reader in self.sources]
    self.merged = heapq.merge(*streams)
def __init__( self, app, appconfig, workdata ):
    """Initialize a reducer worker.

    Locates the fragment files produced for this partition, opens a
    reader for each, and lazily merges them into one stream ordered by
    key via heapq.merge.
    """
    WorkerBase.__init__( self, app, appconfig, workdata )
    self.prevkey = None
    inputdir = os.path.join( self.remaproot, "job", self.jobid, "part",
                             self.workdata["partition"] )
    outputdir = os.path.join( self.remaproot, "data",
                              self.workdata["outputdir"] )
    self.inputdir = inputdir
    self.outputdir = outputdir
    # Sorted filenames give a stable processing order.
    self.reducerfiles = sorted( os.listdir( inputdir ) )
    self.numparts = len( self.reducerfiles )
    # Progress percentage contributed by each finished fragment.
    self.fraction = 100.0 / self.numparts
    self.completedparts = 0
    self.partition = self.workdata["partition"]
    self.reducerWriter = self.app.create_reducer_writer( self.outputdir,
                                                         self.partition )
    # One reader per fragment file, in sorted-name order.
    self.sources = [ self.app.create_reducer_reader(
                         os.path.join( inputdir, fn ) )
                     for fn in self.reducerfiles ]
    self.total_size = sum( src.filesize for src in self.sources )
    # Each reader yields (key, values, recsize) triples in key order;
    # merging them yields a single globally ordered stream.
    generators = [ ( (k, vals, sz) for k, vals, sz in src.read() )
                   for src in self.sources ]
    self.merged = heapq.merge( *generators )
def __init__( self, app, appconfig, workdata ):
    """Set up a mapper worker.

    Opens the assigned input file and prepares an empty partition table
    for intermediate output produced during the map phase.
    """
    WorkerBase.__init__( self, app, appconfig, workdata )
    self.workerid = workdata["workid"]
    # Input lives under the data root; intermediate parts go under the
    # job's part directory.
    self.input = self.app.create_mapper_reader(
        os.path.join( self.remaproot, "data", self.workdata["inputfile"] ) )
    self.outputdir = os.path.join( self.remaproot, "job", self.jobid, "part" )
    self.partitions = {}
def __init__(self, app, appconfig, workdata):
    """Prepare a mapper worker.

    Resolves the input and output paths and opens the input reader;
    the partitions dict is filled in as the map phase runs.
    """
    WorkerBase.__init__(self, app, appconfig, workdata)
    self.workerid = workdata["workid"]
    source_path = os.path.join(self.remaproot, "data", self.workdata["inputfile"])
    part_dir = os.path.join(self.remaproot, "job", self.jobid, "part")
    self.input = self.app.create_mapper_reader(source_path)
    self.outputdir = part_dir
    self.partitions = {}