def __init__(self):
    Config.__init__(self)
    Logger.__init__(self)

    self.env = self.conf.get("enviroment", {})

    # Pick the driver initialization depending on where we run: inside a
    # container we defer to the cooperative super() chain, otherwise we
    # initialize the local WebDriver directly.
    if self.env.get("RUN_CONTEINER"):
        super().__init__()
    else:
        WebDriver.__init__(self)
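# A minimal sketch of the configuration shape the constructor above
# expects; the key spellings ("enviroment", "RUN_CONTEINER") deliberately
# mirror the ones used by the code, and the values are illustrative only.
example_conf = {
    "enviroment": {
        # Truthy: initialization is delegated to the next class in the
        # MRO via super(); falsy or absent: WebDriver.__init__ is called.
        "RUN_CONTEINER": True,
    }
}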
def __init__(self, fconf, handler):
    """
    Initialize a MasterServer instance
    @param fconf the path to the configuration file
    @param handler the handler object in charge of managing HTTP requests
    """
    Logger.__init__(self, "Manager")

    conf = json.load(open(fconf))

    # Jinja2 initialization.
    tmpl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'templates')
    self.env = Environment(loader=FileSystemLoader(tmpl_path))
    self.status = ApplicationStatus()

    # This is a dictionary structure in the form:
    #   reduce_dict["group-name"] = [
    #       [(unique-file-id, size-in-bytes), ...],  # => Reduce-0
    #       [(unique-file-id, size-in-bytes), ...],  # => Reduce-1
    #   ]
    self.reduce_mark = set()
    self.reduce_dict = defaultdict(list)
    self.dead_reduce_dict = defaultdict(list)

    # This is a dictionary nick => Handler instance
    self.masters = {}
    self.last_id = -1

    self.pending_works = defaultdict(list)  # nick => [work, ...]

    self.ping_max = int(conf["ping-max"])
    self.ping_interval = int(conf["ping-interval"])
    self.num_reducer = int(conf["num-reducer"])

    # This will just keep track of the names of the files
    self.reduce_files = []
    self.results_printed = False

    for _ in range(self.num_reducer):
        self.reduce_files.append("N/A")

    # Load the input module and assign the generator to the work_queue
    module = load_module(conf["input-module"])
    cls = getattr(module, "Input", None)

    # Some code for the DFS
    generator = cls(fconf).input()
    self.use_dfs = use_dfs = conf['dfs-enabled']

    if use_dfs:
        dfsconf = conf['dfs-conf']
        dfsconf['host'] = dfsconf['master']
        self.path = conf['output-prefix']
    else:
        dfsconf = None
        self.path = os.path.join(conf['datadir'], conf['output-prefix'])

    self.work_queue = WorkQueue(self.logger, generator, use_dfs, dfsconf)

    # Lock to synchronize access to the timestamps dictionary
    self.lock = Lock()
    self.timestamps = {}  # nick => (send_ts:enum, ts:float)

    # Ping thread
    self.hb_thread = Thread(target=self.hearthbeat)

    # Event to mark the end of the server
    self.finished = Event()

    self.addrinfo = (conf['master-host'], conf['master-port'])
    Server.__init__(self, self.addrinfo[0], self.addrinfo[1], handler)
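# The ping thread above runs self.hearthbeat, which is defined elsewhere
# in the class. The sketch below is illustrative only: it assumes the
# loop wakes up every ping_interval seconds and evicts masters whose
# last timestamp is older than ping_max intervals. It is not the
# repository's actual implementation.
import time

def hearthbeat(self):
    while not self.finished.is_set():
        now = time.time()
        with self.lock:
            for nick, (send_ts, ts) in list(self.timestamps.items()):
                # Assumption: ping_max counts missed ping intervals.
                if now - ts > self.ping_max * self.ping_interval:
                    self.info("Master %s timed out" % nick)
                    del self.timestamps[nick]
        # Event.wait doubles as an interruptible sleep.
        self.finished.wait(self.ping_interval)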
def __init__(self, nick, fconf):
    """
    Create a Master client/server
    @param nick a friendly name string for identification
    @param fconf path to the configuration file
    """
    Logger.__init__(self, "Master")
    HTTPClient.__init__(self)

    self.fconf = fconf
    self.conf = json.load(open(fconf))

    # This keeps track of the statistics of the master. See status.py
    self.status = MasterStatus()

    # Set to True if the registration was successful
    self.registered = False
    self.unique_id = -1

    # Marks the end of the stream. The server has no more maps to execute.
    # Set to True whenever an end-of-stream message is received.
    self.end_of_stream = False

    self.comm = MPI.COMM_WORLD
    self.n_machines = count_machines(self.conf["machine-file"])

    # The mux object.
    self.communicators = None

    # The lock is used to synchronize access to the units_to_kill
    # variable, which is accessed by two different threads, namely the
    # one interacting with the server and the one interacting with the
    # workers.
    self.kill_lock = Lock()
    self.units_to_kill = 0

    self.info("We have %d available slots" % self.n_machines)

    self.nick = nick
    self.url = self.conf['master-url']
    self.sleep_inter = self.conf['sleep-interval']

    # Generic lock to synchronize access to the instance variables of
    # the object itself. Its use should be minimized.
    self.lock = Lock()

    # Integer marking the number of maps which are currently being
    # executed. Incremented on assignment, decremented on finish.
    self.num_map = 0

    # Simple queue of WorkerStatus(TYPE_MAP, ..) objects. Filled whenever
    # the server returns us a compute-map message.
    self.map_queue = []

    # An event that, whenever set, marks the end of the computation. Set
    # upon reception of the plz-die message.
    self.ev_finished = Event()

    # Maximum number of files that the reducer may manage at once.
    # Usually this should be set to the MAX_FD of the system.
    self.threshold_nfile = int(self.conf["threshold-nfile"])

    # Simple lock that synchronizes access to the reduc* instance
    # variables.
    self.reduce_lock = Lock()

    # For each reducer this holds a nested list of (file-id, size) pairs
    # describing the outputs of the mappers. With two reducers we would
    # have, for example:
    #   [
    #       [(0, 45), (1, 32), (3, 331)],
    #       [(5, 22), (6, 99)]
    #   ]
    # Meaning:
    #   Reduce #1: -> output-reduce-000000-000000, 45 bytes
    #              -> output-reduce-000000-000001, 32 bytes
    #              -> output-reduce-000000-000003, 331 bytes
    #   Reduce #2: -> output-reduce-000001-000005, 22 bytes
    #              -> output-reduce-000001-000006, 99 bytes
    self.reducing_files = []

    # It will contain boolean values indicating the status of the reducers
    self.reduce_started = []

    for _ in range(int(self.conf['num-reducer'])):
        self.reduce_started.append(False)
        self.reducing_files.append([])

    # The timer will be used to unlock the semaphore that is used as a
    # bounding mechanism when requesting new jobs from the server.
    self.timer = None
    self.num_pending_request = Semaphore(self.n_machines)

    # Here we start two simple threads, one in charge of executing
    # requests and the other in charge of executing the main loop. There
    # is also a third thread executing asyncore.loop that manages the
    # HTTP communication with the server.
    self.requester_thread = Thread(target=self.__requester_thread)
    self.main_thread = Thread(target=self.__main_loop)
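# __requester_thread is defined later in the class; the sketch below only
# illustrates how num_pending_request can bound the number of outstanding
# job requests to n_machines. The __request_work helper is a hypothetical
# placeholder, and the timer-based release is an assumption, not the
# repository's actual logic.
from threading import Timer

def __requester_thread(self):
    while not self.ev_finished.is_set():
        # Block until a request slot is free; at most n_machines
        # requests can be in flight at any time.
        self.num_pending_request.acquire()
        if self.ev_finished.is_set():
            break
        self.__request_work()  # hypothetical: ask self.url for a new job
        # Release the slot after sleep_inter seconds so the loop keeps
        # polling even if the server has nothing to assign.
        self.timer = Timer(self.sleep_inter, self.num_pending_request.release)
        self.timer.start()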