Exemplo n.º 1
0
 def run_mapper(self, index):
     """Run the implemented mapper on one input split.

     The first line of the input split file is passed to the mapper as the
     key (the trailing newline, if any, is kept as read) and the remainder
     of the file as the value. The mapper output is then partitioned into
     one temporary JSON file per reducer: a pair goes to a reducer's file
     when ``self.check_position(key, reducer_index)`` is truthy.

     :param index: index identifying the input split file to read and the
         temp map files to write.
     """
     split_path = settings.get_input_split_file(index)
     # Context manager guarantees the handle is released even if a read fails.
     with open(split_path, "r") as input_split_file:
         key = input_split_file.readline()
         value = input_split_file.read()
     if self.clean:
         os.unlink(split_path)
     # Materialize the output once: it is traversed once per reducer, and a
     # generator-returning mapper would otherwise be exhausted after the
     # first reducer, leaving every later partition empty.
     mapper_result = list(self.mapper(key, value))
     for reducer_index in range(self.n_reducers):
         temp_map_path = settings.get_temp_map_file(index, reducer_index)
         with open(temp_map_path, "w+") as temp_map_file:
             # Distinct names avoid shadowing the input key/value above.
             json.dump([(out_key, out_value)
                        for (out_key, out_value) in mapper_result
                        if self.check_position(out_key, reducer_index)],
                       temp_map_file)
Exemplo n.º 2
0
 def begin_file_split(self, split_index, index):
     """Open a new input split file and write its index header line.

     The file is opened in "w+" mode at the path returned by
     ``settings.get_input_split_file(split_index - 1)``, and ``str(index)``
     followed by a newline is written as its first line.

     :param split_index: the split index we are currently on; the file is
         named from ``split_index - 1``.
     :param index: the index written as the first line of the file.
     :return: the file object, still open (it is not closed here).
     """
     split_path = settings.get_input_split_file(split_index - 1)
     handle = open(split_path, "w+")
     header_line = "".join((str(index), "\n"))
     handle.write(header_line)
     return handle
Exemplo n.º 3
0
    def initiate_file_split(self, split_index, index):
        """Create an input split file and write its index as the first line.

        The target path comes from
        ``settings.get_input_split_file(split_index - 1)`` and the file is
        opened in "w+" mode. ``str(index)`` plus a newline is written before
        the handle is returned.

        :param split_index: the split index we are currently on; the file is
            named from ``split_index - 1``.
        :param index: the index written as the first line of the file.
        :return: the file object, still open (it is not closed here).
        """
        target_path = settings.get_input_split_file(split_index - 1)
        split_handle = open(target_path, "w+")
        first_line = "".join((str(index), "\n"))
        split_handle.write(first_line)
        return split_handle
Exemplo n.º 4
0
    def run_mapper(self, index):
        """Run the implemented mapper on one input split.

        The first line of the input split file is passed to the mapper as
        the key (the trailing newline, if any, is kept as read) and the
        remainder of the file as the value. The mapper output is then
        partitioned into one temporary JSON file per reducer: a pair goes to
        a reducer's file when ``self.check_position(key, reducer_index)`` is
        truthy.

        :param index: index identifying the input split file to read and the
            temp map files to write.
        """
        split_path = settings.get_input_split_file(index)
        # Context manager guarantees the handle is released even if a read
        # fails.
        with open(split_path, "r") as input_split_file:
            key = input_split_file.readline()
            value = input_split_file.read()
        if self.clean:
            os.unlink(split_path)
        # Materialize the output once: it is traversed once per reducer, and
        # a generator-returning mapper would otherwise be exhausted after the
        # first reducer, leaving every later partition empty.
        mapper_result = list(self.mapper(key, value))
        for reducer_index in range(self.n_reducers):
            temp_map_path = settings.get_temp_map_file(index, reducer_index)
            with open(temp_map_path, "w+") as temp_map_file:
                # Distinct names avoid shadowing the input key/value above.
                json.dump([(out_key, out_value)
                           for (out_key, out_value) in mapper_result
                           if self.check_position(out_key, reducer_index)],
                          temp_map_file)