Exemplo n.º 1
0
def compute(lst):
    """Run a map / shuffle-reduce job over ``lst`` with one process per slice.

    ``lst`` is cut into contiguous slices of ``len(lst) / num_map_nodes``
    items (floored, at least one) and one ``map`` process is started per
    slice. Because the slice size is floored, the number of slices can exceed
    ``num_map_nodes``. All map processes write to a single manager-backed
    ``emissions`` list. Once they have finished, one ``reduce`` process is
    started per key and the contents of the shared result list are summed.

    Each emission is read as ``x[0]`` for its key; fields ``[1]`` and ``[2]``
    of the *last* emission are used as the bounds of the key range.

    Args:
        lst: Sliceable, sized sequence of input items.

    Returns:
        ``sum`` of the shared result list after all reduce processes have
        been joined; ``0`` when nothing was emitted (e.g. ``lst`` is empty).
    """
    # Number of map nodes the input is spread over.
    num_map_nodes = get_num_map_nodes()

    # A single manager hosts both shared lists; the context manager shuts its
    # server process down deterministically when the job is done.
    with Manager() as manager:
        # Map processes emit key-value records into this shared list.
        emissions = manager.list()
        result_lst = manager.list()

        start_time = time.time()

        print("Up to Permutation:", str(time.time() - start_time) + " seconds")

        len_list_of_list = len(lst)

        print("Size of S: ", len_list_of_list)
        len_sublist = max(1, int(len_list_of_list / float(num_map_nodes)))

        # ---------------------------------------------------
        # Map: one process per slice. Slicing past the end is safe, so the
        # final (possibly shorter) slice needs no special case.
        jobs = []
        for num_p, start in enumerate(range(0, len_list_of_list, len_sublist)):
            sliced_list = lst[start:start + len_sublist]
            print(len(sliced_list))
            p = Process(target=map, args=(num_p, sliced_list, emissions))
            jobs.append(p)
            p.start()

        print("Wait for Catch Up\n")
        for p in jobs:
            p.join()

        # Copy the proxy's contents once; iterating the proxy itself costs one
        # round trip to the manager process per element, for every key.
        records = emissions[:]
        print(len(records))
        print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

        # ---------------------------------------------------
        # Shuffle/Reduce
        if records:
            last = records[-1]
            # NOTE(review): the "+ 5" margin on the upper bound is kept from
            # the original; its rationale is not visible in this function.
            keys = range(last[1], last[2] + 5)
        else:
            # No emissions (e.g. empty input): there is nothing to reduce.
            keys = range(0)

        jobs = []
        for key in keys:
            # One "1" per emission carrying this key.
            key_list = [1 for x in records if x[0] == key]
            q = Process(target=reduce, args=(key, key_list, result_lst))
            jobs.append(q)
            q.start()

        print("Wait for Catch Up\n")
        for q in jobs:
            q.join()

        print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
        print("Input Size: ", len_list_of_list)
        # Slice-copy so the sum runs over a local list, not the proxy.
        return sum(result_lst[:])
def compute(lst, size_param=0, option='not_fixed'):
    """Run a map / combine / reduce job over ``lst`` using worker processes.

    ``lst`` is cut into contiguous slices of ``len(lst) / num_map_nodes``
    items (floored, at least one); each slice goes to its own ``map`` process
    with its own manager-backed emission list. For every emission list, one
    ``combine`` process per key writes into a dedicated shared list, and one
    ``reduce`` process per combined list then writes into a shared result
    list, whose contents are summed.

    Fields ``[1]`` and ``[2]`` of the last record of each emission list are
    used as the bounds of that list's key range.

    Args:
        lst: Sliceable, sized sequence of input items.
        size_param: Forwarded to ``get_num_map_nodes`` when ``option`` is
            ``'not_fixed'``; unused otherwise.
        option: ``'not_fixed'`` to ask ``get_num_map_nodes`` for the node
            count, or ``'fixed'`` to use 5 map nodes.

    Returns:
        ``sum`` of the shared result list, or ``None`` (after printing a
        usage message) when ``option`` is not one of the accepted values.
    """
    # NOTE: NameError is used as the usage-error signal. The handler wraps the
    # whole body, so a NameError raised anywhere below is reported the same way.
    try:
        # Number of map nodes the input is spread over.
        if option == 'not_fixed':
            num_map_nodes = get_num_map_nodes(size_param)
        elif option == 'fixed':
            num_map_nodes = 5
        else:
            raise NameError('second parameter [option] should be either `not_fixed` or `fixed` ... see documentation')

        # A single manager hosts every shared list; the context manager shuts
        # its server process down deterministically when the job is done.
        with Manager() as manager:
            start_time = time.time()

            print("Up to Permutation:", str(time.time() - start_time) + " seconds")

            len_lst = len(lst)

            print("Size of S: ", len_lst)
            len_sublist = max(1, int(len_lst / float(num_map_nodes)))

            # ---------------------------------------------------
            # Map: one process and one emission list per slice. Slicing past
            # the end is safe, so the last slice needs no special case.
            list_of_emissions = []
            jobs = []
            for num_p, start in enumerate(range(0, len_lst, len_sublist)):
                emissions = manager.list()
                list_of_emissions.append(emissions)
                sliced_list = lst[start:start + len_sublist]
                p = Process(target=map, args=(num_p, sliced_list, emissions))
                jobs.append(p)
                p.start()

            print("Wait for Catch Up\n")
            for p in jobs:
                p.join()

            print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

            # ---------------------------------------------------
            # Combine Step: remember each key next to its combined list so the
            # reduce step can hand every reducer the key it belongs to.
            jobs = []
            combined = []
            for emissions in list_of_emissions:
                if len(emissions) == 0:
                    # A map node that emitted nothing has no key range.
                    continue
                last = emissions[-1]
                # NOTE(review): the "+ 5" margin on the upper bound is kept
                # from the original; its rationale is not visible here.
                for key in range(last[1], last[2] + 5):
                    combined_list = manager.list()
                    combined.append((key, combined_list))
                    q = Process(target=combine, args=(key, emissions, combined_list))
                    # Track the process so the join below really waits for it;
                    # otherwise reducers could start on half-filled lists.
                    jobs.append(q)
                    q.start()

            print("Wait for Catch Up\n")
            for q in jobs:
                q.join()

            # REDUCE --------------------------------------------------------
            jobs = []
            result_lst = manager.list()
            for key, combined_list in combined:
                r = Process(target=reduce, args=(key, combined_list, result_lst))
                jobs.append(r)
                r.start()

            print("Wait for Catch Up\n")
            for r in jobs:
                r.join()

            print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
            print("Length of List of List: ", len_lst)
            # Slice-copy so the sum runs over a local list, not the proxy.
            return sum(result_lst[:])

    except NameError as e:
        print('Usage Error:', e)
def compute(lst, size_param=0, option="not_fixed"):
    """Run a map / shuffle-reduce job over ``lst`` using worker processes.

    ``lst`` is cut into contiguous slices of ``len(lst) / num_map_nodes``
    items (floored, at least one); each slice goes to its own ``map`` process
    with its own manager-backed emission list. For every emission list, one
    ``reduce`` process is started per key, and the contents of the shared
    result list are summed once all reducers have been joined.

    Each emission is read as ``x[0]`` for its key; fields ``[1]`` and ``[2]``
    of the last record of an emission list are used as the bounds of that
    list's key range.

    Args:
        lst: Sliceable, sized sequence of input items.
        size_param: Forwarded to ``get_num_map_nodes`` when ``option`` is
            ``"not_fixed"``; unused otherwise.
        option: ``"not_fixed"`` to ask ``get_num_map_nodes`` for the node
            count, or ``"fixed"`` to use 5 map nodes.

    Returns:
        ``sum`` of the shared result list, or ``None`` (after printing a
        usage message) when ``option`` is not one of the accepted values.
    """
    # NOTE: NameError is used as the usage-error signal. The handler wraps the
    # whole body, so a NameError raised anywhere below is reported the same way.
    try:
        # Number of map nodes the input is spread over.
        if option == "not_fixed":
            num_map_nodes = get_num_map_nodes(size_param)
        elif option == "fixed":
            num_map_nodes = 5
        else:
            raise NameError("second parameter [option] should be either `not_fixed` or `fixed` ... see documentation")

        # A single manager hosts every shared list; the context manager shuts
        # its server process down deterministically when the job is done.
        with Manager() as manager:
            # Timer
            start_time = time.time()

            len_lst = len(lst)

            print("Size of S: ", len_lst)
            len_sublist = max(1, int(len_lst / float(num_map_nodes)))

            # Map: list_of_emissions[n] holds the records emitted by map
            # node n. Slicing past the end is safe, so the last slice needs
            # no special case.
            list_of_emissions = []
            jobs = []
            for num_p, start in enumerate(range(0, len_lst, len_sublist)):
                emissions = manager.list()
                list_of_emissions.append(emissions)
                sliced_list = lst[start : start + len_sublist]
                p = Process(target=map, args=(num_p, sliced_list, emissions))
                jobs.append(p)
                p.start()

            print("Wait for Catch Up\n")
            for p in jobs:
                p.join()

            print("Up to Mapping Stage:", str(time.time() - start_time) + " seconds")

            # Shuffle/Reduce: a separate reducer per (map node, key) pair.
            jobs = []
            result_lst = manager.list()
            for emissions in list_of_emissions:
                # Copy the proxy's contents once; iterating the proxy itself
                # costs one round trip to the manager per element, per key.
                records = emissions[:]
                if not records:
                    # A map node that emitted nothing has no key range.
                    continue
                last = records[-1]
                # NOTE(review): the "+ 5" margin on the upper bound is kept
                # from the original; its rationale is not visible here.
                for key in range(last[1], last[2] + 5):
                    # One "1" per emission carrying this key.
                    key_list = [1 for x in records if x[0] == key]
                    q = Process(target=reduce, args=(key, key_list, result_lst))
                    jobs.append(q)
                    q.start()

            print("Wait for Catch Up\n")
            for q in jobs:
                q.join()

            print("Up to Reducing Stage:", str(time.time() - start_time) + " seconds")
            print("Length of List of List: ", len_lst)
            # Slice-copy so the sum runs over a local list, not the proxy.
            return sum(result_lst[:])

    except NameError as e:
        print("Usage Error:", e)