def __init__(self, worker_id, host, port):
     """Store the connection settings and build the worker's helpers.

     Args:
         worker_id: Identifier of this worker; also used to name its logger.
         host: Host the worker will connect to.
         port: Port the worker will connect to.
     """
     # Connection target and identity.
     self.host, self.port = host, port
     self.worker_id = worker_id

     # One mapper and one reducer per worker instance.
     self.mapper = Mapper()
     self.reducer = Reducer()

     # Logger is named after the worker id so log lines can be told apart.
     logger_name = 'worker ' + str(self.worker_id)
     self.logger = logging.getLogger(logger_name)
     self.logger.debug('Worker connecting to %s:%d', self.host, self.port)
示例#2
0
 def set_attributes(self, submit_time, mapper_list, reducer_list,
                    data_size_list):
     """Record the job's submit time and mappers, then create its reducers.

     One ``Reducer`` is created per entry of ``reducer_list``. Each is named
     ``"R" + self.jobName[1:] + "-" + <index>``, given the matching entry of
     ``data_size_list``, configured through its own ``set_attributes`` and
     appended to ``self.reducerList``.

     Args:
         submit_time: Stored as ``self.submitTime`` and forwarded to every
             reducer.
         mapper_list: Stored as ``self.mapperList`` and forwarded to every
             reducer.
         reducer_list: One entry per reducer to create; each entry is
             passed as the first argument of that reducer's
             ``set_attributes``.
         data_size_list: Indexed in parallel with ``reducer_list``.
     """
     self.submitTime = submit_time
     self.mapperList = mapper_list
     for index, reducer_spec in enumerate(reducer_list):
         reducer_name = "R" + self.jobName[1:] + "-" + str(index)
         # Indexing (rather than zip) keeps the IndexError when
         # data_size_list is shorter than reducer_list.
         new_reducer = Reducer(reducer_name, data_size_list[index], self)
         new_reducer.set_attributes(reducer_spec, self.submitTime,
                                    mapper_list)
         self.reducerList.append(new_reducer)
def main():
    """Map three sample sentences and print the reduction of the last one.

    All three sentences are passed through ``Mapper.map``, but only the
    result for the final sentence is handed to ``Reducer.reduce``.
    """
    mapper = Mapper()
    reducer = Reducer()

    sample_sentences = (
        "Esta à Frase, fRASe tomas frase esta unica única",
        "única Este cena\n. frase única",
        "à frase à",
    )
    # Every sentence is still mapped, in the original order, even though
    # the first two results are not used afterwards.
    mapped_results = [mapper.map(sentence) for sentence in sample_sentences]

    mapFinal = reducer.reduce([mapped_results[-1]])
    print("Reduced: ", mapFinal)
示例#4
0
def start():
    """Run two map/reduce passes that produce PassengerData.csv.

    Pass 1 reads AComp_Passenger_data.csv, maps with ``mapDuplicates`` and
    reduces with ``redWrite`` into PassengerData.csv. Pass 2 reads
    PassengerData.csv back in and maps with ``mapSpelling``; no reducer
    setting is changed between the passes.
    """
    mapper = Mapper()
    reducer = Reducer()
    user_code = StripErrorsUserCode
    raw_csv = "./inputFiles/AComp_Passenger_data.csv"
    cleaned_csv = "./inputFiles/PassengerData.csv"

    # Pass 1: raw file -> cleaned file.
    mapper.setInputFile(raw_csv)
    mapper.setMapFunction(user_code.mapDuplicates)
    reducer.setRedFunction(user_code.redWrite)
    reducer.setOutputFile(cleaned_csv)
    first_pass_pairs = mapper.run()
    reducer.run(first_pass_pairs, 'w')

    # Pass 2: cleaned file is re-read and reduced again in 'w' mode.
    mapper.setInputFile(cleaned_csv)
    mapper.setMapFunction(user_code.mapSpelling)
    second_pass_pairs = mapper.run()
    reducer.run(second_pass_pairs, 'w')
def start():
    """Run Task 2: one map/reduce pass over PassengerData.csv.

    Maps with ``mapPassengerToFlight``, reduces with ``redCountPassengers``
    and writes ./results/NumberOfPassengersOnEachFlight.csv.
    """
    mapper = Mapper()
    reducer = Reducer()
    user_code = PassengersOnEachFlightUserCode

    # Configure the mapper side first, then the reducer side.
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(user_code.mapPassengerToFlight)
    reducer.setRedFunction(user_code.redCountPassengers)
    reducer.setOutputFile("./results/NumberOfPassengersOnEachFlight.csv")

    mapped_pairs = mapper.run()
    reducer.run(mapped_pairs, 'w')

    print(":: Task 2 complete")
def start():
    """Run Task 3: one map/reduce pass over PassengerData.csv.

    Maps with ``mapReOrder``, reduces with ``redCalcFlightInfo`` and writes
    ./results/ListOfFlightsWithDurationAndAllPassengers.csv.
    """
    mapper = Mapper()
    reducer = Reducer()
    user_code = FlightInformationUserCode
    output_csv = "./results/ListOfFlightsWithDurationAndAllPassengers.csv"

    # Configure the mapper side first, then the reducer side.
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(user_code.mapReOrder)
    reducer.setRedFunction(user_code.redCalcFlightInfo)
    reducer.setOutputFile(output_csv)

    mapped_pairs = mapper.run()
    reducer.run(mapped_pairs, 'w')

    print(":: Task 3 complete")
示例#7
0
import pickle

from Reducer import Reducer
from Trie import Trie
import sys

# Streaming reducer: stdin carries "<key>\t<filename>" lines, where each
# filename points at a pickled trie. Consecutive lines with the same key are
# merged into one trie, which is printed when the key changes.
# NOTE(review): this only groups correctly if equal keys arrive on adjacent
# lines (i.e. the input is sorted by key) - confirm against the mapper/driver.
current_key = None
current_trie = Trie()
key = None

# input comes from STDIN
for line in sys.stdin:
    # remove leading and trailing whitespace
    line = line.strip()

    # parse the input we got from mapper.py
    key, filename = line.split('\t', 1)

    # NOTE(security): pickle.load can execute arbitrary code. Only feed this
    # script filenames that point at trusted, locally produced pickles.
    # The context manager closes the handle right after loading; previously
    # one file descriptor leaked per input line.
    with open(filename, mode='rb') as file:
        t = pickle.load(file)

    if current_key == key:
        # join tries (the return value is discarded, so current_trie is
        # presumably updated in place - verify in Reducer.reduce)
        Reducer.reduce(current_trie, t)
    else:
        if current_key:
            # traverse tries: the previous key's group is complete
            current_trie.printWords(path=current_key)
        current_trie = t
        current_key = key

# Emit the final group, which the loop never flushes on its own.
# NOTE(review): with empty input both names are still None, so this also
# prints the initial empty trie with path=None - confirm that is intended.
if current_key == key:
    current_trie.printWords(path=current_key)
class Worker():
    """Worker that registers with a coordinator and serves map/reduce tasks.

    Messages are JSON documents framed with a fixed-width, zero-padded
    decimal length header of ``MAX_N_BYTES`` digits, followed by the JSON
    payload.
    """

    def __init__(self, worker_id, host, port):
        """Store the connection settings and build the worker's helpers.

        Args:
            worker_id: Identifier sent in the register message and used to
                name the logger.
            host: Host the worker will connect to.
            port: Port the worker will connect to.
        """
        self.host = host
        self.port = port
        self.worker_id = worker_id
        self.mapper = Mapper()
        self.reducer = Reducer()
        self.logger = logging.getLogger('worker ' + str(self.worker_id))
        self.logger.debug('Worker connecting to %s:%d', self.host, self.port)

    def parse_msg(self, msg):
        """Frame ``msg`` by prefixing its zero-padded length.

        The length is the size of ``msg`` once UTF-8 encoded, because the
        receiving side counts bytes read from the socket. For ASCII text
        (what ``json.dumps`` emits by default) this equals ``len(msg)``.

        Args:
            msg: The text payload to frame.

        Returns:
            A ``str`` made of the length, left-padded with zeros to
            ``MAX_N_BYTES`` digits, followed by ``msg``.
        """
        msg_len = len(msg.encode())
        return str(msg_len).zfill(MAX_N_BYTES) + msg

    def proccess_msg(self, msg):
        """Handle one decoded message and build the reply, if any.

        Args:
            msg: Mapping with a ``'task'`` key; map and reduce requests also
                carry a ``'value'`` key.

        Returns:
            A reply dict for ``'map_request'`` / ``'reduce_request'``, or
            ``None`` for ``'done'`` and for unrecognised tasks.
        """
        task = msg['task']
        if task == 'map_request':
            result = self.mapper.map(msg['value'])
            return {'task': 'map_reply', 'value': result}
        if task == 'reduce_request':
            result = self.reducer.reduce(msg['value'])
            return {'task': 'reduce_reply', 'value': result}
        if task != 'done':
            self.logger.debug('THIS IS NOT FOR ME: %s', task)
        return None

    def register(self):
        """Return the message that announces this worker."""
        message = {'task': 'register', 'id': self.worker_id}
        return message

    async def _send_msg(self, writer, message, host):
        """Serialise ``message`` to JSON, frame it and write it out."""
        framed = self.parse_msg(json.dumps(message))
        self.logger.info('Sending to: %s', host)
        writer.write(framed.encode())
        await writer.drain()

    async def _recv_msg(self, reader):
        """Read one framed message and return its text.

        Returns ``None`` when the peer closed or reset the connection.
        """
        try:
            # readexactly() waits for the full count; a plain read(n) may
            # return fewer bytes and truncate the header or the payload.
            header = await reader.readexactly(MAX_N_BYTES)
            body = await reader.readexactly(int(header.decode()))
        except (asyncio.IncompleteReadError, ConnectionResetError):
            return None
        # Decode once over the whole payload so a multi-byte character can
        # never be split across two separately decoded chunks.
        return body.decode()

    async def tcp_echo_client(self, host, port, loop):
        """Connect, register, then answer requests until disconnected.

        When the peer closes or resets the connection the coroutine waits
        three seconds (to give the backup coordinator time to start) and
        returns.

        Args:
            host: Host to connect to.
            port: Port to connect to.
            loop: Kept for backward compatibility and unused. The
                connection is opened on the running event loop, because
                ``asyncio.open_connection`` no longer accepts ``loop``
                since Python 3.10.
        """
        self.logger.debug('Openning connection')
        reader, writer = await asyncio.open_connection(host, port)

        try:
            # register on first time
            await self._send_msg(writer, self.register(), host)

            while True:
                payload = await self._recv_msg(reader)
                if payload is None:
                    # give the backup coordinator time to start
                    await asyncio.sleep(3)
                    break

                self.logger.info('Received from: %s ', host)

                to_send = self.proccess_msg(json.loads(payload))
                if to_send is not None:
                    await self._send_msg(writer, to_send, host)
        finally:
            # Release the socket even if handling a message raised.
            self.logger.info('Close the socket')
            writer.close()
示例#9
0
def start():
    """Run Task 1: three map/reduce stages ending in a per-airport count.

    Stage 1 writes ./results/Airports.csv from the Top-30 airports file,
    stage 2 appends to that same file from PassengerData.csv, and stage 3
    re-reads Airports.csv to write
    ./results/NumberOfFlightsFromEachAirport.csv.
    """
    mapper = Mapper()
    reducer = Reducer()
    user_code = NoOfFlightsFromAirportsUserCode
    airports_csv = "./results/Airports.csv"

    # Stage 1: list every airport in a csv file with the headings
    # 'Airport Code' and 'Null'.
    mapper.setInputFile("./inputFiles/Top30_airports_LatLong.csv")
    mapper.setMapFunction(user_code.mapUnusedAirports)
    reducer.setRedFunction(user_code.redUnusedAirports)
    reducer.setOutputFile(airports_csv)
    stage_one_pairs = mapper.run()
    reducer.run(stage_one_pairs, 'w')

    # Stage 2: list the airports used in the passenger data, with the
    # headings 'Airport Code' and 'Number of flights from that airport'.
    # The output file is unchanged and the mode is 'a', so this appends.
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(user_code.mapUsedAirports)
    reducer.setRedFunction(user_code.redUsedAirports)
    stage_two_pairs = mapper.run()
    reducer.run(stage_two_pairs, 'a')

    # Stage 3: combine both results into a list of all flights from each
    # airport, including airports that are not used.
    # Headings: 'Airport code', 'No. of flights from that airport'.
    mapper.setMapFunction(user_code.mapMakePairs)
    mapper.setInputFile(airports_csv)
    reducer.setOutputFile("./results/NumberOfFlightsFromEachAirport.csv")
    stage_three_pairs = mapper.run()
    reducer.setRedFunction(user_code.redCountFlights)
    reducer.run(stage_three_pairs, 'w')

    print(":: Task 1 complete")
示例#10
0
def start():
    """Run Task 4: three map/reduce stages built on flight distances.

    Stage 1 writes ./results/FlightDistances.csv from PassengerData.csv.
    Stages 2 and 3 both read that file and write
    TotalDistanceTravelledByEachPassenger.csv and DistanceOfEachFlight.csv
    respectively.
    """
    mapper = Mapper()
    reducer = Reducer()
    user_code = CalcDistanceUserCode
    flight_distances_csv = "./results/FlightDistances.csv"

    # Stage 1: passenger data -> flight distances.
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(user_code.mapCalcFlightDistances)
    reducer.setRedFunction(user_code.redCalcFlightDistance)
    reducer.setOutputFile(flight_distances_csv)
    stage_one_pairs = mapper.run()
    reducer.run(stage_one_pairs, 'w')

    # Stage 2: flight distances -> total distance per passenger.
    mapper.setInputFile(flight_distances_csv)
    mapper.setMapFunction(user_code.mapTotalPassengerDistance)
    reducer.setRedFunction(user_code.redTotalPassengerDistance)
    reducer.setOutputFile(
        "./results/TotalDistanceTravelledByEachPassenger.csv")
    stage_two_pairs = mapper.run()
    reducer.run(stage_two_pairs, 'w')

    # Stage 3: flight distances -> distance of each flight.
    mapper.setInputFile(flight_distances_csv)
    mapper.setMapFunction(user_code.mapDistaces)
    reducer.setRedFunction(user_code.redDistances)
    reducer.setOutputFile("./results/DistanceOfEachFlight.csv")
    stage_three_pairs = mapper.run()
    reducer.run(stage_three_pairs, 'w')

    print(":: Task 4 complete")