Пример #1
0
def mapper(stream):
    """Key each record's arrival delay by the name of its day of the week.

    For every ``(DayOfWeek, ArrDelay)`` pair yielded by
    ``mapred.iter_curated_fields``, calls ``mapred.send`` with the weekday
    name as the key and ``(delay, 1)`` as the value.

    ``DayOfWeek`` is converted with ``int`` and used as a 1-based index
    (1 -> 'Monday', ..., 7 -> 'Sunday').

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """
    weekday_names = (
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
        'Sunday',
    )
    records = mapred.iter_curated_fields(stream, ['DayOfWeek', 'ArrDelay'])

    for day_number, arr_delay in records:
        weekday = weekday_names[int(day_number) - 1]
        # NOTE(review): the trailing 1 is presumably a record count consumed
        # by a downstream reducer -- confirm against the reducer.
        mapred.send(weekday, (arr_delay, 1))
Пример #2
0
def mapper_step1(stream):
    """Emit each flight keyed by its date and AM/PM scheduled-departure period.

    This mapper produces the keys to be used for the first problem reduction
    step: selecting the single best flight for each day/day-part/origin/dest.

    For every record yielded by ``mapred.iter_curated_fields``, the
    ``CRSDepTime`` value is split into its first two characters (hours) and
    the remainder (minutes) and converted to a minute-of-day. Records whose
    time cannot be parsed this way are skipped. Otherwise ``mapred.send`` is
    called with key ``(date, period)``, where ``period`` is ``'AM'`` before
    12:00 and ``'PM'`` otherwise, and the full record tuple as the value.

    NOTE(review): the emitted key is only ``(date, period)``; origin and dest
    travel in the value. Confirm the reducer groups on them as the summary
    above implies.

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """

    fields = [
        'FlightDate', 'Origin', 'Dest', 'ArrDelay', 'CRSDepTime',
        'UniqueCarrier', 'FlightNum'
    ]

    for flight_date, origin, dest, delay, dep_time, carrier, flight in (
            mapred.iter_curated_fields(stream, fields)):
        try:
            # NOTE(review): assumes a zero-padded "hhmm" string; an unpadded
            # value such as "930" would be read as 93 hours -- confirm the
            # curated format.
            minute_of_day = int(dep_time[:2]) * 60 + int(dep_time[2:])
        except (TypeError, ValueError):
            # Best-effort: drop records with a missing or malformed time.
            # Only parse failures are caught, so KeyboardInterrupt, SystemExit
            # and unrelated errors still propagate.
            continue

        period = 'AM' if minute_of_day < 12 * 60 else 'PM'
        mapred.send(
            (flight_date, period),
            (flight_date, origin, dest, delay, dep_time, carrier, flight))
Пример #3
0
def mapper(stream):
    """Key each record's arrival delay by (origin, dest, carrier).

    For every ``(Origin, Dest, UniqueCarrier, ArrDelay)`` record yielded by
    ``mapred.iter_curated_fields``, calls ``mapred.send`` with the
    ``(origin, dest, carrier)`` tuple as the key and ``(delay, 1)`` as the
    value.

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """
    wanted = ['Origin', 'Dest', 'UniqueCarrier', 'ArrDelay']

    for record in mapred.iter_curated_fields(stream, wanted):
        src, dst, airline, arr_delay = record
        route_carrier = (src, dst, airline)
        # NOTE(review): the trailing 1 is presumably a record count for a
        # downstream reducer -- confirm.
        mapred.send(route_carrier, (arr_delay, 1))
Пример #4
0
def mapper(stream):
    """Key each record's departure delay by its (origin, dest) pair.

    For every ``(Origin, Dest, DepDelay)`` record yielded by
    ``mapred.iter_curated_fields``, calls ``mapred.send`` with
    ``(origin, dest)`` as the key and ``(delay, 1)`` as the value.

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """
    wanted = ['Origin', 'Dest', 'DepDelay']

    for record in mapred.iter_curated_fields(stream, wanted):
        src, dst, dep_delay = record
        route = (src, dst)
        # NOTE(review): the trailing 1 is presumably a record count for a
        # downstream reducer -- confirm.
        mapred.send(route, (dep_delay, 1))
Пример #5
0
def mapper(stream):
    """Emit a count of 1 for both airports of every record.

    For every ``(Origin, Dest)`` pair yielded by
    ``mapred.iter_curated_fields``, calls ``mapred.send`` twice: first with
    the origin as the key, then with the destination, each with the value 1.

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """
    wanted = ['Origin', 'Dest']

    for src, dst in mapred.iter_curated_fields(stream, wanted):
        # Origin is sent before destination, matching the emission order.
        for airport in (src, dst):
            mapred.send(airport, 1)
Пример #6
0
def mapper(stream):
    """Key each record's arrival delay by its carrier.

    For every ``(UniqueCarrier, ArrDelay)`` pair yielded by
    ``mapred.iter_curated_fields``, calls ``mapred.send`` with the carrier as
    the key and ``(delay, 1)`` as the value.

    Args:
        stream: Input handed unchanged to ``mapred.iter_curated_fields``.
    """
    wanted = ['UniqueCarrier', 'ArrDelay']

    for record in mapred.iter_curated_fields(stream, wanted):
        airline, arr_delay = record
        # NOTE(review): the trailing 1 is presumably a record count for a
        # downstream reducer -- confirm.
        mapred.send(airline, (arr_delay, 1))