def mapper(stream):
    """Emit one ``(weekday name, (arrival delay, 1))`` pair per record.

    Reads the ``DayOfWeek`` and ``ArrDelay`` fields from *stream* through
    ``mapred.iter_curated_fields`` and forwards each record with
    ``mapred.send``. The numeric day of week is treated as 1-based and is
    translated to its English name, with 1 mapping to ``'Monday'``.

    The constant ``1`` paired with the delay is presumably a count used by
    a downstream reducer to compute an average -- confirm against the
    reducer.
    """
    columns = ['DayOfWeek', 'ArrDelay']
    weekday_names = [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
    ]
    for record in mapred.iter_curated_fields(stream, columns):
        day_number, arr_delay = record
        # Day numbers are 1-based; the lookup table is 0-based.
        weekday = weekday_names[int(day_number) - 1]
        mapred.send(weekday, (arr_delay, 1))
def mapper_step1(stream):
    """Emit every flight keyed by ``(flight date, 'AM' | 'PM')``.

    This mapper produces the keys to be used for the first problem
    reduction step: selecting the single best flight for each
    day/day-part/origin/dest.

    The scheduled departure time (``CRSDepTime``) is parsed as a string
    whose first two characters are the hour and whose remaining characters
    are the minute. Departures strictly before 12:00 are labelled ``'AM'``,
    everything else ``'PM'``. Records whose departure time is missing or
    cannot be parsed are skipped.

    The emitted value is the tuple
    ``(date, origin, dest, delay, time, carrier, flight)``.

    NOTE(review): the emitted key is only ``(date, period)``; origin and
    dest travel in the value. Confirm that the reducer performs the
    per-origin/dest selection mentioned above.
    """
    fields = [
        'FlightDate', 'Origin', 'Dest', 'ArrDelay', 'CRSDepTime',
        'UniqueCarrier', 'FlightNum'
    ]
    noon = 12 * 60  # minutes from midnight to 12:00
    for date, origin, dest, delay, time, carrier, flight in \
            mapred.iter_curated_fields(stream, fields):
        try:
            minute_of_day = int(time[:2]) * 60 + int(time[2:])
        except (TypeError, ValueError):
            # Best effort: a missing or malformed departure time cannot be
            # classified, so drop the record. Only parse errors are caught
            # so that KeyboardInterrupt/SystemExit and genuine bugs still
            # propagate.
            continue
        period = 'AM' if minute_of_day < noon else 'PM'
        mapred.send((date, period),
                    (date, origin, dest, delay, time, carrier, flight))
def mapper(stream):
    """Emit one ``((origin, dest, carrier), (arrival delay, 1))`` pair
    per record.

    Reads the ``Origin``, ``Dest``, ``UniqueCarrier`` and ``ArrDelay``
    fields from *stream* through ``mapred.iter_curated_fields`` and
    forwards each record with ``mapred.send``.

    The constant ``1`` paired with the delay is presumably a count used by
    a downstream reducer to compute an average -- confirm against the
    reducer.
    """
    columns = ['Origin', 'Dest', 'UniqueCarrier', 'ArrDelay']
    for record in mapred.iter_curated_fields(stream, columns):
        src, dst, airline, arr_delay = record
        route_carrier = (src, dst, airline)
        mapred.send(route_carrier, (arr_delay, 1))
def mapper(stream):
    """Emit one ``((origin, dest), (departure delay, 1))`` pair per record.

    Reads the ``Origin``, ``Dest`` and ``DepDelay`` fields from *stream*
    through ``mapred.iter_curated_fields`` and forwards each record with
    ``mapred.send``.

    The constant ``1`` paired with the delay is presumably a count used by
    a downstream reducer to compute an average -- confirm against the
    reducer.
    """
    columns = ['Origin', 'Dest', 'DepDelay']
    for record in mapred.iter_curated_fields(stream, columns):
        src, dst, dep_delay = record
        route = (src, dst)
        mapred.send(route, (dep_delay, 1))
def mapper(stream):
    """Emit a count of 1 for both endpoints of every record.

    Reads the ``Origin`` and ``Dest`` fields from *stream* through
    ``mapred.iter_curated_fields``. For each record it calls
    ``mapred.send`` twice: first with the origin, then with the
    destination, each paired with the value ``1``.
    """
    columns = ['Origin', 'Dest']
    for record in mapred.iter_curated_fields(stream, columns):
        src, dst = record
        # Origin is emitted before destination, one send per endpoint.
        for airport in (src, dst):
            mapred.send(airport, 1)
def mapper(stream):
    """Emit one ``(carrier, (arrival delay, 1))`` pair per record.

    Reads the ``UniqueCarrier`` and ``ArrDelay`` fields from *stream*
    through ``mapred.iter_curated_fields`` and forwards each record with
    ``mapred.send``.

    The constant ``1`` paired with the delay is presumably a count used by
    a downstream reducer to compute an average -- confirm against the
    reducer.
    """
    columns = ['UniqueCarrier', 'ArrDelay']
    for record in mapred.iter_curated_fields(stream, columns):
        airline, arr_delay = record
        mapred.send(airline, (arr_delay, 1))