def mapper(self, _, line):
    """Collect "Main_Page" requests, keeping only the 50 smallest entries.

    The input line is parsed with ``Weblog``. A line that raises
    ``ValueError`` is reported on stderr and skipped. Nothing is emitted
    from this method: matching lines accumulate in ``self.lowest`` as
    ``(datetime, line)`` tuples, re-sorted and truncated after every
    insertion.
    """
    try:
        entry = Weblog(line)
    except ValueError:
        sys.stderr.write("Invalid logfile line: {}\n".format(line))
        return

    # Only requests for the desired page are candidates.
    if entry.wikipage() != "Main_Page":
        return

    self.lowest.append((entry.datetime, line))
    ranked = sorted(self.lowest)
    # Keep just the first 50 so the buffer stays bounded.
    self.lowest = ranked[:50]
def mapper(self, _, line):
    """Emit one tagged record per input line for a two-input join.

    The input file name (from the ``map.input.file`` jobconf variable)
    decides how the line is handled:

    * ``top1000ips_to_country.txt``: the line is split on tabs and
      ``(field0, ("Country", field1))`` is emitted.
    * anything else: the line is parsed with ``Weblog`` and
      ``(ipaddr, ("Weblog", (ipaddr, datetime, url, wikipage)))`` is
      emitted.

    A counter in the "Info" group is incremented for each kind of line.
    """
    source = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" not in source:
        # Weblog line: key the record by the requesting IP address.
        entry = Weblog(line)
        record = (entry.ipaddr, entry.datetime, entry.url, entry.wikipage())
        self.increment_counter("Info", "weblog Count", 1)
        yield record[0], ("Weblog", record)
        return

    # Country-mapping line: first column is the key, second the country.
    columns = line.split("\t")
    self.increment_counter("Info", "top1000_ips_to_country Count", 1)
    yield columns[0], ("Country", columns[1])
def mapper(self, _, line):
    """Emit join records keyed by IP address from either input file.

    The input file name (from the ``map.input.file`` jobconf variable)
    decides how the line is handled:

    * ``top1000ips_to_country.txt``: the stripped line must be exactly
      ``ipaddr<TAB>country``; ``(ipaddr, "+" + country)`` is emitted. The
      leading "+" tags the value as a country record.
    * anything else: the line is parsed with ``Weblog`` and, when it is a
      request for "Main_Page", ``(ipaddr, line)`` is emitted.

    Malformed lines of either kind are reported on stderr and skipped
    rather than being dropped silently.
    """
    filename = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" in filename:
        self.increment_counter("Status", "top1000_ips_to_country file found", 1)
        try:
            ipaddr, country = line.strip().split("\t")
        except ValueError:
            # Wrong number of tab-separated fields: report it, as is done
            # for bad weblog lines, instead of swallowing the error.
            sys.stderr.write("Invalid country line: {}\n".format(line))
            return
        yield ipaddr, "+" + country
        return

    try:
        o = Weblog(line)
    except ValueError:
        sys.stderr.write("Invalid Logfile line :{}\n".format(line))
        return
    if o.wikipage() == "Main_Page":
        yield o.ipaddr, line
def mapper(self, _, line):
    """Emit ``(input file name, datetime)`` for one weblog line.

    The file name is read from the ``map.input.file`` jobconf variable,
    per the mrjob v0.4.6 documentation:
    https://pythonhosted.org/mrjob/utils-compat.html
    """
    source_file = mrjob.compat.jobconf_from_env("map.input.file")
    # Parse the weblog line and pull out its timestamp.
    timestamp = Weblog(line).datetime
    yield source_file, timestamp
def mapper(self, _, line):
    """Emit join records keyed by IP address from either input file.

    The input file name (from the ``map.input.file`` jobconf variable)
    selects the branch:

    * ``top1000ips_to_country.txt``: a stripped line with exactly two
      tab-separated fields yields ``(ipaddr, "+" + country)``; the "+"
      prefix tags the value as a country record.
    * anything else: the line is parsed with ``Weblog`` and requests for
      "Main_Page" yield ``(ipaddr, line)``.

    Lines that do not fit either format are written to stderr and
    skipped, so bad input is visible in the task logs.
    """
    filename = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" in filename:
        self.increment_counter("Status", "top1000_ips_to_country file found", 1)
        parts = line.strip().split("\t")
        if len(parts) != 2:
            # Previously ignored without a trace; report it like a bad
            # weblog line so data problems can be diagnosed.
            sys.stderr.write("Invalid country line: {}\n".format(line))
            return
        ipaddr, country = parts
        yield ipaddr, "+" + country
    else:
        try:
            o = Weblog(line)
        except ValueError:
            sys.stderr.write("Invalid Logfile line :{}\n".format(line))
            return
        if o.wikipage() == "Main_Page":
            yield o.ipaddr, line
def reducer(self, key, values):
    """Attach a country to each logfile line that shares this key.

    A value starting with "+" carries the country name (the text after
    the "+"). Any other value is treated as a logfile line: it is parsed
    with ``Weblog`` and emitted as
    ``("Geolocated", [date, country, line])``. Logfile lines seen before
    any country value are counted under "Warning" / "No Country Found"
    and skipped.
    """
    country = None
    for value in values:
        if value[:1] == "+":
            # Country record: remember it for the lines that follow.
            country = value[1:]
        elif country:
            entry = Weblog(value)
            yield "Geolocated", [entry.date, country, value]
        else:
            self.increment_counter("Warning", "No Country Found", 1)
def mapper(self, _, line):
    """Emit tagged join records keyed by IP address.

    The input file name (from the ``map.input.file`` jobconf variable)
    decides how the line is handled:

    * ``top1000ips_to_country.txt``: the stripped line must be exactly
      ``ipaddr<TAB>country``; ``(ipaddr, ("country", country))`` is
      emitted.
    * anything else: the line is parsed with ``Weblog`` and, for
      "Main_Page" requests, ``(ipaddr, ("ip", line))`` is emitted.

    Malformed lines of either kind are reported on stderr and skipped.
    """
    import sys

    filename = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" in filename:
        # Handle as a GeoLite2 file
        try:
            ipaddr, country = line.strip().split("\t")
        except ValueError:
            # Wrong field count: report it instead of dropping the line
            # silently, mirroring the weblog branch.
            sys.stderr.write("Invalid country line: {}\n".format(line))
            return
        yield ipaddr, ("country", country)
        return

    # Handle as a weblog file
    try:
        o = Weblog(line)
    except ValueError:
        sys.stderr.write("Invalid logfile line: {}\n".format(line))
        return
    if o.wikipage() == "Main_Page":
        yield o.ipaddr, ("ip", line)
def mapper(self, _, line):
    """Emit tagged join records keyed by IP address.

    The input file name (from the ``map.input.file`` jobconf variable)
    selects the branch:

    * ``top1000ips_to_country.txt``: a stripped line with exactly two
      tab-separated fields yields ``(ipaddr, ("country", country))``.
    * anything else: the line is parsed with ``Weblog`` and requests for
      "Main_Page" yield ``(ipaddr, ("ip", line))``.

    Lines that fit neither format are written to stderr and skipped.
    """
    import sys

    filename = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" in filename:
        # Handle as a GeoLite2 file
        parts = line.strip().split("\t")
        if len(parts) != 2:
            # Previously swallowed without notice; surface it in the logs.
            sys.stderr.write("Invalid country line: {}\n".format(line))
            return
        ipaddr, country = parts
        yield ipaddr, ("country", country)
    else:
        # Handle as a weblog file
        try:
            o = Weblog(line)
        except ValueError:
            sys.stderr.write("Invalid logfile line: {}\n".format(line))
            return
        if o.wikipage() == "Main_Page":
            yield o.ipaddr, ("ip", line)
def reducer(self, key, values):
    """Attach a country to each logfile line that shares this key.

    ``values`` holds every value emitted for ``key``. A value starting
    with "+" is the location record (country name after the "+"). Any
    other value is a logfile line: once a country is known it is parsed
    again with ``Weblog`` and emitted as
    ``("Geolocated", [date, country, line])``. Logfile lines seen before
    a country are skipped without being counted.
    """
    country = None
    for record in values:
        if record[:1] == "+":
            # Found the location!
            country = record[1:]
        elif country:
            # record is a logfile line; parse it again for its date.
            parsed = Weblog(record)
            yield "Geolocated", [parsed.date, country, record]
        # Otherwise no country is known yet, so the line is dropped.
def reducer(self, key, values):
    """Collect geolocated logfile lines, keeping the 50 smallest entries.

    ``values`` holds every value emitted for ``key``. A value starting
    with "+" is the location record (country name after the "+"). Any
    other value is a logfile line: once a country is known it is parsed
    again with ``Weblog`` and stored in ``self.lowest`` as
    ``(datetime, country, line)``. After each insertion the list is
    re-sorted and cut to 50 items. Logfile lines seen before a country
    are skipped. Nothing is emitted from this method.
    """
    country = None
    for record in values:
        if record[:1] == "+":
            # Found the location!
            country = record[1:]
        elif country:
            # record is a logfile line; parse it again for its timestamp.
            parsed = Weblog(record)
            self.lowest.append((parsed.datetime, country, record))
            ranked = sorted(self.lowest)
            self.lowest = ranked[:50]
        # Otherwise no country is known yet, so the line is dropped.
def mapper(self, _, line):
    """Emit one tagged join record per input line.

    The input file name (from the ``map.input.file`` jobconf variable)
    decides how the line is handled:

    * ``top1000ips_to_country.txt``: the line is split on tabs and
      ``(field0, ("country", all_fields))`` is emitted.
    * anything else: the line is parsed with ``Weblog`` and
      ``(ipaddr, ("ip", line))`` is emitted.

    A counter in the "Info" group is incremented for each kind of line.
    """
    source = mrjob.compat.jobconf_from_env("map.input.file")

    if "top1000ips_to_country.txt" not in source:
        # Handle as a weblog file
        self.increment_counter("Info", "Name Count", 1)
        entry = Weblog(line)
        yield entry.ipaddr, ("ip", line)
        return

    # Handle as a GeoLite2 file
    columns = line.split("\t")
    self.increment_counter("Info", "Obs Count", 1)
    yield columns[0], ("country", columns)
def mapper(self, _, line):
    """Emit ``(date, 1)`` for each line that does not contain "Special:".

    Lines containing the substring "Special:" are skipped before any
    parsing; every other line is parsed with ``Weblog``.
    """
    if "Special:" in line:
        return
    entry = Weblog(line)
    yield entry.date, 1
def mapper(self, _, line):
    """Parse ``line`` with ``Weblog`` and emit ``(date, 1)``."""
    entry_date = Weblog(line).date
    yield entry_date, 1
def mapper(self, _, line):
    """Emit every line under the single key "first50".

    The value is ``(datetime, line)``, where ``datetime`` comes from
    parsing the line with ``Weblog``.
    """
    stamped = (Weblog(line).datetime, line)
    yield "first50", stamped
def mapper(self, _, line):
    """Parse ``line`` with ``Weblog`` and emit ``(wikipage, 1)``."""
    page = Weblog(line).wikipage()
    yield page, 1
def mapper(self, _, line):
    """Parse ``line`` with ``Weblog`` and emit ``(wikipage, 1)``.

    The input file name was previously looked up through
    ``mrjob.compat.jobconf_from_env`` but never used, so that lookup has
    been removed.
    """
    log = Weblog(line)
    yield log.wikipage(), 1