def geolocate(url):
    """
    Find data about url in the geolocation database and store it in MongoDB.

    :param url: url to locate
    :return: geolocation_id
    """
    from geoip2.database import Reader
    from geoip2.webservice import Client
    from geoip2.errors import GeoIP2Error, HTTPError

    geolocation_data = dict()
    try:
        ip = url_to_ip(url)
        response = None
        if config.GEOIP2_WEB_SERVICE_USER_ID and config.GEOIP2_WEB_SERVICE_LICENSE_KEY \
                and config.GEOIP2_WEB_SERVICE_TYPE:
            client = Client(config.GEOIP2_WEB_SERVICE_USER_ID,
                            config.GEOIP2_WEB_SERVICE_LICENSE_KEY)
            if config.GEOIP2_WEB_SERVICE_TYPE == 'country':
                response = client.country(ip).country
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'city':
                response = client.city(ip).city
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'insights':
                # The Insights model has no `.insights` attribute; keep the model itself
                response = client.insights(ip)
        elif config.GEOIP2_DATABASE_PATH and config.GEOIP2_DATABASE_TYPE:
            reader = Reader(config.GEOIP2_DATABASE_PATH)
            if config.GEOIP2_DATABASE_TYPE == 'country':
                response = reader.country(ip).country
            elif config.GEOIP2_DATABASE_TYPE == 'city':
                response = reader.city(ip).city
        else:
            reader = Reader(
                '/opt/project/worker/utils/geoloc_databases/GeoLite2-City.mmdb'
            )
            response = reader.city(ip)
        for name in dir(response):
            value = getattr(response, name)
            if not name.startswith('_') and not isinstance(value, dict):
                geolocation_data[name] = value.__dict__
    except (GeoIP2Error, HTTPError) as error:
        geolocation_data = {'_error': str(error)}
    finally:
        duplicate = db.geolocation.find_one(geolocation_data)
        if duplicate:
            return duplicate['_id']
        geolocation_id = db.geolocation.insert_one(geolocation_data)
        return str(geolocation_id.inserted_id)
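# `url_to_ip` is referenced above but not shown in this snippet. A plausible
# sketch (assumption: the helper resolves the URL's hostname to an IPv4
# address via DNS using only the standard library):
import socket
from urllib.parse import urlparse

def url_to_ip(url):
    # urlparse only populates .hostname when a scheme is present
    if '://' not in url:
        url = 'http://' + url
    return socket.gethostbyname(urlparse(url).hostname)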
class GeoIP:
    def __init__(self, db='/home/insane/PycharmProjects/octopus/GeoLite2-City.mmdb'):
        self.r = Reader(db)

    def get_continent_code(self, ip):
        # Return the code itself, not the whole continent record
        return self.r.city(ip).continent.code

    def get_longitude(self, ip):
        return self.r.city(ip).location.longitude

    def get_latitude(self, ip):
        return self.r.city(ip).location.latitude

    def get_iso_code(self, ip):
        return self.r.city(ip).country.iso_code
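# Minimal usage sketch for the GeoIP wrapper above; the IP address is just an
# example value, and the default database path is taken from the constructor:
geo = GeoIP()  # or GeoIP('/path/to/GeoLite2-City.mmdb')
print(geo.get_iso_code('8.8.8.8'))                                 # e.g. 'US'
print(geo.get_latitude('8.8.8.8'), geo.get_longitude('8.8.8.8'))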
def main():
    args = docopt(__doc__)
    out = csv.writer(sys.stdout)
    reader = Reader(args['-f'])
    try:
        ips = [x.strip() for x in open(args['IPFILE'], 'r')]
    except FileNotFoundError:
        if not args['-q']:
            print("IP file {} could not be opened, interpreting as single IP!".format(args['IPFILE']))
        ips = [args['IPFILE'].strip()]

    for ip in ips:
        try:
            resp = reader.city(ip)
        except ValueError:
            if not args['-q']:
                print("{} is not an IP address, skipping!".format(ip), file=sys.stderr)
            continue
        row = []
        if args['-i']:
            row.append(ip)
        if args['-c']:
            row.append(resp.country.names.get('en', '-'))
            row.append(resp.country.iso_code)
        try:
            row.append(resp.city.names['en'])
        except KeyError:
            row.append('-')
        out.writerow(row)
def set_mirror_country(
        mirror_info: Dict[AnyStr, Union[Dict, AnyStr]],
) -> None:
    """
    Set country by IP of a mirror

    :param mirror_info: Dict with info about a mirror
    """
    mirror_name = mirror_info['name']
    try:
        ip = socket.gethostbyname(mirror_name)
    except socket.gaierror:
        logger.error("Can't get IP of mirror %s", mirror_name)
        mirror_info['country'] = 'Unknown'
        return
    db = Reader(GEOPIP_DB)
    logger.info('Set country for mirror "%s"', mirror_name)
    try:
        match = db.city(ip)  # type: City
        mirror_info['country'] = match.country.name
    except AddressNotFoundError:
        logger.warning(
            'GeoIP db does not have information about IP "%s"',
            ip,
        )
        mirror_info['country'] = 'Unknown'
def get_location(cls, ip=None):
    if not ip:
        ip = cls.get_ip()
    try:
        city_client = Reader("./utils/geoip2/GeoLite2-City.mmdb")
        response = city_client.city(ip)
        if response.city.name:
            location = ",".join(
                [response.country.iso_code, response.city.name])
        else:
            location = ",".join([response.country.iso_code, ""])
    except Exception as e:
        print("failed to get location with geoip2: %s" % str(e))
        try:
            api_key = 'at_IW99hSbVb4uxQq1SbaoIanDbulTbU'
            api_url = 'https://geo.ipify.org/api/v1?'
            url = api_url + 'apiKey=' + api_key + '&ipAddress=' + ip
            temp_region = loads(
                urlopen(url).read().decode('utf8'))["location"]
            try:
                location = ",".join(
                    [temp_region["country"], temp_region["city"]])
            # except clauses take a tuple, not a list
            except (KeyError, ValueError):
                location = temp_region
        except URLError:
            location = "network error"
    return location
def get_location_for_ip(ip_addr):
    try:
        reader = Reader(
            os.path.join(GEOLOCATION_DATABASE_PATH, GEOLOCATION_DATABASE_CITY))
        return reader.city(ip_addr)
    except Exception as e:
        # logger.warn() is deprecated in favor of logger.warning()
        logger.warning("Failed retrieving geoip info for %s: %s" %
                       (ip_addr, str(e)))
        return None
def fetch_geoip(ip_address, language=None):
    # Prepare response object
    response = {}

    # Get hostname from IP address; skip silently on failure
    try:
        response['ip_address'] = ip_address
        response['hostname'] = socket.gethostbyaddr(ip_address)[0]
    except Exception:
        pass

    # Load GeoLite2 City database
    geoip = Reader(path.join(config.MMDB_PATH, "GeoLite2-City.mmdb"))

    # Try to fetch data and build response; an AddressNotFoundError propagates
    # to the caller, but the database is closed either way
    try:
        data = geoip.city(ip_address)

        # geoip.city
        response['city'] = {
            "name": data.city.name,
            "id": data.city.geoname_id,
            "region": data.subdivisions.most_specific.name,
            "region_code": data.subdivisions.most_specific.iso_code
        }

        # geoip.country
        response['country'] = {
            "name": data.country.name,
            "iso_code": data.country.iso_code,
            "continent": data.continent.name,
            "continent_code": data.continent.code,
            "is_eu": data.country.is_in_european_union
        }

        # geoip.location
        response['location'] = {
            "accuracy_radius": data.location.accuracy_radius,
            "zip": data.postal.code,
            "latitude": data.location.latitude,
            "longitude": data.location.longitude,
            "timezone": data.location.time_zone
        }
    finally:
        # Close database instance even when the lookup raises
        geoip.close()

    # Load GeoLite2 ASN database (optional)
    response['asn'] = fetch_asn(ip_address)

    # Return built response object
    return response
def find_location(ip):
    reader = Reader(TannerConfig.get('DATA', 'geo_db'))
    try:
        location = reader.city(ip)
        info = dict(
            country=location.country.name,
            country_code=location.country.iso_code,
            city=location.city.name,
            zip_code=location.postal.code,
        )
    except geoip2.errors.AddressNotFoundError:
        info = "NA"  # When IP doesn't exist in the db, set info as "NA - Not Available"
    return info
def ip2city_py(ip):
    global reader
    if reader is None:
        # assume all worker nodes have the mmdb installed in the following path
        reader = Reader(
            "/home/spark/spark-2.4.5-bin-hadoop2.7/maxmind/GeoLite2-City.mmdb"
        )
    try:
        response = reader.city(ip)
        city = response.city.name
        if city is None:
            return None
        return city
    except Exception:
        return None
def ip2city_py(ip):
    global reader
    if reader is None:
        # assume all worker nodes have the mmdb installed in the following path
        reader = Reader(
            "/home/cloudera/Desktop/spark_sql_101/maxmind/GeoLite2-City.mmdb"
        )
    try:
        response = reader.city(ip)
        city = response.city.name
        if city is None:
            return None
        return city
    except Exception:
        return None
def statics_city():
    """Aggregate visitor counts by country/city."""
    city = {}
    geoip2_reader = Reader("GeoLite2-City.mmdb")
    for _ip, _cnt in statics_all()["vistors"].items():
        _city_name = "Unknown"
        try:
            _city = geoip2_reader.city(_ip)
            # _city_name = "{}/{}".format(_city.country.names.get("en", ""), _city.city.names.get("en", ""))
            _city_name = "{}/{}".format(_city.country.names.get("zh-CN", ""),
                                        _city.city.names.get("zh-CN", ""))
        except Exception as e:
            print(e)
        city.setdefault(_city_name, 0)
        city[_city_name] += _cnt
    return city
class MaxMindCityDatabase(BaseDriver):
    """
    MaxMind City Database
    """

    def __init__(self, database):
        """
        Constructor
        :param database: Location of the city-database
        """
        self._reader = Reader(database)

        # Close reader when app closes down
        atexit.register(lambda: self._reader.close())

    def insights(self, ip):
        """
        Get insights in ip
        :param ip: The ip
        :return: Insights
        :rtype: geoip2.models.City
        """
        return self._reader.city(ip)
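# Usage sketch for the driver above (path and IP are example values). Note
# that despite its name, insights() performs a local city lookup and returns
# a geoip2.models.City:
mmdb_driver = MaxMindCityDatabase('/var/lib/GeoIP/GeoLite2-City.mmdb')
city = mmdb_driver.insights('8.8.8.8')
print(city.country.iso_code, city.city.name)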
def extract_location(line):
    global reader
    if reader is None:
        reader = Reader(
            "/home/spark/spark-2.4.5-bin-hadoop2.7/maxmind/GeoLite2-City.mmdb")
    match = pattern.match(line)
    if match:
        ip = match.group(1)
        response = reader.city(ip)
        country = response.country.name
        city = response.city.name
        if city is None:
            return country
        else:
            return "{},{}".format(country, city)
    else:
        return "InvalidLogFound"
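# `pattern` is a module-level regex that is not shown above. A plausible
# sketch (assumption: log lines start with the client IPv4 address, which
# extract_location() reads from group 1):
import re
pattern = re.compile(r'^(\d{1,3}(?:\.\d{1,3}){3})\b')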
def get_lng_lat(ip_address):
    """
    Takes an IPv4 or IPv6 address and returns a 2-tuple of
    (longitude, latitude).
    """
    coord = None
    if ip_address and is_public_ip_addr(ip_address):
        city_db_path = get_city_db_path()
        db_reader = Reader(city_db_path)
        try:
            result = db_reader.city(ip_address)
            coord = (result.location.longitude, result.location.latitude)
        except AddressNotFoundError:
            _LOGGER.warning('The address %s is not in the GeoLite2 database.',
                            ip_address)
        finally:
            db_reader.close()
    return coord
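# `is_public_ip_addr` and `get_city_db_path` are referenced above but not
# shown. A minimal sketch of the former using the stdlib ipaddress module
# (assumption: "public" means globally routable):
import ipaddress

def is_public_ip_addr(ip_address):
    try:
        return ipaddress.ip_address(ip_address).is_global
    except ValueError:
        # Not a parseable IPv4/IPv6 address
        return False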
class Handler(GeoipService.ContextIface):
    def __init__(self, city_db_path):
        # maxmind docs recommend reusing a Reader across requests
        self.reader = Reader(city_db_path)

    def is_healthy(self, context):
        return True

    def get_country(self, context, ip_address):
        # TODO: add anonymous info (tor/ec2/rackspace/etc)
        try:
            response = self.reader.city(ip_address)
        except AddressNotFoundError:
            country_code = ""
            country_name = ""
        else:
            country_code = response.country.iso_code
            country_name = response.country.name

        return GeoIpRecord(
            country_code=country_code,
            country_name=country_name,
        )
def add_location_info(record):
    global reader
    if reader is None:
        # assume all worker nodes have the mmdb installed in the following path
        reader = Reader(
            "/home/cloudera/spark-2.4.4-bin-hadoop2.6/maxmind/GeoLite2-City.mmdb"
        )
    ip = record.split(",")[13]
    try:
        response = reader.city(ip)
        country = response.country.name
        city = response.city.name
        latitude = response.location.latitude
        longitude = response.location.longitude
        qk = quadkey.from_geo((latitude, longitude), 15)
        acc_num_good_records.add(1)
        return "{},{},{},{},{},{}".format(record, country, city, latitude,
                                          longitude, qk.key)
    except Exception:
        acc_num_bad_records.add(1)
        return "-----"
def get_location_by_remote_service(cls, ip=None):
    if not ip:
        ip = cls.get_ip()
    if ip.find("error") >= 0 or ip.find(".") < 0:
        location = "ip data error"
    else:
        try:
            city_client = Reader("./utils/geoip2/GeoLite2-City.mmdb")
            response = city_client.city(ip)
            location = {
                "country": response.country.names["zh-CN"],
                "subdivision": response.subdivisions.most_specific.names["zh-CN"],
                "city": response.city.names["zh-CN"],
                "accuracy_radius": response.location.accuracy_radius,
                "latitude": response.location.latitude,
                "longitude": response.location.longitude,
                "time_zone": response.location.time_zone
            }
        except Exception as e:
            print("failed to get location with geoip2: %s" % str(e))
            try:
                api_key = 'at_IW99hSbVb4uxQq1SbaoIanDbulTbU'
                api_url = 'https://geo.ipify.org/api/v1?'
                url = api_url + 'apiKey=' + api_key + '&ipAddress=' + ip
                location = loads(
                    urlopen(url).read().decode('utf8'))["location"]
            except URLError:
                location = "network error"
    return location
class GeoLite(Database):
    """ru, en, de languages; country, continent, city"""

    def __init__(self, path):
        super().__init__(path)
        self._database = Reader(path)

    def _get_main_information(self, location):
        if location:
            return location.city.names.get('ru', '')
        else:
            return ''

    def get_data(self, ips, proc_num, return_dict):
        result = []
        for ip in ips:
            try:
                location = self._database.city(Database.int2ip(ip))
            except AddressNotFoundError:
                location = None
            result.append(self._get_main_information(location))
        return_dict[proc_num] = result
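# `Database.int2ip` is referenced above but not shown. A plausible sketch
# (assumption: IPs are stored as unsigned 32-bit integers), which could live
# on the Database base class as a staticmethod:
import ipaddress

def int2ip(ip_int):
    # e.g. 134744072 -> '8.8.8.8'
    return str(ipaddress.ip_address(ip_int))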
def city(ip):
    # Reader.city() requires an IP address argument
    geoip2_reader = Reader('GeoLite2-City.mmdb')
    return geoip2_reader.city(ip)
class GeoIPHandler(object):
    def __init__(self, data_folder, maxmind_license_key):
        self.data_folder = data_folder
        self.maxmind_license_key = maxmind_license_key
        self.dbfile = abspath(join(self.data_folder, 'GeoLite2-City.mmdb'))
        self.logger = getLogger()
        self.reader = None
        self.reader_manager(action='open')

        self.logger.info('Opening persistent connection to the MaxMind DB...')

    def reader_manager(self, action=None):
        if action == 'open':
            try:
                self.reader = Reader(self.dbfile)
            except FileNotFoundError:
                self.logger.error("Could not find MaxMind DB! Downloading!")
                result_status = self.download()
                if result_status:
                    self.logger.error(
                        "Could not download MaxMind DB! You may need to manually install it."
                    )
                    exit(1)
                else:
                    self.reader = Reader(self.dbfile)
        else:
            self.reader.close()

    def lookup(self, ipaddress):
        ip = ipaddress
        self.logger.debug(
            'Getting lat/long for Tautulli stream using ip with last octet ending in %s',
            ip.split('.')[-1:][0])
        return self.reader.city(ip)

    def update(self):
        today = date.today()
        try:
            dbdate = date.fromtimestamp(stat(self.dbfile).st_mtime)
            db_next_update = date.fromtimestamp(stat(
                self.dbfile).st_mtime) + timedelta(days=30)
        except FileNotFoundError:
            self.logger.error("Could not find MaxMind DB as: %s", self.dbfile)
            self.download()
            dbdate = date.fromtimestamp(stat(self.dbfile).st_mtime)
            db_next_update = date.fromtimestamp(stat(
                self.dbfile).st_mtime) + timedelta(days=30)

        if db_next_update < today:
            self.logger.info("Newer MaxMind DB available, Updating...")
            self.logger.debug(
                "MaxMind DB date %s, DB updates after: %s, Today: %s",
                dbdate, db_next_update, today)
            self.reader_manager(action='close')
            self.download()
            self.reader_manager(action='open')
        else:
            db_days_update = db_next_update - today
            self.logger.debug("MaxMind DB will update in %s days",
                              abs(db_days_update.days))
            self.logger.debug(
                "MaxMind DB date %s, DB updates after: %s, Today: %s",
                dbdate, db_next_update, today)

    def download(self):
        tar_dbfile = abspath(join(self.data_folder, 'GeoLite2-City.tar.gz'))
        maxmind_url = (
            'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City'
            f'&suffix=tar.gz&license_key={self.maxmind_license_key}')
        downloaded = False

        retry_counter = 0

        while not downloaded:
            self.logger.info('Downloading GeoLite2 DB from MaxMind...')
            try:
                urlretrieve(maxmind_url, tar_dbfile)
                downloaded = True
            # HTTPError is a subclass of URLError, so it must be caught first
            except HTTPError as e:
                if e.code == 401:
                    self.logger.error(
                        "Your MaxMind license key is incorrect! Check your config: %s",
                        e)
                    result_status = 1
                    return result_status
                else:
                    self.logger.error(
                        "Problem downloading new MaxMind DB... Trying again: %s",
                        e)
                    sleep(2)
                    retry_counter = (retry_counter + 1)

                    if retry_counter >= 3:
                        self.logger.error(
                            "Retried downloading the new MaxMind DB 3 times and failed... Aborting!"
                        )
                        result_status = 1
                        return result_status
            except URLError as e:
                self.logger.error("Problem downloading new MaxMind DB: %s", e)
                result_status = 1
                return result_status

        try:
            remove(self.dbfile)
        except FileNotFoundError:
            self.logger.warning(
                "Cannot remove MaxMind DB as it does not exist!")

        self.logger.debug("Opening MaxMind tar file : %s", tar_dbfile)

        tar = taropen(tar_dbfile, 'r:gz')

        for files in tar.getmembers():
            if 'GeoLite2-City.mmdb' in files.name:
                self.logger.debug('"GeoLite2-City.mmdb" FOUND in tar file')
                files.name = basename(files.name)
                tar.extract(files, self.data_folder)
                self.logger.debug('%s has been extracted to %s', files,
                                  self.data_folder)
        tar.close()
        try:
            remove(tar_dbfile)
            self.logger.debug('Removed the MaxMind DB tar file.')
        except FileNotFoundError:
            self.logger.warning(
                "Cannot remove MaxMind DB TAR file as it does not exist!")
    .getOrCreate()

# (1) Define a normal Python function and match arguments to your UDF
reader = None

def ip2city_py(ip):
    global reader
    if reader is None:
        # assume all worker nodes have the mmdb installed in the following path
        reader = Reader("/home/spark/dataaccess_log_analysis/maxmind/GeoLite2-City.mmdb")
    try:
        response = reader.city(ip)
        city = response.city.name
        if city is None:
            return None
        return city
    except Exception:
        return None

# (2) Register UDF function
ip2city = udf(ip2city_py, StringType())

# Use it
page_view = spark.read.csv("hdfs://master/user/spark/spark_sql_101/page_views/data",
                           sep="\t",
                           schema="logtime string, userid int, ip string, page string, "
                                  "ref string, os string, os_ver string, agent string")
        stat_all['vistors'][_ip] += _cnt
    for _status, _cnt in _stat['status'].items():
        stat_all['status'].setdefault(_status, 0)
        stat_all['status'][_status] += _cnt

# Aggregate visitor counts by country/city
stat_all['city'] = {}
from geoip2.database import Reader
geoip2_reader = Reader('GeoLite2-City.mmdb')
for _ip, _cnt in stat_all['vistors'].items():
    _city_name = 'unknown'
    try:
        _city = geoip2_reader.city(_ip)
        _city_name = '{}/{}'.format(_city.country.names.get('en', ''),
                                    _city.city.names.get('en', ''))
        # _city_name = '{}/{}'.format(_city.country.names.get('zh-CN', ''), _city.city.names.get('zh-CN', ''))
    except Exception as e:
        print(e)
    stat_all['city'].setdefault(_city_name, 0)
    stat_all['city'][_city_name] += _cnt
geoip2_reader.close()

# Print results
print('=' * 70)
print('|1. Overview{:>57}|'.format(''))
def logparse(
        log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user,
        influxdb_user_pass, influxdb_retention, influxdb_shard, geo_measurement,
        log_measurement, send_nginx_logs, geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(
        host=influxdb_host, port=influxdb_port, username=influxdb_user,
        password=influxdb_user_pass, database=influxdb_database)

    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request(
            'ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
                         f'Error: {e}')
        exit(1)

    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
                         f'Error: {e}')
        exit(1)

    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

        retention_policies = [policy['name'] for policy in
                              client.get_list_retention_policies(database=influxdb_database)]
        if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
            logging.info(f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})')
            client.create_retention_policy(name=f'{influxdb_database} {influxdb_retention}-{influxdb_shard}',
                                           duration=influxdb_retention, replication='1',
                                           database=influxdb_database, default=True,
                                           shard_duration=influxdb_shard)

    re_ipv4 = IPV4_NGINX_LOG_LINE
    re_ipv6 = IPV6_NGINX_LOG_LINE

    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        send_logs = False
        re_ipv4 = IPV4_REGEX
        re_ipv6 = IPV6_REGEX
        logging.info('SEND_NGINX_LOGS set to false')

    # if not regex_tester(log_path, 3):
    #     if send_logs:
    #         re_ipv4 = IPV4_REGEX
    #         re_ipv6 = IPV6_REGEX
    #         send_logs = False
    #         logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')

    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            if inode != inodenew:
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning('Failed to match regex that previously matched!? Skipping this line!\n'
                                    'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or Github: https://github.com/gilbN/geoip2influx\n'
                                    f'Line: {line}')
                    continue
                if ipadd(ip).iptype() == 'PUBLIC' and ip:
                    info = gi.city(ip)
                    if info is not None:
                        geohash = encode(info.location.latitude,
                                         info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code
                        geohash_tags['city'] = info.city.name
                        geohash_tags['postal_code'] = info.postal.code
                        geohash_tags['latitude'] = info.location.latitude
                        geohash_tags['longitude'] = info.location.longitude
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error('Error writing data to InfluxDB! Check your database!\n'
                                          f'Error: {e}')
                if send_logs:
                    data = search(log, line)
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        info = gi.city(ip)
                        if info is not None:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(datadict['request_time'])
                            if datadict['connect_time'] == '-':
                                log_data_fields['connect_time'] = 0.0
                            else:
                                log_data_fields['connect_time'] = float(datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(
                                datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError,
                                    ConnectionError) as e:
                                logging.error('Error writing data to InfluxDB! Check your database!\n'
                                              f'Error: {e}')
            flatten(current[k], new_key, result)
    else:
        result[key] = current
    return result

for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    # print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
    #                                      message.offset, message.key,
    #                                      message.value))
    data = json.loads(message.value)
    try:
        res = mmdb.city(data['dest_ip'])
        asn = asndb.asn(data['dest_ip'])
        data['dest_latitude'] = res.location.latitude
        data['dest_longitude'] = res.location.longitude
        data['dest_country_name'] = res.country.names['en']
        data['dest_asn'] = asn.autonomous_system_number
        data['dest_autonomous_system'] = asn.autonomous_system_organization
    except Exception:
        data['dest_latitude'] = 0.0
        data['dest_longitude'] = 0.0
        data['dest_country_name'] = 'unknown'
        data['dest_asn'] = 'unknown'
        data['dest_autonomous_system'] = 'unknown'
    try:
        res = mmdb.city(data['src_ip'])
        asn = asndb.asn(data['src_ip'])
class GeoIPReader:
    """Slim wrapper around GeoIP API"""

    def __init__(self):
        self.__reader: Optional[Reader] = None
        self.__last_mtime: float = 0.0
        self.__open()

    def __open(self):
        """Get GeoIP Reader, if configured, otherwise none"""
        path = CONFIG.y("geoip")
        if path == "" or not path:
            return
        try:
            self.__reader = Reader(path)
            self.__last_mtime = stat(path).st_mtime
            LOGGER.info("Loaded GeoIP database", last_write=self.__last_mtime)
        except OSError as exc:
            LOGGER.warning("Failed to load GeoIP database", exc=exc)

    def __check_expired(self):
        """Check if the geoip database has been opened longer than 8 hours,
        and re-open it, as it will probably have been re-downloaded"""
        now = time()
        diff = datetime.fromtimestamp(now) - datetime.fromtimestamp(
            self.__last_mtime)
        diff_hours = diff.total_seconds() // 3600
        if diff_hours >= 8:
            LOGGER.info("GeoIP database loaded too long, re-opening", diff=diff)
            self.__open()

    @property
    def enabled(self) -> bool:
        """Check if GeoIP is enabled"""
        return bool(self.__reader)

    def city(self, ip_address: str) -> Optional[City]:
        """Wrapper for Reader.city"""
        with Hub.current.start_span(
            op="authentik.events.geo.city",
            description=ip_address,
        ):
            if not self.enabled:
                return None
            self.__check_expired()
            try:
                return self.__reader.city(ip_address)
            except (GeoIP2Error, ValueError):
                return None

    def city_dict(self, ip_address: str) -> Optional[GeoIPDict]:
        """Wrapper for self.city that returns a dict"""
        city = self.city(ip_address)
        if not city:
            return None
        city_dict: GeoIPDict = {
            "continent": city.continent.code,
            "country": city.country.iso_code,
            "lat": city.location.latitude,
            "long": city.location.longitude,
            "city": "",
        }
        if city.city.name:
            city_dict["city"] = city.city.name
        return city_dict
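# Usage sketch for GeoIPReader (assumption: the surrounding app's CONFIG
# points the "geoip" key at a GeoLite2-City.mmdb file; the IP is an example):
geo = GeoIPReader()
if geo.enabled:
    print(geo.city_dict('8.8.8.8'))  # {'continent': ..., 'country': ..., 'lat': ..., 'long': ..., 'city': ...}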
class NginxLogParser():
    POLLING_PERIOD = 3

    RE_IPV4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    RE_IPV6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
    RE_LOGIPV4 = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])', IGNORECASE)  # NOQA
    RE_LOGIPV6 = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])', IGNORECASE)  # NOQA

    DEFAULT_LOG_PATH = '/config/log/nginx/access.log'
    DEFAULT_INFLUX_HOST = 'localhost'
    DEFAULT_INFLUX_HOST_PORT = '8086'
    DEFAULT_INFLUX_DATABASE = 'geoip2influx'
    DEFAULT_INFLUX_USER = '******'
    DEFAULT_INFLUX_PASS = '******'
    DEFAULT_INFLUX_RETENTION = '7d'
    DEFAULT_INFLUX_SHARD = '1d'
    DEFAULT_GEO_MEASUREMENT = 'geoip2influx'
    DEFAULT_LOG_MEASUREMENT = 'nginx_access_logs'
    DEFAULT_SEND_NGINX_LOGS = 'true'

    def __init__(
            self,
            geoip_db_path='/config/geoip2db/GeoLite2-City.mmdb',
            log_path=DEFAULT_LOG_PATH,
            influxdb_host=DEFAULT_INFLUX_HOST,
            influxdb_port=DEFAULT_INFLUX_HOST_PORT,
            influxdb_database=DEFAULT_INFLUX_DATABASE,
            influxdb_user=DEFAULT_INFLUX_USER,
            influxdb_user_pass=DEFAULT_INFLUX_PASS,
            influxdb_retention=DEFAULT_INFLUX_RETENTION,
            influxdb_shard=DEFAULT_INFLUX_SHARD,
            geo_measurement=DEFAULT_GEO_MEASUREMENT,
            log_measurement=DEFAULT_LOG_MEASUREMENT,
            send_nginx_logs=DEFAULT_SEND_NGINX_LOGS,
    ):
        self.geoip_db_path = geoip_db_path
        self.log_path = log_path
        self.influxdb_host = influxdb_host
        self.influxdb_port = influxdb_port
        self.influxdb_database = influxdb_database
        self.influxdb_user = influxdb_user
        self.influxdb_user_pass = influxdb_user_pass
        self.influxdb_retention = influxdb_retention
        self.influxdb_shard = influxdb_shard
        self.geo_measurement = geo_measurement
        self.log_measurement = log_measurement
        self.send_nginx_logs = send_nginx_logs

        logging.debug('Log parser config:' +
                      f'\n geoip_db_path :: {self.geoip_db_path}' +
                      f'\n log_path :: {self.log_path}' +
                      f'\n influxdb_host :: {self.influxdb_host}' +
                      f'\n influxdb_port :: {self.influxdb_port}' +
                      f'\n influxdb_database :: {self.influxdb_database}' +
                      f'\n influxdb_retention :: {self.influxdb_retention}' +
                      f'\n influxdb_shard :: {self.influxdb_shard}' +
                      f'\n influxdb_user :: {self.influxdb_user}' +
                      f'\n influxdb_user_pass :: {self.influxdb_user_pass}' +
                      f'\n geo_measurement :: {self.geo_measurement}' +
                      f'\n log_measurement :: {self.log_measurement}' +
                      f'\n send_nginx_logs :: {self.send_nginx_logs}')
        self.influxdb = self.init_influxdb()
        self.geoip = Reader(self.geoip_db_path)
        self.hostname = uname()[1]

    @classmethod
    def from_env(cls):
        # Getting params from envs
        return cls(
            log_path=env.get('NGINX_LOG_PATH', cls.DEFAULT_LOG_PATH),
            influxdb_host=env.get('INFLUX_HOST', cls.DEFAULT_INFLUX_HOST),
            influxdb_port=env.get('INFLUX_HOST_PORT', cls.DEFAULT_INFLUX_HOST_PORT),
            influxdb_database=env.get('INFLUX_DATABASE', cls.DEFAULT_INFLUX_DATABASE),
            influxdb_user=env.get('INFLUX_USER', cls.DEFAULT_INFLUX_USER),
            influxdb_user_pass=env.get('INFLUX_PASS', cls.DEFAULT_INFLUX_PASS),
            influxdb_retention=env.get('INFLUX_RETENTION', cls.DEFAULT_INFLUX_RETENTION),
            influxdb_shard=env.get('INFLUX_SHARD', cls.DEFAULT_INFLUX_SHARD),
            geo_measurement=env.get('GEO_MEASUREMENT', cls.DEFAULT_GEO_MEASUREMENT),
            log_measurement=env.get('LOG_MEASUREMENT', cls.DEFAULT_LOG_MEASUREMENT),
            send_nginx_logs=env.get('SEND_NGINX_LOGS', cls.DEFAULT_SEND_NGINX_LOGS),
        )

    def regex_tester(self, N=3):
        """Verify the regex to use on the log file.

        Try to parse the last N lines of the log file, waiting up to 1 min
        for a valid log line. If no enriched log line can be parsed, only
        extract the IP, which assumes the default nginx log format.
        """
        time_out = time() + 60
        while True:
            assert N >= 0
            pos = N + 1
            lines = []
            with open(self.log_path) as f:
                while len(lines) <= N:
                    try:
                        f.seek(-pos, 2)
                    except IOError:
                        f.seek(0)
                        break
                    finally:
                        lines = list(f)
                    pos *= 2
            log_lines = lines[-N:]
            for line in log_lines:
                if self.RE_IPV4.match(line):
                    if self.RE_LOGIPV4.match(line):
                        logging.debug(
                            f'Regex is matching {self.log_path} continuing...')
                        return True
                if self.RE_IPV6.match(line):
                    if self.RE_LOGIPV6.match(line):
                        logging.debug(
                            f'Regex is matching {self.log_path} continuing...')
                        return True
                else:
                    logging.debug(f'Testing regex on: {self.log_path}')
                    sleep(2)
            if time() > time_out:
                logging.warning(f'Failed to match regex on: {self.log_path}')
                break

    def file_exists(self):
        """Verify the log file and geoip db validity."""
        time_out = time() + 30
        while True:
            file_list = [self.log_path, self.geoip_db_path]
            if not exists(self.log_path):
                logging.warning(f'File: {self.log_path} not found...')
                sleep(1)
            if not exists(self.geoip_db_path):
                logging.warning(f'File: {self.geoip_db_path} not found...')
                sleep(1)
            if all([isfile(f) for f in file_list]):
                for f in file_list:
                    logging.debug(f'Found: {f}')
                return True
            if time() > time_out:
                if not exists(self.geoip_db_path) and not exists(self.log_path):
                    logging.critical(
                        f"Can't find: {self.geoip_db_path} or {self.log_path}, exiting!"
                    )
                    break
                elif not exists(self.geoip_db_path):
                    logging.critical(
                        f"Can't find: {self.geoip_db_path}, exiting!")
                    break
                elif not exists(self.log_path):
                    logging.critical(f"Can't find: {self.log_path}, exiting!")
                    break

    def init_influxdb(self):
        client = InfluxDBClient(
            host=self.influxdb_host,
            port=self.influxdb_port,
            username=self.influxdb_user,
            password=self.influxdb_user_pass,
            database=self.influxdb_database,
        )

        try:
            logging.debug('Testing InfluxDB connection')
            version = client.request(
                'ping',
                expected_response_code=204).headers['X-Influxdb-Version']
            logging.debug(f'Influxdb version: {version}')
        except ConnectionError as e:
            logging.critical(
                f'Error testing connection to InfluxDB. Please check your url/hostname.\nError: {e}'
            )
            raise

        try:
            databases = [db['name'] for db in client.get_list_database()]
            if self.influxdb_database in databases:
                logging.debug(f'Found database: {self.influxdb_database}')
        except InfluxDBClientError as e:
            logging.critical(
                f'Error getting database list! Please check your InfluxDB configuration.\nError: {e}'
            )
            raise

        if self.influxdb_database not in databases:
            logging.info(f'Creating database: {self.influxdb_database}')
            client.create_database(self.influxdb_database)

            retention_policies = [
                policy['name'] for policy in client.get_list_retention_policies(
                    database=self.influxdb_database)
            ]
            if f'{self.influxdb_database} {self.influxdb_retention}-{self.influxdb_shard}' not in retention_policies:
                logging.info(
                    f'Creating {self.influxdb_database} retention policy ({self.influxdb_retention}-{self.influxdb_shard})'
                )
                client.create_retention_policy(
                    name=f'{self.influxdb_database} {self.influxdb_retention}-{self.influxdb_shard}',
                    duration=self.influxdb_retention,
                    replication='1',
                    database=self.influxdb_database,
                    default=True,
                    shard_duration=self.influxdb_shard)

        return client

    def store_geo_metric(self, ip, geo_info, log_data):
        geo_metrics = []
        geohash = encode(geo_info.location.latitude,
                         geo_info.location.longitude)
        geohash_fields = {'count': 1}

        geohash_tags = {}
        geohash_tags['geohash'] = geohash
        geohash_tags['ip'] = ip
        geohash_tags['host'] = self.hostname
        geohash_tags['country_code'] = geo_info.country.iso_code
        geohash_tags['country_name'] = geo_info.country.name
        geohash_tags['state'] = geo_info.subdivisions.most_specific.name
        geohash_tags['state_code'] = geo_info.subdivisions.most_specific.iso_code
        geohash_tags['city'] = geo_info.city.name
        geohash_tags['postal_code'] = geo_info.postal.code
        geohash_tags['latitude'] = geo_info.location.latitude
        geohash_tags['longitude'] = geo_info.location.longitude

        geo_metrics = [{
            'tags': geohash_tags,
            'fields': geohash_fields,
            'measurement': self.geo_measurement,
        }]
        logging.debug(f'Geo metrics: {geo_metrics}')
        try:
            self.influxdb.write_points(geo_metrics)
        except (InfluxDBServerError, ConnectionError) as e:
            logging.error(
                f'Error writing data to InfluxDB! Check your database!\nError: {e}'
            )

    def store_log_metric(self, ip, geo_info, log_data):
        log_data_fields = {
            'count': 1,
            'bytes_sent': int(log_data['bytes_sent']),
            'request_time': float(log_data['request_time']),
        }
        # If several connection times are provided, use the last one
        log_data['connect_time'] = log_data['connect_time'].split(',')[-1]
        if log_data['connect_time'] == '-':
            log_data_fields['connect_time'] = 0.0
        else:
            log_data_fields['connect_time'] = float(log_data['connect_time'])

        log_data_tags = {}
        log_data_tags['ip'] = log_data['ipaddress']
        log_data_tags['datetime'] = datetime.strptime(log_data['dateandtime'],
                                                      '%d/%b/%Y:%H:%M:%S %z')
        log_data_tags['remote_user'] = log_data['remote_user']
        log_data_tags['method'] = log_data['method']
        log_data_tags['referrer'] = log_data['referrer']
        log_data_tags['host'] = log_data['host']
        log_data_tags['http_version'] = log_data['http_version']
        log_data_tags['status_code'] = log_data['status_code']
        log_data_tags['bytes_sent'] = log_data['bytes_sent']
        log_data_tags['url'] = log_data['url']
        log_data_tags['user_agent'] = log_data['user_agent']
        log_data_tags['request_time'] = log_data['request_time']
        log_data_tags['connect_time'] = log_data['connect_time']
        log_data_tags['city'] = geo_info.city.name if geo_info else "-"
        log_data_tags['country_code'] = geo_info.country.iso_code if geo_info else "-"
        log_data_tags['country_name'] = geo_info.country.name if geo_info else "-"

        log_metrics = [{
            'tags': log_data_tags,
            'fields': log_data_fields,
            'measurement': self.log_measurement,
        }]
        logging.debug(f'NGINX log metrics: {log_metrics}')
        try:
            self.influxdb.write_points(log_metrics)
        except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
            logging.error(
                f'Error writing data to InfluxDB! Check your database!\nError: {e}'
            )

    def logparse(self):
        inode = stat(self.log_path).st_ino

        # Determine whether to use enriched or basic log parsing
        send_logs = self.send_nginx_logs.lower() == 'true'
        if not self.regex_tester() and send_logs:
            send_logs = False
            logging.warning(
                'NGINX log metrics disabled! Double check your NGINX custom log format..'
            )
        if send_logs:
            re_log_ipv4 = self.RE_LOGIPV4
            re_log_ipv6 = self.RE_LOGIPV6
        else:
            re_log_ipv4 = self.RE_IPV4
            re_log_ipv6 = self.RE_IPV6

        # Main loop to parse access.log file in tailf style with sending metrics.
        with open(self.log_path, 'r') as log_file:
            logging.info('Starting log parsing')
            str_results = stat(self.log_path)
            st_size = str_results[6]
            log_file.seek(st_size)

            # Keep waiting for new logs
            while True:
                where = log_file.tell()
                line = log_file.readline()
                inodenew = stat(self.log_path).st_ino
                if inode != inodenew:
                    # File has changed, we need to reload it, exit this parsing loop
                    break

                if not line:
                    # No new data, wait for a bit
                    sleep(self.POLLING_PERIOD)
                    log_file.seek(where)
                else:
                    re_match = re_log_ipv4.match(line)
                    if not re_match:
                        re_match = re_log_ipv6.match(line)
                    if not re_match:
                        logging.warning(
                            'Failed to match regex that previously matched!? Skipping this line!\n'
                            'Please share the log line below on Discord or Github!\n'
                            f'Line: {line}')
                        continue
                    log_data = re_match.groupdict()
                    ip = log_data.get('ipaddress', re_match.group(1))
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        geo_info = self.geoip.city(ip)
                        if geo_info:
                            self.store_geo_metric(ip, geo_info, log_data)
                        if send_logs:
                            self.store_log_metric(ip, geo_info, log_data)
def logparse(log_path, influxdb_host, influxdb_port, influxdb_database,
             influxdb_user, influxdb_user_pass, influxdb_retention,
             influxdb_shard, geo_measurement, log_measurement,
             send_nginx_logs, geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(host=influxdb_host,
                            port=influxdb_port,
                            username=influxdb_user,
                            password=influxdb_user_pass,
                            database=influxdb_database)

    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request(
            'ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical(
            'Error testing connection to InfluxDB. Please check your url/hostname.\n'
            f'Error: {e}')
        exit(1)

    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical(
            'Error getting database list! Please check your InfluxDB configuration.\n'
            f'Error: {e}')
        exit(1)

    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

        retention_policies = [
            policy['name'] for policy in client.get_list_retention_policies(
                database=influxdb_database)
        ]
        if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
            logging.info(
                f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})'
            )
            client.create_retention_policy(
                name=f'{influxdb_database} {influxdb_retention}-{influxdb_shard}',
                duration=influxdb_retention,
                replication='1',
                database=influxdb_database,
                default=True,
                shard_duration=influxdb_shard)

    re_ipv4 = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE)  # NOQA
    re_ipv6 = compile(r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])', IGNORECASE)  # NOQA

    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        send_logs = False
        re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
        re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
        logging.info('SEND_NGINX_LOGS set to false')

    if not regex_tester(log_path, 3):
        if send_logs:
            re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
            re_ipv6 = compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
            send_logs = False
            logging.warning(
                'NGINX log metrics disabled! Double check your NGINX custom log format..'
            )

    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            if inode != inodenew:
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning(
                        'Failed to match regex that previously matched!? Skipping this line!\n'
                        'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or Github: https://github.com/gilbN/geoip2influx\n'
                        f'Line: {line}')
                    continue
                ip_type = ipadd(ip).iptype()
                if ip_type in monitored_ip_types and ip:
                    info = gi.city(ip)
                    if info:
                        geohash = encode(info.location.latitude,
                                         info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name if info.subdivisions.most_specific.name else "-"
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code if info.subdivisions.most_specific.iso_code else "-"
                        geohash_tags['city'] = info.city.name if info.city.name else "-"
                        geohash_tags['postal_code'] = info.postal.code if info.postal.code else "-"
                        geohash_tags['latitude'] = info.location.latitude if info.location.latitude else "-"
                        geohash_tags['longitude'] = info.location.longitude if info.location.longitude else "-"
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error(
                                'Error writing data to InfluxDB! Check your database!\n'
                                f'Error: {e}')
                else:
                    logging.debug(f"Incorrect IP type: {ip_type}")
                if send_logs:
                    data = search(log, line)
                    if ip_type in monitored_ip_types and ip:
                        info = gi.city(ip)
                        if info:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(
                                datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(
                                datadict['request_time'])
                            try:
                                log_data_fields['connect_time'] = float(
                                    datadict['connect_time']
                                ) if datadict['connect_time'] != '-' else 0.0
                            except ValueError:
                                log_data_fields['connect_time'] = str(
                                    datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(
                                datadict['dateandtime'],
                                '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError,
                                    ConnectionError) as e:
                                logging.error(
                                    'Error writing data to InfluxDB! Check your database!\n'
                                    f'Error: {e}')
class GeoIP2(GeoIPOO):
    reader = None
    cache = {}

    def __init__(self, path_to_db):
        from geoip2.database import Reader
        try:
            self.reader = Reader(path_to_db)
        except Exception:
            pass
        self.cache = {}

    def data(self, ip):
        try:
            return self.cache[ip]
        except KeyError:
            try:
                response = self.reader.city(ip)
            except Exception:
                return None
            self.cache[ip] = response
            return response

    def country(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.country.iso_code
        except Exception:
            return default

    def country_name(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.country.name
        except Exception:
            return default

    def region_name(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.subdivisions.most_specific.name
        except Exception:
            return default

    def city_name(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.city.name
        except Exception:
            return default

    def postal_code(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.postal.code
        except Exception:
            return default

    def latitude(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.location.latitude
        except Exception:
            return default

    def longitude(self, ip, default=None):
        rsp = self.data(ip)
        if rsp is None:
            return default
        try:
            return rsp.location.longitude
        except Exception:
            return default

    def close(self):
        self.reader.close()
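# Usage sketch for the caching wrapper above (path and IP are example values);
# every accessor returns its `default` instead of raising on failure, and
# repeated lookups for the same IP are served from the in-memory cache:
g = GeoIP2('/var/lib/GeoIP/GeoLite2-City.mmdb')
print(g.country('8.8.8.8', default='??'))   # ISO code, or '??' on any failure
print(g.city_name('8.8.8.8'))
g.close()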