def _wrapper(*args, **kwargs):
    # Inner wrapper of a locking decorator: "func" comes from the enclosing
    # scope. The call is serialized against the pod status, container status
    # and container log harvesters by holding all three lock sessions
    # (3 hours each) for the duration of the call.
    from . import podstatus_harvester
    from . import containerstatus_harvester
    from . import containerlog_harvester
    with LockSession(podstatus_harvester.get_client(), 3600 * 3) as lock_session1:
        with LockSession(containerstatus_harvester.get_client(), 3600 * 3) as lock_session2:
            with LockSession(containerlog_harvester.get_client(), 3600 * 3) as lock_session3:
                func(*args, **kwargs)
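# A hedged alternative sketch of the same pattern: contextlib.ExitStack
# flattens the three nested "with" blocks while still releasing the locks in
# reverse acquisition order. "synchronized" is a hypothetical name for the
# outer decorator, which the original fragment does not show.
from contextlib import ExitStack

def synchronized(func):  # hypothetical outer decorator
    def _wrapper(*args, **kwargs):
        from . import podstatus_harvester
        from . import containerstatus_harvester
        from . import containerlog_harvester
        with ExitStack() as stack:
            for harvester in (podstatus_harvester,
                              containerstatus_harvester,
                              containerlog_harvester):
                stack.enter_context(LockSession(harvester.get_client(), 3600 * 3))
            return func(*args, **kwargs)
    return _wrapper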
def clean_orphan_resources(repository):
    with LockSession(repository, 3600, 3000) as lock_session:
        all_resourceids = set()
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.ALL_RESOURCE):
            all_resourceids.add(meta["resource_id"])
        total = len(all_resourceids)
        logger.info("Found {} resources".format(total))
        lock_session.renew()
        data_path = repository.resource_data_path
        if data_path[-1] != "/":
            data_path = "{}/".format(data_path)
        orphan_resources = []
        for resource in repository.storage.list_resources(data_path):
            name = resource.name[len(data_path):]
            if name not in all_resourceids:
                orphan_resources.append(resource.name)
        logger.info("Found {} orphan resources".format(len(orphan_resources)))
        for resource in orphan_resources:
            repository.storage.delete(resource)
            logger.info("Delete orphan resource '{}' from repository".format(resource))
        logger.info("Deleted {} orphan resources".format(len(orphan_resources)))
def archive():
    with LockSession(get_resource_repository(), 3600, 3000) as lock_session:
        # archive the latest files
        files.archive(get_resource_repository(),
                      folder=settings.ARCHIVE_FOLDER,
                      recursive=True,
                      reserve_folder=settings.RESERVE_FOLDER,
                      archive=False,
                      file_filter=need_archive)
        # clean expired deleted resources from storage
        files.clean_expired_deleted_resources(get_resource_repository(),
                                              DELETED_RESROURCE_EXPIRED)
def harvest(reconsume=False):
    with LockSession(get_resource_consume_client(), 3000) as lock_session:
        if not reconsume:
            # check whether the nginx configuration has changed since the last consume
            if get_resource_consume_client().is_behind(
                    resources=["nginx-config.yml", "nginx.yml"]):
                reconsume = True
            else:
                return 0

        # consume the nginx config files
        return get_resource_consume_client().consume(
            process_nginx,
            resources=["nginx-config.yml", "nginx.yml"],
            reconsume=reconsume)
def clean_resources(repository, delete_resource_filter, batch=None):
    """
    Clean the resources that satisfy delete_resource_filter
    """
    logger.info("Begin to find all deleted resources")
    delete_resourceids = set()
    with LockSession(repository, 3600, 3000) as lock_session:
        total_resources = 0
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.ALL_RESOURCE):
            total_resources += 1
            if delete_resource_filter(meta):
                delete_resourceids.add(meta["resource_id"])
        total = len(delete_resourceids)
        logger.info("Found {}/{} deleted resources".format(total, total_resources))
        lock_session.renew()
        deleted = 0
        while deleted < total:
            with MetadataSession() as session:
                while deleted < total:
                    resourceid = delete_resourceids.pop()
                    repository.delete_resource(resourceid, permanent_delete=True)
                    logger.info(
                        "Permanently delete the file({}) from repository because it matches the delete filter condition"
                        .format(resourceid))
                    deleted += 1
                    lock_session.renew_if_needed()
                    if batch and deleted % batch == 0:
                        break
        logger.info("Permanently delete {}/{} resources".format(deleted, total))
    # run outside the lock session: clean_orphan_resources acquires its own lock
    clean_orphan_resources(repository)
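# Usage sketch (hedged): delete_resource_filter receives each resource's
# metadata dict and returns True for resources to purge; "batch" bounds how
# many deletions are grouped into one metadata session. The "tmp/" prefix and
# helper name below are hypothetical.
def _example_clean_tmp_resources(repository):
    clean_resources(repository,
                    lambda meta: meta["resource_id"].startswith("tmp/"),
                    batch=100)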
def clean_expired_deleted_resources(repository, expire_time):
    """
    Clean the logically deleted resources whose delete time is older than expire_time
    """
    logger.info("Begin to find all expired deleted resources")
    expired_resourceids = set()
    with LockSession(repository, 3600, 3000) as lock_session:
        total_resources = 0
        now = timezone.now()
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.DELETED_RESOURCE):
            total_resources += 1
            if ResourceConstant.DELETE_TIME_KEY in meta and now > meta[
                    ResourceConstant.DELETE_TIME_KEY] + expire_time:
                expired_resourceids.add(meta["resource_id"])
        lock_session.renew()
        total = len(expired_resourceids)
        logger.info("Found {}/{} expired deleted resources".format(total, total_resources))
        deleted = 0
        with MetadataSession() as session:
            for resourceid in expired_resourceids:
                repository.delete_resource(resourceid, permanent_delete=True)
                logger.debug(
                    "Permanently delete the file({}) from repository because its deletion has expired"
                    .format(resourceid))
                deleted += 1
                lock_session.renew_if_needed()
        logger.info("Permanently delete {}/{} resources".format(deleted, total))
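# Usage sketch (hedged): expire_time is added to each resource's recorded
# delete time and compared with timezone.now(), so a datetime.timedelta fits.
# The 30-day window and helper name are hypothetical.
from datetime import timedelta

def _example_purge_old_deletions(repository):
    clean_expired_deleted_resources(repository, timedelta(days=30))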
def harvest(reconsume=False):
    try:
        with LockSession(
                get_consume_client(),
                settings.NGINXLOG_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            if reconsume and get_consume_client().is_client_exist(
                    clientid=settings.RESOURCE_CLIENTID):
                get_consume_client().delete_clients(
                    clientid=settings.RESOURCE_CLIENTID)

            if reconsume:
                WebAppAccessLog.objects.all().delete()
                WebAppAccessDailyLog.objects.all().delete()

            context = {"reconsume": reconsume, "lock_session": lock_session}
            # apply the latest filter changes first
            context["path_normalizers"] = list(
                RequestPathNormalizer.objects.filter(order__gt=0).order_by("-order"))
            context["path_filter"] = RequestPathNormalizer.objects.filter(
                order=0).first()
            context["parameter_filters"] = list(
                RequestParameterFilter.objects.all().order_by("-order"))
            context["path_normalizer_map"] = {}
            context["parameter_filter_map"] = {}
            """
            don't apply the changed rules to the history data
            applied = False
            while not applied:
                context["path_normalizers"] = list(RequestPathNormalizer.objects.filter(order__gt=0).order_by("-order"))
                context["path_filter"] = RequestPathNormalizer.objects.filter(order=0).first()
                context["path_normalizer_map"] = {}
                context["parameter_filters"] = list(RequestParameterFilter.objects.all().order_by("-order"))
                context["parameter_filter_map"] = {}
                applied = apply_rules(context)
            """
            # consume the nginx access log files
            result = get_consume_client().consume(process_log(context))

            # renew the lock, then populate the daily report
            lock_session.renew()
            WebAppAccessDailyReport.populate_data(lock_session)

            now = timezone.localtime()
            if now.hour <= 2:  # only purge expired logs in the early-morning window (00:00-02:59)
                obj = WebAppAccessLog.objects.all().order_by("-log_starttime").first()
                if obj:
                    last_log_datetime = timezone.localtime(obj.log_starttime)
                    earliest_log_datetime = timezone.make_aware(
                        datetime(last_log_datetime.year, last_log_datetime.month,
                                 last_log_datetime.day)) - timedelta(
                                     days=settings.NGINXLOG_ACCESSLOG_LIFETIME)
                    sql = "DELETE FROM nginx_webappaccesslog where log_starttime < '{}'".format(
                        earliest_log_datetime.strftime("%Y-%m-%d 00:00:00 +8:00"))
                    with connection.cursor() as cursor:
                        logger.info(
                            "Delete expired web app access log. last_log_datetime={}, sql={}"
                            .format(last_log_datetime, sql))
                        cursor.execute(sql)
                lock_session.renew()
                obj = WebAppAccessDailyLog.objects.all().order_by("-log_day").first()
                if obj:
                    last_log_day = obj.log_day
                    earliest_log_day = last_log_day - timedelta(
                        days=settings.NGINXLOG_ACCESSDAILYLOG_LIFETIME)
                    sql = "DELETE FROM nginx_webappaccessdailylog where log_day < date('{}')".format(
                        earliest_log_day.strftime("%Y-%m-%d"))
                    with connection.cursor() as cursor:
                        logger.info(
                            "Delete expired web app access daily log. last_log_day={}, sql={}"
                            .format(last_log_day, sql))
                        cursor.execute(sql)
            return result
    except exceptions.AlreadyLocked as ex:
        msg = "The previous harvest process is still running.{}".format(str(ex))
        logger.info(msg)
        return ([], [(None, None, None, msg)])
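# A hedged alternative for the raw-SQL purge above: Django's database cursor
# accepts query parameters, which avoids hand-formatting datetimes into the
# statement. "_example_purge_access_logs" is a hypothetical helper.
from django.db import connection

def _example_purge_access_logs(earliest_log_datetime):
    with connection.cursor() as cursor:
        cursor.execute(
            "DELETE FROM nginx_webappaccesslog WHERE log_starttime < %s",
            [earliest_log_datetime])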
def harvest(reconsume=None, max_harvest_files=None, context={}):
    # note: the mutable default "context" is shared across calls, so the
    # cluster/namespace/workload caches survive between harvest runs
    need_clean = [False]

    def _post_consume(client_consume_status, consume_result):
        now = timezone.localtime()
        if "next_clean_time" not in client_consume_status:
            client_consume_status["next_clean_time"] = timezone.make_aware(
                datetime(now.year, now.month, now.day)) + timedelta(days=1)
        elif now.hour > 6:
            return
        elif now >= client_consume_status["next_clean_time"]:
            need_clean[0] = True
            client_consume_status["next_clean_time"] = timezone.make_aware(
                datetime(now.year, now.month, now.day)) + timedelta(days=1)

    now = timezone.now()
    harvester = models.Harvester(name=harvestername, starttime=now,
                                 last_heartbeat=now,
                                 status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),
                         settings.CONTAINERSTATUS_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            try:
                if reconsume and get_client().is_client_exist(
                        clientid=settings.RESOURCE_CLIENTID):
                    get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)

                context["containerstatus"] = context.get("containerstatus", {})
                context["containerstatus"] = {
                    "reconsume": reconsume if reconsume is not None else context["containerstatus"].get("reconsume", False),
                    "max_harvest_files": max_harvest_files if max_harvest_files is not None else context["containerstatus"].get("max_harvest_files", None),
                    "lock_session": lock_session,
                    "new_deployed_workloads": set(),
                    "terminated_containers": set(),
                    "containers": {},
                    "harvester": harvester,
                    "harvested_files": 0
                }
                context["resourceclients"] = context.get("resourceclients", {})
                context["clusters"] = context.get("clusters", {})
                context["namespaces"] = context.get("namespaces", {})
                context["workloads"] = context.get("workloads", {})
                # consume the container status files
                result = get_client().consume(process_status(context),
                                              f_post_consume=_post_consume)
                if result[1]:
                    if result[0]:
                        message = """Failed to harvest container status, {} container status files were consumed successfully.
{}
{} container status files failed to be consumed
{}"""
                        message = message.format(
                            len(result[0]),
                            "\n ".join(["Succeeded to harvest container status file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]]),
                            len(result[1]),
                            "\n ".join(["Failed to harvest container status file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                        )
                    else:
                        message = """Failed to harvest container status, {} container status files failed to be consumed
{}"""
                        message = message.format(
                            len(result[1]),
                            "\n ".join(["Failed to harvest container status file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                        )
                elif result[0]:
                    message = """Succeeded to harvest container status, {} container status files were consumed successfully.
{}"""
                    message = message.format(
                        len(result[0]),
                        "\n ".join(["Succeeded to harvest container status file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]])
                    )
                else:
                    message = "Succeeded to harvest container status, no new container status file was added since the last harvest"

                harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
                try:
                    # change the status of containers which have had no status
                    # data harvested in the recent half an hour
                    if "last_archive_time" in context:
                        for container in models.Container.objects.filter(
                                status__in=("Waiting", "Running"),
                                last_checked__lt=context["last_archive_time"] - timedelta(minutes=30)):
                            container.status = "LostHeartbeat"
                            container.save(update_fields=["status"])
                            update_latest_containers(context, container)
                    # save the changed workloads
                    for workload, workload_update_fields in context["workloads"].values():
                        if workload_update_fields:
                            workload.save(update_fields=workload_update_fields)
                except:
                    harvester.status = models.Harvester.FAILED
                    msg = "Failed to save the changed containers or workloads.{}".format(traceback.format_exc())
                    logger.error(msg)
                    message = """{}
=========Consuming Results================
{}""".format(msg, message)
                return result
            except:
                harvester.status = models.Harvester.FAILED
                message = "Failed to harvest container status.{}".format(traceback.format_exc())
                logger.error(message)
                return ([], [(None, None, None, message)])
    except exceptions.AlreadyLocked as ex:
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([], [(None, None, None, message)])
    finally:
        if need_clean[0]:
            try:
                check_aborted_containers(harvester, context)
                clean_expired_containers(harvester)
                message = """Succeeded to clean expired containers.
{}""".format(message)
            except:
                harvester.status = models.Harvester.FAILED
                msg = "Failed to clean expired containers.{}".format(traceback.format_exc())
                logger.error(msg)
                message = """{}
=========Consuming Results================
{}""".format(msg, message)
        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime", "message", "status", "last_heartbeat"])
def harvest(reconsume=None, max_harvest_files=None, context={}):
    # note: the mutable default "context" is shared across calls, so the
    # cluster/workload caches survive between harvest runs
    need_clean = [False]

    def _post_consume(client_consume_status, consume_result):
        now = timezone.localtime()
        if "next_clean_time" not in client_consume_status:
            client_consume_status["next_clean_time"] = timezone.make_aware(
                datetime.datetime(now.year, now.month, now.day)) + datetime.timedelta(days=1)
        elif now.hour > 6:
            return
        elif now >= client_consume_status["next_clean_time"]:
            need_clean[0] = True
            client_consume_status["next_clean_time"] = timezone.make_aware(
                datetime.datetime(now.year, now.month, now.day)) + datetime.timedelta(days=1)

    now = timezone.now()
    harvester = models.Harvester(name=harvestername, starttime=now,
                                 last_heartbeat=now,
                                 status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),
                         settings.CONTAINERLOG_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            try:
                if reconsume:
                    if get_client().is_client_exist(clientid=settings.RESOURCE_CLIENTID):
                        get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)
                    modeldata.clean_containerlogs()

                context["logstatus"] = context.get("logstatus", {})
                context["logstatus"] = {
                    "reconsume": reconsume if reconsume is not None else context["logstatus"].get("reconsume", False),
                    "max_harvest_files": max_harvest_files if max_harvest_files is not None else context["logstatus"].get("max_harvest_files", None),
                    "lock_session": lock_session,
                    "containerlogs": {},
                    "harvester": harvester,
                    "containers": {},
                    "harvested_files": 0
                }
                context["resourceclients"] = context.get("resourceclients", {})
                context["clusters"] = context.get("clusters", {})
                context["workloads"] = context.get("workloads", {})
                # consume the container log files
                result = get_client().consume(process_status(context),
                                              f_post_consume=_post_consume)
                if result[1]:
                    if result[0]:
                        message = """Failed to harvest container log, {} container log files were consumed successfully.
{}
{} container log files failed to be consumed
{}"""
                        message = message.format(
                            len(result[0]),
                            "\n ".join(["Succeeded to harvest container log file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]]),
                            len(result[1]),
                            "\n ".join(["Failed to harvest container log file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                        )
                    else:
                        message = """Failed to harvest container log, {} container log files failed to be consumed
{}"""
                        message = message.format(
                            len(result[1]),
                            "\n ".join(["Failed to harvest container log file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                        )
                elif result[0]:
                    message = """Succeeded to harvest container log, {} container log files were consumed successfully.
{}"""
                    message = message.format(
                        len(result[0]),
                        "\n ".join(["Succeeded to harvest container log file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]])
                    )
                else:
                    message = "Succeeded to harvest container log, no new container log file was added since the last harvest"
                harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
                return result
            except:
                harvester.status = models.Harvester.FAILED
                message = "Failed to harvest container log.{}".format(traceback.format_exc())
                logger.error(message)
                return ([], [(None, None, None, message)])
    except exceptions.AlreadyLocked as ex:
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([], [(None, None, None, message)])
    finally:
        if need_clean[0]:
            try:
                clean_expired_containerlogs(harvester)
                message = """Succeeded to clean expired container logs.
=========Consuming Results================
{}""".format(message)
            except:
                harvester.status = models.Harvester.FAILED
                msg = "Failed to clean expired container logs.{}".format(traceback.format_exc())
                logger.error(msg)
                message = """{}
=========Consuming Results================
{}""".format(msg, message)
        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime", "message", "status", "last_heartbeat"])
def harvest(reconsume=None, max_harvest_files=None, context={}):
    # note: the mutable default "context" is shared across calls, so the
    # cluster/namespace/workload caches survive between harvest runs
    now = timezone.now()
    harvester = models.Harvester(name=harvestername, starttime=now,
                                 last_heartbeat=now,
                                 status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),
                         settings.PODSTATUS_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            if reconsume and get_client().is_client_exist(
                    clientid=settings.RESOURCE_CLIENTID):
                get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)

            context["podstatus"] = context.get("podstatus", {})
            context["podstatus"].update({
                "reconsume": reconsume if reconsume is not None else context["podstatus"].get("reconsume", False),
                "max_harvest_files": max_harvest_files if max_harvest_files is not None else context["podstatus"].get("max_harvest_files", None),
                "lock_session": lock_session,
                "removable_workloads": set(),
                "orphan_namespaces": set(),
                "harvester": harvester,
                "harvested_files": 0
            })
            context["clusters"] = context.get("clusters", {})
            context["namespaces"] = context.get("namespaces", {})
            context["workloads"] = context.get("workloads", {})
            # consume the pod status files
            result = get_client().consume(process_status(context))
            if result[1]:
                if result[0]:
                    message = """Failed to harvest pod status, {} pod status files were consumed successfully.
{}
{} pod status files failed to be consumed
{}"""
                    message = message.format(
                        len(result[0]),
                        "\n ".join(["Succeeded to harvest pod status file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]]),
                        len(result[1]),
                        "\n ".join(["Failed to harvest pod status file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                    )
                else:
                    message = """Failed to harvest pod status, {} pod status files failed to be consumed
{}"""
                    message = message.format(
                        len(result[1]),
                        "\n ".join(["Failed to harvest pod status file '{}'.{}".format(resource_ids, msg) for resource_status, resource_status_name, resource_ids, msg in result[1]])
                    )
            elif result[0]:
                message = """Succeeded to harvest pod status, {} pod status files were consumed successfully.
{}"""
                message = message.format(
                    len(result[0]),
                    "\n ".join(["Succeeded to harvest pod status file '{}'".format(resource_ids) for resource_status, resource_status_name, resource_ids in result[0]])
                )
            else:
                message = "Succeeded to harvest pod status, no new pod status file was added since the last harvest"
            harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
            return result
    except exceptions.AlreadyLocked as ex:
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([], [(None, None, None, message)])
    except:
        harvester.status = models.Harvester.FAILED
        message = "Failed to harvest pod status.{}".format(traceback.format_exc())
        logger.error(message)
        return ([], [(None, None, None, message)])
    finally:
        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime", "message", "status", "last_heartbeat"])
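# Hedged sketch of how a caller can interpret the value these harvest
# functions return: a (succeeded, failed) pair, where each succeeded entry is
# a (resource_status, resource_status_name, resource_ids) tuple and each
# failed entry carries a trailing error message, matching the unpacking used
# in the message-building code above. "_example_report" is a hypothetical
# helper, not part of the original modules.
def _example_report(result):
    succeeded, failed = result
    for resource_status, resource_status_name, resource_ids in succeeded:
        print("harvested:", resource_ids)
    for resource_status, resource_status_name, resource_ids, msg in failed:
        print("failed:", resource_ids, msg)
    return not failed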
def sync_dependent_tree(workload_changetime=None,
                        cluster_lock_sessions=None,
                        rescan=False,
                        rescan_resource=False,
                        rescan_dependency=False):
    """
    Sync the dependent tree if required.
    This function is synchronized against the rancher configuration storage.
    cluster_lock_sessions: a list of tuples (cluster, cluster_lock_session)
    workload_changetime: if not None, only the workloads changed after workload_changetime will be processed
    """
    from .rancher_harvester import get_client
    release_lock = False
    try:
        if not cluster_lock_sessions:
            cluster_lock_sessions = []
            release_lock = True
            for cluster in models.Cluster.objects.filter(added_by_log=False):
                cluster_lock_sessions.append(
                    (cluster, LockSession(get_client(cluster.name), 3000, 1500)))

        def _renew_locks():
            for cluster, lock_session in cluster_lock_sessions:
                lock_session.renew_if_needed()

        scan_time = timezone.now()
        scan_modules = list(
            models.EnvScanModule.objects.filter(active=True).order_by("-priority"))
        qs = models.Workload.objects.filter(
            cluster__in=[o[0] for o in cluster_lock_sessions])
        if workload_changetime:
            qs = qs.filter(
                Q(updated__gte=workload_changetime)
                | Q(deleted__gte=workload_changetime))
        qs = qs.order_by("cluster__name", "namespace__name", "name")
        wl_cache = {}
        dependency_cache = {}
        wls = []
        # scan resources if required
        for wl in qs:
            logger.debug("Scan resource for workload({}<{}>)".format(wl, wl.id))
            try:
                wl.scan_resource(rescan=rescan_resource,
                                 scan_modules=scan_modules,
                                 scan_time=scan_time)
            except:
                logger.error("Failed to scan the resource of the workload({}).{}".format(
                    wl, traceback.format_exc()))
            _renew_locks()
            wl_cache[wl.id] = wl
            wls.append(wl)
        # rescan dependencies if required
        for wl in wls:
            logger.debug("Scan dependency for workload({}<{}>)".format(wl, wl.id))
            wl.scan_dependency(rescan=rescan_dependency, f_renew_lock=_renew_locks)
        # repopulate the dependent tree
        dep_wlids = set()
        dep_wls = []
        now = timezone.now()
        for wl in wls:
            dependency_cache.clear()
            update_workload_dependent_tree(wl,
                                           wl_cache=wl_cache,
                                           dependency_cache=dependency_cache,
                                           renew_locks=_renew_locks)
            update_resource_dependent_tree(wl,
                                           wl_cache=wl_cache,
                                           dependency_cache=dependency_cache,
                                           renew_locks=_renew_locks)
    finally:
        # release the locks acquired by this function
        if release_lock:
            for cluster, lock_session in cluster_lock_sessions:
                try:
                    lock_session.release()
                except Exception as ex:
                    logger.error("Failed to release the lock.{}".format(str(ex)))
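# Usage sketch (hedged): re-sync only the workloads that changed in the last
# hour, letting the function acquire and release the per-cluster locks itself
# (cluster_lock_sessions defaults to None). The one-hour window is hypothetical.
from datetime import timedelta
from django.utils import timezone

def _example_sync_recent_changes():
    sync_dependent_tree(workload_changetime=timezone.now() - timedelta(hours=1))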
def archive(repository,
            files=None,
            folder=None,
            recursive=False,
            file_filter=None,
            reserve_folder=True,
            archive=True,
            checking_policy=[FILE_MD5]):
    """
    Archive the given files (or the files in a folder) and push them to the azure blob resource repository.
    files: the file or list of files to archive
    folder: all the files in this folder will be archived
    recursive: only used for folder; if true, the files in nested folders will be archived too
    file_filter: only used for folder; if not None, only the files which satisfy the filter will be archived
    reserve_folder: only used for folder; if true, the path relative to folder is preserved when pushing to the repository
    archive: if true, each file version will be saved in the repository
    checking_policy: the policy (a single policy or a list of policies) used to decide whether a file has been modified
    """
    if not files and not folder:
        raise Exception("Either files or folder must be specified.")
    if files and folder:
        raise Exception("Can't specify both files and folder at the same time.")
    if not checking_policy:
        checking_policy = [FILE_MD5]
    elif not isinstance(checking_policy, (list, tuple)):
        checking_policy = [checking_policy]
    check_md5 = FILE_MD5 in checking_policy

    with LockSession(repository, 3600, 3000) as lock_session:
        with MetadataSession() as session:
            if files:
                if not isinstance(files, (tuple, list)):
                    archive_files = [(os.path.abspath(files), os.path.split(files)[1])]
                else:
                    archive_files = [(os.path.abspath(f), os.path.split(f)[1]) for f in files]
                # check whether each file exists before archiving it
                for f, resource_id in archive_files:
                    if not os.path.exists(f):
                        raise Exception("File {} does not exist".format(f))
                    elif not os.path.isfile(f):
                        raise Exception("{} is not a file".format(f))
                    else:
                        _archive_file(repository, f, resource_id, checking_policy, check_md5)
                        lock_session.renew_if_needed()
            else:
                non_exist_resourceids = {}
                for meta in repository.resource_metadatas(
                        throw_exception=False,
                        current_resource=True,
                        resource_status=ResourceConstant.ALL_RESOURCE):
                    non_exist_resourceids[meta["resource_id"]] = meta.get(
                        ResourceConstant.DELETED_KEY, False)
                folder = os.path.abspath(folder)
                folders = [folder]
                f_path = None
                resource_id = None
                while folders:
                    cur_folder = folders.pop(0)
                    for f in os.listdir(cur_folder):
                        f_path = os.path.join(cur_folder, f)
                        if os.path.isfile(f_path):
                            if not file_filter or file_filter(os.path.relpath(f_path, folder)):
                                if reserve_folder:
                                    resource_id = os.path.relpath(f_path, folder)
                                else:
                                    resource_id = os.path.split(f_path)[1]
                                _archive_file(repository, f_path, resource_id,
                                              checking_policy, check_md5,
                                              metadata={"folder": folder})
                                lock_session.renew_if_needed()
                                if resource_id in non_exist_resourceids:
                                    del non_exist_resourceids[resource_id]
                            else:
                                pass
                                #logger.debug("File({}) is filtered out by file filter,ignore".format(f_path))
                        elif os.path.isdir(f_path):
                            if recursive:
                                folders.append(f_path)
                            else:
                                logger.debug(
                                    "Recursive is False and {} is a sub folder,ignore".format(f_path))
                        else:
                            logger.debug(
                                "{} is not a regular file or folder,ignore".format(f_path))
                # handle the resources whose source files disappeared from the folder
                for resourceid, is_deleted in non_exist_resourceids.items():
                    if not file_filter or file_filter(resourceid):
                        if not is_deleted:
                            repository.delete_resource(resourceid, permanent_delete=False)
                            lock_session.renew_if_needed()
                            logger.debug(
                                "Logically delete the file({}) from repository because it doesn't exist anymore"
                                .format(resourceid))
                    else:
                        repository.delete_resource(resourceid, permanent_delete=True)
                        logger.debug(
                            "Permanently delete the file({}) from repository because it doesn't meet the filter condition"
                            .format(resourceid))
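# Usage sketch (hedged): mirror a log directory into the repository, keeping
# relative paths and skipping everything but *.log files. The folder path,
# filter and helper name below are hypothetical.
def _example_archive_logs(repository):
    archive(repository,
            folder="/var/log/myapp",
            recursive=True,
            reserve_folder=True,
            archive=False,
            file_filter=lambda relpath: relpath.endswith(".log"))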