예제 #1
0
    def get_videos(self):
        """Return the listing found under the first ensemble model's "videos/" prefix.

        Loads the config via ``self.load_config()`` and then asks
        ``utilsparams3.ls_name`` for the contents of
        ``<jobdir>/process_results/ensemble-model1-2030-01-01/videos/`` in
        ``self.bucket``. Whatever ``ls_name`` returns is passed back unchanged.
        """
        config = self.load_config()
        ## NOTE(review): this value is computed but never used below. It is kept so that a
        ## config without an "ensemble_size" entry still fails here, exactly as before.
        _ensemble_size = max(config["ensemble_size"], 9)
        ## Only one model folder is listed: the videos are assumed to be identical across the
        ## ensemble because they were all copied from the same source, and this folder name is
        ## assumed to be a fixed output of the dgp models.
        video_prefix = os.path.join(
            self.jobdir,
            "process_results",
            "ensemble-model1-2030-01-01",
            "videos/",
        )
        return utilsparams3.ls_name(self.bucket, video_prefix)
예제 #2
0
    def copy_logs(self):
        """Copy every object listed under ``<jobdir>/logs`` into ``<jobdir>/logs_pre_<stepname>``.

        Source keys are read from ``self.bucket``; each one is copied, keeping only its base
        name, to the ``logs_pre_{stepname}`` folder of the same job directory in
        ``self.targetbucket``. Nothing is returned.
        """
        source_keys = utilsparams3.ls_name(self.bucket, os.path.join(self.jobdir, "logs"))
        ## Destination key for each source key: same file name, different folder.
        destination_keys = [
            os.path.join(
                self.jobdir,
                "logs_pre_{}".format(self.stepname),
                os.path.basename(source_key),
            )
            for source_key in source_keys
        ]
        for source_key, destination_key in zip(source_keys, destination_keys):
            copy_source = {"Bucket": self.bucket, "Key": source_key}
            s3_resource.meta.client.copy(copy_source, self.targetbucket, destination_key)
예제 #3
0
def monitor_updater(event, context):
    """
    Handle an instance state-change event by updating that instance's monitor log.

    Reads ``event['time']``, ``event['detail']['instance-id']`` and
    ``event['detail']['state']``, plus the ``BUCKET_NAME`` environment variable.

    For the states "running" and "shutting-down" the log ``<instance-id>.json`` is updated
    through ``utilsparams3.update_monitorlog``. For "shutting-down" the log is additionally
    moved from ``logs/active/`` to ``logs/<group>/`` and, after a 5 second pause, the rule
    ``Monitor<jobname>`` is deleted if none of the instances it monitors still has a log under
    ``logs/active/``.

    ``context`` is not used. Any other state raises ``ValueError``.
    """
    event_time = event['time']
    instance_id = event['detail']['instance-id']
    log_file = "{}.json".format(instance_id)
    new_state = event['detail']['state']
    bucket = os.environ["BUCKET_NAME"]

    ## Guard clause: only the two state changes below are handled.
    if new_state not in ("running", "shutting-down"):
        print("unhandled state change. quitting")
        raise ValueError("statechange {} not expected".format(new_state))

    print(log_file)
    print(utilsparams3.ls_name(bucket, "logs/active/"))
    log = utilsparams3.update_monitorlog(bucket, log_file, new_state, event_time)

    data_path = log["datapath"]
    ## Colons in the job name are replaced with underscores before it is used in a rule name.
    job_basename = os.path.basename(log["jobpath"])
    jobname = job_basename.replace(":", "_")
    ## The group name is everything in the data path that precedes "/<INDIR>".
    group_pattern = '.+?(?=/' + os.environ["INDIR"] + ')'
    groupname = re.findall(group_pattern, data_path)[0]
    active_key = os.path.join("logs", "active", log_file)
    archived_key = os.path.join("logs", groupname, log_file)

    if new_state != "shutting-down":
        return

    ## The instance is going away: file its log under the group that launched it.
    utilsparams3.mv(bucket, active_key, archived_key)
    timepkg.sleep(5)

    ## Check whether any instance under this job's rule still has an active log.
    rulename = "Monitor{}".format(jobname)
    monitored = utilsparamevents.get_monitored_instances(rulename)
    still_active = []
    for inst in monitored:
        inst_key = os.path.join("logs", "active", "{}.json".format(inst))
        still_active.append(utilsparams3.exists(bucket, inst_key))

    ## No active logs remain, so the rule can be deleted.
    if not any(still_active):
        utilsparamevents.full_delete_rule(rulename)
예제 #4
0
    def get_costmonitoring(self):
        """
        Check the cost recorded in this group's instance logs against the budget.

        Lists the objects under ``logs/<self.path>/i-`` in ``self.bucket_name``, loads each one
        as JSON and adds up one cost per log. A log's cost is ``price * duration / 3600`` where
        ``duration`` is the number of seconds between its "start" and "end" fields, so "price"
        is treated as an hourly rate. The total is compared with the ``MAXCOST`` environment
        variable and the outcome is appended to ``self.logger`` and written out.

        Returns:
            bool: True if the summed cost is strictly below the budget, False otherwise.

        Raises:
            AssertionError: if ``self.path`` is empty.
        """
        ## first get the path to the log folder we should be looking at.
        group_name = self.path
        assert len(group_name) > 0, "group_name must exist."
        logfolder_path = "logs/{}/".format(group_name)
        full_reportpath = os.path.join(logfolder_path, "i-")
        ## now get all of the computereport filenames:
        all_files = utilsparams3.ls_name(self.bucket_name, full_reportpath)

        ## now calculate the cost, accumulating one contribution per instance log:
        cost = 0
        for jobfile in all_files:
            jobdata = utilsparams3.load_json(self.bucket_name, jobfile)
            price = jobdata["price"]
            start = jobdata["start"]
            end = jobdata["end"]
            try:
                starttime = datetime.strptime(start, "%Y-%m-%dT%H:%M:%SZ")
                endtime = datetime.strptime(end, "%Y-%m-%dT%H:%M:%SZ")
                ## total_seconds() includes whole days; .seconds would drop them for
                ## jobs that ran longer than 24 hours.
                duration = abs((endtime - starttime).total_seconds())
                instcost = price * duration / 3600.
            except TypeError:
                ## In rare cases one or the other of the timestamps has no entry.
                ## For now, charge for the hour:
                instcost = price
            cost += instcost

        ## Now compare with budget:
        budget = float(os.environ["MAXCOST"])

        validjob = cost < budget
        if validjob:
            message = "Incurred cost so far: ${}. Remaining budget: ${}".format(
                cost, budget - cost)
        else:
            message = "Incurred cost so far: ${}. Over budget (${}), cancelling job. Contact administrator.".format(
                cost, budget)
        self.logger.append(message)
        self.logger.write()
        return validjob
예제 #5
0
    def get_costmonitoring(self):
        """
        Project the group's total cost and check it against the budget.

        The projected cost is the sum of three parts:

        1. Past jobs: for every log under ``logs/<self.path>/i-`` in ``self.bucket_name``,
           ``price * duration / 3600`` with ``duration`` in seconds between the log's "start"
           and "end" fields (one hour is charged when the timestamps cannot be parsed
           because of a TypeError).
        2. The current job: ``duration * price * len(self.filenames)``, using
           ``self.jobduration`` (or ``defaultduration`` when it is None) divided by 60, and the
           price returned by ``utilsparampricing.get_price`` for ``self.instance_type``.
        3. Active instances: ``self.prices_active_instances_ami`` for the ``AMI`` environment
           variable, falling back to price * default duration * number of active instances.

        The budget comes from ``utilsparamssm.get_budget_parameter``; if that raises a
        ``ClientError`` whose code is "ParameterNotFound", the ``MAXCOST`` environment
        variable is used instead. The outcome is logged through ``self.logger``.

        Returns:
            bool: True if the projected cost is strictly below the budget, False otherwise.

        Raises:
            AssertionError: if ``self.path`` is empty.
            Exception: if the current job cost, the active instance cost or the budget
                cannot be determined.
        """
        ## first get the path to the log folder we should be looking at.
        group_name = self.path
        assert len(group_name) > 0, (
            "[JOB TERMINATE REASON] Can't locate the group that triggered analysis, making it impossible to determine incurred cost."
        )
        logfolder_path = "logs/{}/".format(group_name)
        full_reportpath = os.path.join(logfolder_path, "i-")
        ## now get all of the computereport filenames:
        all_files = utilsparams3.ls_name(self.bucket_name, full_reportpath)

        ## now calculate the cost of past jobs, one contribution per instance log:
        cost = 0
        for jobfile in all_files:
            jobdata = utilsparams3.load_json(self.bucket_name, jobfile)
            price = jobdata["price"]
            start = jobdata["start"]
            end = jobdata["end"]
            try:
                starttime = datetime.strptime(start, "%Y-%m-%dT%H:%M:%SZ")
                endtime = datetime.strptime(end, "%Y-%m-%dT%H:%M:%SZ")
                ## total_seconds() includes whole days; .seconds would drop them for
                ## jobs that ran longer than 24 hours.
                duration = abs((endtime - starttime).total_seconds())
                instcost = price * duration / 3600.
            except TypeError:
                ## In rare cases one or the other of the timestamps has no entry.
                ## For now, charge for the hour:
                message = "        [Internal (get_costmonitoring)] Duration of past jobs not found. Pricing for an hour"
                self.logger.append(message)
                self.logger.printlatest()
                instcost = price
            cost += instcost

        ## Now add the cost of the job you're currently running:
        ## By assuming they're all standard instances we upper bound the cost.
        try:
            price = utilsparampricing.get_price(
                utilsparampricing.get_region_name(utilsparampricing.region_id),
                self.instance_type,
                os="Linux")
            nb_instances = len(self.filenames)
            if self.jobduration is None:
                duration = defaultduration / 60  ## in hours.
            else:
                duration = self.jobduration / 60
            jobpricebound = duration * price * nb_instances
            cost += jobpricebound
        except Exception as e:
            print(e)
            raise Exception(
                "        [Internal (get_costmonitoring)] Unexpected Error: Unable to estimate cost of current job."
            )

        ## Now add the expected cost of instances running with the current ami:
        try:
            ami = os.environ["AMI"]
            total_activeprice = self.prices_active_instances_ami(ami)
        except Exception as e:
            print(e)
            ## Fallback: price every active instance for the default duration.
            ## If the AMI lookup itself failed, `ami` is unbound here and the fallback
            ## fails too, ending in the generic error below.
            try:
                activeprice = utilsparampricing.get_price(
                    utilsparampricing.get_region_name(
                        utilsparampricing.region_id),
                    self.instance_type,
                    os="Linux")
                number = len(list(utilsparamec2.get_active_instances_ami(ami)))
                activeduration = defaultduration * number / 60  ## default to the default duration instead if not given.
                total_activeprice = activeprice * activeduration
            except Exception as e:
                print(e)
                raise Exception(
                    "        [Internal (get_costmonitoring)] Unexpected Error: Unable to estimate cost of active jobs."
                )

        cost += total_activeprice

        ## Now get the budget:
        budget_error = "        [Internal (get_costmonitoring)] Unexpected Error: Unable to get budget."
        try:
            budget = float(
                utilsparamssm.get_budget_parameter(self.path,
                                                   self.bucket_name))
        except ClientError as e:
            try:
                ## Explicit check instead of `assert`: asserts are stripped under -O, which
                ## would make every ClientError silently fall back to the default budget.
                error_code = e.response["Error"]["Code"]
                if error_code != "ParameterNotFound":
                    raise ValueError(
                        "unexpected error code: {}".format(error_code))
                budget = float(os.environ["MAXCOST"])
                message = "        [Internal (get_costmonitoring)] Customized budget not found. Using default budget value of {}".format(
                    budget)
                self.logger.append(message)
                self.logger.printlatest()
            except Exception:
                raise Exception(budget_error)
        except Exception:
            raise Exception(budget_error)

        ## Now compare with budget:
        validjob = cost < budget
        if validjob:
            message = "        [Internal (get_costmonitoring)] Projected total costs: ${}. Remaining budget: ${}".format(
                cost, budget - cost)
        else:
            message = "        [Internal (get_costmonitoring)] Projected total costs: ${}. Over budget (${}), cancelling job. Contact administrator.".format(
                cost, budget)
        self.logger.append(message)
        self.logger.printlatest()
        self.logger.write()
        return validjob