Example #1
    def __init__(self, bucket_name, key, time):
        ## Initialize as before:
        # Get Upload Location Information
        self.bucket_name = bucket_name
        ## Get the directory above the submit directory.
        self.path = re.findall('.+?(?=/' + re.escape(os.environ["SUBMITDIR"]) + ')',
                               key)[0]
        ## Now add in the time parameter:
        self.time = time
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall(r'.+?(?=submit\.json)',
                                   os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError:
            ## If the filename is just "submit.json", we don't append anything to the job name.
            submit_name = ""

        #### Parse submit file
        submit_file = utilsparams3.load_json(bucket_name, key)

        ## Machine formatted fields (error only available in lambda)
        ## These next three fields check that the submit file is correctly formatted
        try:
            self.timestamp = submit_file["timestamp"]
        except KeyError:
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing timestamp when data was uploaded.")

        ## Initialize s3 directory for this job.
        self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name,
                                             self.timestamp)
        jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
        self.jobpath = jobpath
        ## And create a corresponding directory in the submit area.
        create_jobdir = utilsparams3.mkdir(
            self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']),
            self.jobname)

        ## Create a logging object for the submit area and write to it.
        self.logger = utilsparams3.JobLogger_demo(self.bucket_name,
                                                  self.jobpath)
        self.logger.append("Unique analysis version id: {}".format(
            os.environ['versionid'].split("\n")[0]))
        self.logger.append("Initializing analysis.")
        self.logger.write()
        ########################
        ## Now parse the rest of the file.
        try:
            self.instance_type = submit_file[
                'instance_type']  # TODO default option from config
        except KeyError:
            msg = "Using default instance type {} from config file".format(
                os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]
            # Log this message
            self.logger.append(msg)
            self.logger.write()

        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try:
            self.data_name = submit_file[
                'dataname']  # TODO validate extensions
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"]
            self.logger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        self.logger.append(
            "Analysis request detected with dataset(s): {}, config file {}. Reading analysis blueprint."
            .format(self.data_name, self.config_name))

        self.logger.write()
        ##########################
        ## Check for the existence of the corresponding data and config in s3.
        ## Check that we have the actual data in the bucket.
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if isinstance(self.data_name, str):
            check_data_exists = utilsparams3.exists(self.bucket_name,
                                                    self.data_name)
        elif isinstance(self.data_name, list):
            check_data_exists = all(
                utilsparams3.exists(self.bucket_name, name)
                for name in self.data_name)
        else:
            raise TypeError("dataname should be a string or a list.")

        if not check_data_exists:
            msg = exists_errmsg.format(self.data_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.bucket_name, self.config_name):
            msg = exists_errmsg.format(self.config_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("configname given does not exist in bucket.")
        ###########################

        ## Now get the actual paths to relevant data from the foldername:
        if isinstance(self.data_name, str):
            self.filenames = utilsparams3.extract_files(self.bucket_name,
                                                        self.data_name,
                                                        ext=None)
        elif isinstance(self.data_name, list):
            self.filenames = self.data_name
        assert len(self.filenames) > 0, "we must have data to analyze."
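
These constructors lean on a small utilsparams3 helper module for S3 access. Below is a minimal sketch of the two calls used most often above, load_json and exists, assuming plain boto3; the function names and signatures match the call sites in the snippets, but the bodies are illustrative guesses, not the repo's actual implementation.

## Hypothetical sketch of the utilsparams3 helpers used above, built on boto3.
import json

import boto3

s3_client = boto3.client("s3")

def load_json(bucket_name, key):
    """Fetch an object from S3 and parse its body as JSON."""
    body = s3_client.get_object(Bucket=bucket_name, Key=key)["Body"].read()
    return json.loads(body)

def exists(bucket_name, path):
    """Return True if at least one object in the bucket starts with the given prefix."""
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=path, MaxKeys=1)
    return response["KeyCount"] > 0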
Example #2
    def __init__(self,bucket_name,key,time):
        ## Initialize as before:
        # Get Upload Location Information
        self.bucket_name = bucket_name
        ## Get the directory above the input directory.
        self.path = re.findall('.+?(?=/' + re.escape(os.environ["INDIR"]) + ')', key)[0]
        ## Now add in the time parameter: 
        self.time = time
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall(r'.+?(?=submit\.json)', os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError:
            ## If the filename is just "submit.json", we don't append anything to the job name.
            submit_name = ""
            
        ## Now we're going to get the path to the results directory:
        self.jobname = "job" + submit_name + self.time
        jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
        self.jobpath = jobpath
        create_jobdir = utilsparams3.mkdir(self.bucket_name,
                                           os.path.join(self.path, os.environ['OUTDIR']),
                                           self.jobname)
        
        print(self.path, 'path')  ## Debug: confirm the parsed base path.
        self.logger = utilsparams3.JobLogger(self.bucket_name, self.jobpath)
        #self.out_path = utilsparams3.mkdir(self.bucket_name, self.path, config.OUTDIR)
        #self.in_path = utilsparams3.mkdir(self.bucket_name, self.path, config.INDIR)

        # Load Content Of Submit File 
        submit_file = utilsparams3.load_json(bucket_name, key)
        ## Check which instance type we should use.
        try:
            self.instance_type = submit_file['instance_type'] # TODO default option from config
        except KeyError:
            msg = "Instance type not given in submit file; using default {} from config file".format(os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]
            ## Log this message.
            self.logger.append(msg)
            self.logger.write()

        ## These next two check that the submit file is correctly formatted
        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try: 
            self.data_name = submit_file['dataname'] # TODO validate extensions 
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"] 
            self.logger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        ## Check that we have the actual data in the bucket.  
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if not utilsparams3.exists(self.bucket_name,self.data_name): 
            msg = exists_errmsg.format(self.data_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.bucket_name,self.config_name): 
            msg = exists_errmsg.format(self.config_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("configname given does not exist in bucket.")
        ###########################

        ## Now get the actual paths to relevant data from the foldername: 

        self.filenames = utilsparams3.extract_files(self.bucket_name, self.data_name, ext=None)
        assert len(self.filenames) > 0, "we must have data to analyze."
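
For context, an __init__ like this is typically driven by an AWS Lambda handler that fires when a submit.json object lands in the bucket. Below is a hedged sketch of such a handler; the Submission class name and the module it is imported from are illustrative assumptions, not confirmed by the snippets, while the event fields follow the standard S3 notification shape.

## Hypothetical Lambda entry point that constructs the job object above.
import urllib.parse

from submit_start import Submission  # hypothetical module holding the class above

def handler(event, context):
    ## One S3 event can carry several records; build one job per submit file.
    for record in event["Records"]:
        bucket_name = record["s3"]["bucket"]["name"]
        ## S3 event keys arrive URL-encoded (spaces become '+').
        key = urllib.parse.unquote_plus(record["s3"]["object"]["key"])
        time = record["eventTime"]
        submission = Submission(bucket_name, key, time)
    return {"statusCode": 200}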
Example #3
    def __init__(self, bucket_name, key, time):
        #### Declare basic parameters:
        # Get Upload Location Information
        self.bucket_name = bucket_name

        ## Important paths:
        ## Get directory above the input directory where the job was submitted.
        self.path = re.findall('.+?(?=/' + re.escape(os.environ["INDIR"]) + ')', key)[0]
        ## The other important directory is the actual base directory of the input bucket itself.

        ## Now add in the time parameter:
        self.time = time

        #### Set up basic logging so we can get a trace when errors happen.
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall(r'.+?(?=submit\.json)',
                                   os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError:
            ## If the filename is just "submit.json", we don't append anything to the job name.
            submit_name = ""
        #### Parse submit file
        submit_file = utilsparams3.load_json(bucket_name, key)

        ## These next three fields check that the submit file is correctly formatted
        try:
            self.timestamp = submit_file["timestamp"]
        except KeyError:
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing timestamp when data was uploaded.")

        ## Now we're going to get the path to the results directory in the submit folder:
        self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name,
                                             self.timestamp)
        jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
        self.jobpath_submit = jobpath
        ## And create a corresponding directory in the submit area.
        create_jobdir = utilsparams3.mkdir(
            self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']),
            self.jobname)
        ## A logger for the submit area.
        self.submitlogger = utilsparams3.JobLogger_demo(self.bucket_name,
                                                        self.jobpath_submit)
        self.submitlogger.append(
            "Initializing EPI analysis: Parameter search for 2D LDS.")
        self.submitlogger.write()

        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try:
            self.input_bucket_name = submit_file["bucketname"]
            ## KEY: Now set up logging in the input folder too:
            self.inputlogger = utilsparams3.JobLogger(
                self.input_bucket_name,
                os.path.join(os.environ['OUTDIR'], self.jobname)
            )  ##TODO: this relies upon "OUTDIR" being the same in the submit and input buckets. Make sure to alter this later.
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing bucket name where data is located.")

        try:
            self.data_name = submit_file[
                'dataname']  # TODO validate extensions
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            self.inputlogger.append(submit_errmsg.format(ke))
            self.inputlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"]
            self.submitlogger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            self.inputlogger.append(submit_errmsg.format(ke))
            self.inputlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        ## Check that we have the actual data in the bucket.
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if not utilsparams3.exists(self.input_bucket_name, self.data_name):
            msg = exists_errmsg.format(self.data_name)
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.input_bucket_name, self.config_name):
            msg = exists_errmsg.format(self.config_name)
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
            raise ValueError("configname given does not exist in bucket.")

        ## Check what instance we should use.
        try:
            self.instance_type = submit_file['instance_type']
        except KeyError:
            msg = "Instance type not given in submit file; using default {} from config file".format(
                os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]
            ## Log this message.
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
        ###########################

        ## Now get the actual paths to relevant data from the foldername:

        self.filenames = utilsparams3.extract_files(self.input_bucket_name,
                                                    self.data_name,
                                                    ext=None)
        assert len(self.filenames) > 0, "we must have data to analyze."
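
The JobLogger / JobLogger_demo objects used throughout buffer log lines in memory and flush them to an object under the job path on write(). Below is a minimal sketch of that pattern: the method names and attributes (append, write, printlatest, assign_config, basetime, _logs) all appear in the snippets, but the body, and in particular the log key "logs/certificate.txt", are assumptions.

## Hypothetical minimal job logger: buffers lines, flushes them to one S3 key.
import datetime

import boto3

class JobLogger:
    def __init__(self, bucket_name, jobpath):
        self.bucket_name = bucket_name
        self.logkey = jobpath.rstrip("/") + "/logs/certificate.txt"  # assumed location
        self.basetime = datetime.datetime.now(datetime.timezone.utc)
        self._logs = []

    def append(self, message):
        self._logs.append(message)

    def printlatest(self):
        print(self._logs[-1])

    def assign_config(self, config_name):
        self.config_name = config_name  # remembered for later reporting

    def write(self):
        ## Overwrite the log object with everything buffered so far.
        body = "\n".join(self._logs).encode("utf-8")
        boto3.client("s3").put_object(Bucket=self.bucket_name,
                                      Key=self.logkey, Body=body)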
Example #4
    def __init__(self,bucket_name,key,time):
        ## Initialize as before:
        # Get Upload Location Information
        self.bucket_name = bucket_name
        ## Get the directory above the submit directory.
        self.path = re.findall('.+?(?=/' + re.escape(os.environ["SUBMITDIR"]) + ')', key)[0]
        ## Now add in the time parameter: 
        self.time = time
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall(r'.+?(?=submit\.json)', os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError:
            ## If the filename is just "submit.json", we don't append anything to the job name.
            submit_name = ""

        try:
            #### Parse submit file 
            submit_file = utilsparams3.load_json(bucket_name, key)
        except ClientError as e:
            print(e.response["Error"])
            ## ClientError can't be constructed from a bare string; re-raise as ValueError.
            raise ValueError("[JOB TERMINATE REASON] 'json not loaded.'") from e
        
        ## Machine formatted fields (error only available in lambda) 
        ## These next three fields check that the submit file is correctly formatted
        try:
            self.timestamp = submit_file["timestamp"]
        except KeyError:
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError("[JOB TERMINATE REASON] 'timestamp' field not given in submit.json file.")

        ## Initialize s3 directory for this job. 
        self.jobname = "job_{}_{}_{}".format(submit_name,bucket_name,self.timestamp)
        jobpath = os.path.join(self.path,os.environ['OUTDIR'],self.jobname)
        self.jobpath = jobpath
        try:
            ## And create a corresponding directory in the submit area.
            create_jobdir = utilsparams3.mkdir(self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']), self.jobname)

            ## Create a logging object for the submit area and write to it.
            self.logger = utilsparams3.JobLogger_demo(self.bucket_name, self.jobpath)
            msg = "REQUEST START TIME: {} (GMT)".format(str(self.logger.basetime)[:-4])
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            msg = "ANALYSIS VERSION ID: {}".format(os.environ['versionid'].split("\n")[0])
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            msg = "JOB ID: {}".format(self.timestamp)
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            self.logger._logs.append("\n ")
            msg = "[Job Manager] Detected new job: starting up."
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            msg = "        [Internal (init)] Initializing job manager."
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            ########################
            ## Now parse the rest of the file. 
            print("finished logging setup.")
        except ClientError as e:
            print("error with logging:", e.response["Error"])
        try:
            self.instance_type = submit_file['instance_type'] # TODO default option from config
        except KeyError:
            msg = "        [Internal (init)] Using default instance type {} from config file.".format(os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]
            # Log this message 
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()

        ## Check that we have a dataname field:
        submit_errmsg = "        [Internal (init)] INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try: 
            self.data_name = submit_file['dataname'] # TODO validate extensions 
        except KeyError as ke:

            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.printlatest()
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError("[JOB TERMINATE REASON] 'dataname' field not given in submit.json file")

        try:
            self.config_name = submit_file["configname"] 
            self.logger.assign_config(self.config_name)
        except KeyError as ke:
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.printlatest()
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError("[JOB TERMINATE REASON] 'configname' field not given in submit.json file")

        msg = "        [Internal (init)] Analysis request with dataset(s): {}, config file {}".format(self.data_name,self.config_name)
        self.logger.append(msg)
        self.logger.printlatest()
        self.logger.write()
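
One design note on utilsparams3.mkdir, which every example calls: S3 has no real directories, so "creating" one usually means writing a zero-byte object whose key ends in a slash, which the console then renders as a folder. Below is a sketch under that assumption; the signature matches the call sites above, but the body is a guess at the helper's behavior.

## Hypothetical mkdir for S3: a "directory" is a zero-byte key ending in "/".
import boto3

def mkdir(bucket_name, path, dirname):
    ## Normalize to a single "path/dirname/" prefix, then write a placeholder.
    prefix = "/".join(part.strip("/") for part in (path, dirname)) + "/"
    boto3.client("s3").put_object(Bucket=bucket_name, Key=prefix, Body=b"")
    return prefix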