Example no. 1
    def publish_report(self, report=None):

        # Exit as nothing to output
        if report is None:
            return

        # Generate report file for transfer
        with tempfile.NamedTemporaryFile(delete=False) as report_file:
            report_file.write(str(report).encode("utf8"))
            report_filepath = report_file.name

        # Generate destination file path
        dest_path = os.path.join(self.final_output_dir,
                                 "%s_final_report.json" % self.name)

        # Transfer report file to bucket
        options_fast = '-m -o "GSUtil:sliced_object_download_max_components=200"'
        cmd = "gsutil %s cp -r %s %s 1>/dev/null 2>&1 " % (
            options_fast, report_filepath, dest_path)
        GoogleCloudHelper.run_cmd(
            cmd, err_msg="Could not transfer final report to the final output directory!")

        # Send report to the Pub/Sub report topic if it's known to exist
        if self.report_topic_validated:
            GoogleCloudHelper.send_pubsub_message(self.report_topic,
                                                  message=dest_path,
                                                  encode=True,
                                                  compress=True)
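
The `send_pubsub_message` helper used above is not included in these examples. A minimal sketch of what it could do, assuming it zlib-compresses and base64-encodes the payload before publishing through the `gcloud pubsub topics publish` CLI; the function body and its use of the CLI are assumptions, not the original implementation:

import base64
import subprocess as sp
import zlib

def send_pubsub_message(topic, message, encode=True, compress=True):
    # Optionally compress, then base64-encode, the message so it stays ASCII-safe
    # (a compressed but unencoded payload would not be shell-safe; the call above uses encode=True)
    payload = message.encode("utf8")
    if compress:
        payload = zlib.compress(payload)
    if encode:
        payload = base64.b64encode(payload)

    # Publish via the gcloud CLI (hypothetical wrapper; the real helper may use the Pub/Sub API directly)
    cmd = "gcloud pubsub topics publish %s --message '%s'" % (topic, payload.decode("utf8"))
    proc = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("Could not publish message to topic '%s': %s" % (topic, err))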
Example no. 2
    def __init__(self, name, nr_cpus, mem, disk_space, **kwargs):
        # Call super constructor
        super(Instance, self).__init__(name, nr_cpus, mem, disk_space, **kwargs)

        # Get required arguments
        self.zone               = kwargs.pop("zone")
        self.service_acct       = kwargs.pop("service_acct")
        self.disk_image         = kwargs.pop("disk_image")

        # Get optional arguments
        self.is_boot_disk_ssd   = kwargs.pop("is_boot_disk_ssd",    False)
        self.nr_local_ssd       = kwargs.pop("nr_local_ssd",        0)

        # Initialize the region of the instance
        self.region             = GoogleCloudHelper.get_region(self.zone)

        # Initialize instance random id
        self.rand_instance_id   = self.name.rsplit("-",1)[-1]

        # Indicates that instance is not resettable
        self.is_preemptible = False

        # Google instance type. Will be set at creation time based on google price scheme
        self.instance_type = None

        # Initialize the price of the run and the total cost of the run
        self.price = 0
        self.cost = 0

        # Flag for whether startup script has completed running
        self.__startup_script_complete = False
Example no. 3
    def create(self):

        # Begin running command to create the instance on Google Cloud
        if self.get_status() != Processor.OFF:
            logging.error("(%s) Cannot create processor! One with that name already exists with current status: %s" % (
                self.name, self.get_status()))
            raise RuntimeError("Processor can only be created if it's 'OFF'!")

        elif self.is_locked():
            logging.error("(%s) Failed to create processor. Processor locked!" % self.name)
            raise RuntimeError("Cannot create processor while locked!")

        # Set status to indicate that commands can't be run on processor because it's busy
        logging.info("(%s) Process 'create' started!" % self.name)
        # Determine instance type and actual resource usage based on current Google prices in instance zone
        self.nr_cpus, self.mem, self.instance_type = GoogleCloudHelper.get_optimal_instance_type(self.nr_cpus,
                                                                                                 self.mem,
                                                                                                 self.zone,
                                                                                                 self.is_preemptible)

        # Determine instance price at time of creation
        self.price = GoogleCloudHelper.get_instance_price(self.nr_cpus,
                                                          self.mem,
                                                          self.disk_space,
                                                          self.instance_type,
                                                          self.zone,
                                                          self.is_preemptible,
                                                          self.is_boot_disk_ssd,
                                                          self.nr_local_ssd)
        logging.debug("(%s) Instance type is %s. Price per hour: %s cents" % (self.name, self.instance_type, self.price))

        # Generate gcloud create cmd
        cmd = self.__get_gcloud_create_cmd()

        # Try to create instance until either it's successful, we're out of retries, or the processor is locked
        self.processes["create"] = Process(cmd,
                                           cmd=cmd,
                                           stdout=sp.PIPE,
                                           stderr=sp.PIPE,
                                           shell=True,
                                           num_retries=self.default_num_cmd_retries)
        self.wait_process("create")

        # Wait for startup script to completely finish
        logging.debug("(%s) Waiting for instance startup-script completion..." % self.name)
        self.wait_until_ready()
        logging.debug("(%s) Instance startup complete! %s Now live and ready to run commands!" % (self.name, self.name))
Example no. 4
    def __poll_startup_script(self):
        # Return True if the instance is currently available for running commands
        data = GoogleCloudHelper.describe(self.name, self.zone)
        # Check to see if "READY" has been added to instance metadata, indicating the startup-script has completed
        for item in data["metadata"]["items"]:
            if item["key"] == "READY":
                return True
        return False
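
For `__poll_startup_script` to ever return True, the instance's startup script must add a 'READY' key to the instance metadata when it finishes. A minimal sketch of how that last step could be done from Python, assuming gcloud is available on the instance; the function name and its arguments are placeholders, not part of the original code:

import subprocess as sp

def mark_instance_ready(instance_name, zone):
    # Add a READY key to the instance's metadata so __poll_startup_script() sees the instance as ready
    cmd = "gcloud compute instances add-metadata %s --metadata READY=TRUE --zone %s" % (instance_name, zone)
    proc = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("Could not mark instance '%s' as READY: %s" % (instance_name, err))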
Example no. 5
    def validate(self):
        # Check that final output dir begins with gs://
        if not self.final_output_dir.startswith("gs://"):
            logging.error(
                "Invalid final output directory: %s. Google bucket paths must begin with 'gs://'"
                % self.final_output_dir)
            raise IOError("Invalid final output directory!")

        # Make the GS bucket if it doesn't already exist
        gs_bucket = GoogleCloudHelper.get_bucket_from_path(
            self.final_output_dir)
        if not GoogleCloudHelper.bucket_exists(gs_bucket):
            logging.info("Bucket {0} does not exists. Creating it now!".format(
                gs_bucket))
            region = GoogleCloudHelper.get_region(self.zone)
            GoogleCloudHelper.mb(gs_bucket,
                                 project=self.google_project,
                                 region=region)

        # Set the minimum disk size based on size of disk image
        disk_image = self.config["task_processor"]["disk_image"]
        disk_image_info = GoogleCloudHelper.get_disk_image_info(disk_image)
        self.MIN_DISK_SPACE = int(disk_image_info["diskSizeGb"])

        # Check to see if the reporting Pub/Sub topic exists
        if not GoogleCloudHelper.pubsub_topic_exists(self.report_topic):
            logging.error("Reporting topic '%s' was not found!" %
                          self.report_topic)
            raise IOError("Reporting topic '%s' not found!" %
                          self.report_topic)

        # Indicate that report topic exists and has been validated
        self.report_topic_validated = True
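
`bucket_exists` and `mb` are not among these examples. A hedged sketch of how they could wrap gsutil, assuming they shell out the way the other helpers do; only the gsutil flags are standard, the function bodies are assumptions:

import subprocess as sp

def bucket_exists(bucket_name):
    # 'gsutil ls -b' exits non-zero when the bucket does not exist or is not accessible
    proc = sp.Popen("gsutil ls -b %s" % bucket_name, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    proc.communicate()
    return proc.returncode == 0

def mb(bucket_name, project, region):
    # Create the bucket in the given project and region
    cmd = "gsutil mb -p %s -l %s %s" % (project, region, bucket_name)
    proc = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("Could not create bucket '%s': %s" % (bucket_name, err))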
Example no. 6
    def __get_instance_config(self):
        # Returns complete config for a task processor
        params = {}
        inst_params = self.config["task_processor"]
        for param, value in inst_params.items():
            params[param] = value

        # Add platform-specific options
        params["zone"] = self.zone
        params["service_acct"] = self.service_acct

        # Randomize the zone within the region if specified
        if self.randomize_zone:
            region = GoogleCloudHelper.get_region(self.zone)
            params["zone"] = GoogleCloudHelper.select_random_zone(region)

        # Return the complete instance configuration
        return params
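
A hedged usage sketch of how the returned dictionary could feed the Instance constructor from Example no. 2; the method name, instance name, and resource numbers below are illustrative and not taken from the original code:

    def get_task_processor(self, name, nr_cpus, mem, disk_space):
        # Hypothetical factory method on the same platform class: unpack the
        # instance config into the Instance constructor shown in Example no. 2
        params = self.__get_instance_config()
        return Instance(name, nr_cpus, mem, disk_space, **params)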
Example no. 7
    def update_status(self):

        # Initialize the number of retries
        retries = 0

        # Get status from the cloud
        while True:

            try:
                # Obtain the instance information
                data = GoogleCloudHelper.describe(self.name, self.zone)

                # Update the external IP address
                self.external_IP = data["networkInterfaces"][0][
                    "accessConfigs"][0].get("natIP", None)

                # Set the status accordingly
                if data["status"] in ["TERMINATED", "STOPPING"]:
                    self.set_status(Processor.DESTROYING)
                    break

                elif data["status"] in ["PROVISIONING", "STAGING"]:
                    self.set_status(Processor.CREATING)
                    break

                elif data["status"] == "RUNNING":
                    self.set_status(Processor.AVAILABLE if self.ssh_ready
                                    else Processor.CREATING)
                    break

                else:
                    raise RuntimeError(
                        "Unknown Google Compute Engine instance status: %s!" %
                        data["status"])

            # If no resource found, then the processor was manually deleted by someone
            except GoogleResourceNotFound:

                # Update the external IP address
                self.external_IP = None

                # Set the status to OFF
                self.set_status(Processor.OFF)
                break

            # For any other error, retry again
            except BaseException:

                # Raise an error when retried the default number of retries
                if retries >= self.default_num_cmd_retries:
                    raise

                # Sleep for 5 seconds and retry again
                else:
                    time.sleep(5)
                    retries += 1
Example no. 8
    def __init__(self, name, platform_config_file, final_output_dir):
        # Call super constructor from Platform
        super(GooglePlatform, self).__init__(name, platform_config_file,
                                             final_output_dir)

        # Get google access fields from JSON file
        self.key_file = self.config["service_account_key_file"]
        self.service_acct = GoogleCloudHelper.get_field_from_key_file(
            self.key_file, field_name="client_email")
        self.google_project = GoogleCloudHelper.get_field_from_key_file(
            self.key_file, field_name="project_id")

        # Get Google compute zone from config
        self.zone = self.config["zone"]

        # Determine whether to distribute processors across zones randomly
        self.randomize_zone = self.config["randomize_zone"]

        # Obtain the reporting topic
        self.report_topic = self.config["report_topic"]
        self.report_topic_validated = False

        # Boolean for whether worker instances created by the platform will be preemptible
        self.is_preemptible = self.config["task_processor"]["is_preemptible"]

        # Use authentication key file to gain access to google cloud project using Oauth2 authentication
        GoogleCloudHelper.authenticate(self.key_file)

        # Create local gcloud SSH key to be able to directly use SSH
        GoogleCloudHelper.configure_gcloud_ssh()
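
The two helpers called at the end are not included in these examples. A hedged sketch of what they could wrap, assuming they shell out to the standard gcloud commands for service-account activation and SSH configuration; the bodies are assumptions, not the original implementation:

import subprocess as sp

def authenticate(key_file):
    # Activate the service account from its JSON key file
    cmd = "gcloud auth activate-service-account --key-file %s" % key_file
    proc = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("Authentication to Google Cloud failed: %s" % err)

def configure_gcloud_ssh():
    # Populate the local SSH config and keys used by 'gcloud compute ssh'
    proc = sp.Popen("gcloud compute config-ssh", shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
    proc.communicate()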
Example no. 9
    def __poll_status(self):

        if not GoogleCloudHelper.instance_exists(self.name):
            self.__startup_script_complete = False
            return Processor.OFF

        # Try to get instance status
        status = GoogleCloudHelper.get_instance_status(self.name, self.zone)
        if status in ["TERMINATED", "STOPPING"]:
            self.__startup_script_complete = False
            return Processor.DESTROYING

        elif status in ["PROVISIONING", "STAGING"]:
            self.__startup_script_complete = False
            return Processor.CREATING

        elif status == "RUNNING":
            if self.__startup_script_complete or self.__poll_startup_script():
                self.__startup_script_complete = True
                return Processor.AVAILABLE
            self.__startup_script_complete = False
            return Processor.CREATING
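
`wait_until_ready`, called by the create() methods in Examples no. 3 and 10, is not shown here. A minimal sketch of how it could repeatedly poll the status until the processor becomes available, assuming a fixed sleep interval and timeout; both values and the method body are placeholders, not the original implementation:

    def wait_until_ready(self, poll_interval=10, timeout=1800):
        # Poll the instance status until it reports AVAILABLE or the timeout expires
        elapsed = 0
        while elapsed < timeout:
            if self.__poll_status() == Processor.AVAILABLE:
                return
            time.sleep(poll_interval)
            elapsed += poll_interval
        raise RuntimeError("(%s) Instance did not become available before the timeout!" % self.name)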
Example no. 10
    def create(self):

        if self.is_locked():
            logging.error(
                "(%s) Failed to create processor. Processor locked!" %
                self.name)
            raise RuntimeError("Cannot create processor while locked!")

        # Set status to indicate that commands can't be run on processor because it's busy
        logging.info("(%s) Process 'create' started!" % self.name)
        # Determine instance type and actual resource usage based on current Google prices in instance zone
        self.nr_cpus, self.mem, self.instance_type = GoogleCloudHelper.get_optimal_instance_type(
            self.nr_cpus, self.mem, self.zone, self.is_preemptible)

        # Determine instance price at time of creation
        self.price = GoogleCloudHelper.get_instance_price(
            self.nr_cpus, self.mem, self.disk_space, self.instance_type,
            self.zone, self.is_preemptible, self.is_boot_disk_ssd,
            self.nr_local_ssd)
        logging.debug("(%s) Instance type is %s. Price per hour: %s cents" %
                      (self.name, self.instance_type, self.price))

        # Generate gcloud create cmd
        cmd = self.__get_gcloud_create_cmd()

        # Try to create instance until either it's successful, we're out of retries, or the processor is locked
        self.processes["create"] = Process(
            cmd,
            cmd=cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE,
            shell=True,
            num_retries=self.default_num_cmd_retries)
        self.wait_process("create")

        # Wait for instance to be accessible through SSH
        logging.debug("(%s) Waiting for instance to be accessible" % self.name)
        self.wait_until_ready()
Example no. 11
    def __init__(self, name, nr_cpus, mem, disk_space, **kwargs):
        # Call super constructor
        super(Instance, self).__init__(name, nr_cpus, mem, disk_space,
                                       **kwargs)

        # Get required arguments
        self.zone = kwargs.pop("zone")
        self.service_acct = kwargs.pop("service_acct")
        self.disk_image = kwargs.pop("disk_image")

        # Get optional arguments
        self.is_boot_disk_ssd = kwargs.pop("is_boot_disk_ssd", False)
        self.nr_local_ssd = kwargs.pop("nr_local_ssd", 0)

        # Initialize the region of the instance
        self.region = GoogleCloudHelper.get_region(self.zone)

        # Initialize instance random id
        self.rand_instance_id = self.name.rsplit("-", 1)[-1]

        # Indicates that instance is not resettable
        self.is_preemptible = False

        # Google instance type. Will be set at creation time based on google price scheme
        self.instance_type = None

        # Initialize the price of the run and the total cost of the run
        self.price = 0
        self.cost = 0

        # Initialize the SSH status
        self.ssh_connections_increased = False
        self.ssh_ready = False

        # Number of times creation has been reset
        self.creation_resets = 0

        # API Rate limit errors count
        self.api_rate_limit_retries = 0

        # Initialize external IP
        self.external_IP = None
Example no. 12
    def clean_up(self):

        logging.info("Cleaning up Google Cloud Platform.")
        # Remove dummy files from output directory
        try:
            logging.debug("Looking for dummy files...")
            dummy_search_string = os.path.join(self.final_output_dir,
                                               "**dummy.txt")
            dummy_outputs = GoogleCloudHelper.ls(dummy_search_string)
            if len(dummy_outputs) > 0:
                cmd = "gsutil rm {0}".format(" ".join(dummy_outputs))
                proc = sp.Popen(cmd,
                                stderr=sp.PIPE,
                                stdout=sp.PIPE,
                                shell=True)
                proc.communicate()
            logging.debug("Done killing dummy files!")
        except BaseException:
            logging.warning(
                "Could not remove dummy input files on Google Cloud!")

        # Initiate destroy process on all the instances that haven't been destroyed
        for instance_name, instance_obj in self.processors.items():
            try:
                if instance_name not in self.dealloc_procs:
                    instance_obj.destroy(wait=False)
            except RuntimeError:
                logging.warning("(%s) Could not destroy instance!" %
                                instance_name)

        # Now wait for all destroy processes to finish
        for instance_name, instance_obj in self.processors.items():
            try:
                #if instance_obj.get_status() != Processor.OFF:
                if instance_name not in self.dealloc_procs:
                    instance_obj.wait_process("destroy")
            except RuntimeError:
                logging.warning("(%s) Unable to destroy instance!" %
                                instance_name)

        logging.info("Clean up complete!")
Example no. 13
import importlib

configure_import_paths()
configure_logging(3)

ss_file = "/home/alex/Desktop/projects/gap/test/test_runs/qc_report_merge/ss.json"
ss = SampleSet(ss_file)
print(ss.get_paths())
print(ss.get_data(data_type="qc_report", samples="Jijoye_early2"))
print(ss.get_data(data_type="qc_report", samples="Jijoye_early1"))

exit(0)

######################### Test GCH ls command

print(GoogleCloudHelper.ls("gs://derp_test/new_gap_test_1/**dummy.txt"))
print(GoogleCloudHelper.ls("gs://derp_test/*gasdf"))

exit(0)

######################### Test GAPfile updating with modules
from System.Platform.Google import GooglePlatform
from System.Datastore import Datastore

# Test cycle checking algorithm
graph_file = "/home/alex/Desktop/projects/gap/test/test_runs/simple_success/graph.config"
graph = Graph(graph_file)

#print graph
rk_file = "/home/alex/Desktop/projects/gap/test/test_runs/simple_success/rk.config"
rk = ResourceKit(rk_file)