def test_databricks_workers_graph_multiple(self):
    """Graph output for a multi-entry usage list matches the stored fixture."""
    with open(self.db_usage_multiple_file, "r") as fixture:
        expected = fixture.read()
    graph = DatabricksWorkersGraph()
    actual = graph.create(usage_list=self.db_usage_multiple)
    self.assertEqual(actual, expected)
def test_databricks_workers_graph_history(self):
    """Graph built from usage plus a loaded history list matches the fixture."""
    with open(self.db_history_json_file) as history_file:
        history = json.load(history_file)
    with open(self.db_usage_multiple_file, "r") as fixture:
        expected = fixture.read()
    graph = DatabricksWorkersGraph()
    actual = graph.create(usage_list=self.db_usage_for_history, history_list=history)
    self.assertEqual(actual, expected)
def _history_json(history_list):
    """Serialize the workers history as deterministic, pretty-printed JSON.

    Shared by both upload branches of main() so the on-disk format cannot
    drift between the first-run and merge paths.
    """
    return json.dumps(history_list, ensure_ascii=True, sort_keys=True,
                      indent=4, separators=(',', ': '))


def main(save_bucket, log_level=logging.INFO):
    """Collect Databricks worker usage and publish graph data to S3.

    Fetches current worker usage from the Databricks API, merges it with any
    history.json already stored for the current ISO year/week, and uploads
    graph-data.js (plus the static index pages on the week's first run) to
    ``save_bucket``.

    Args:
        save_bucket: Name of the S3 bucket to upload the report into.
        log_level: Logging verbosity; defaults to ``logging.INFO``.

    Returns:
        ``None`` when credentials are missing or on success, ``False`` when
        the Databricks API is unreachable (legacy convention kept for
        existing callers).
    """
    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(name)s %(levelname)s %(message)s',
                        handlers=[logging.StreamHandler()])
    logging.info('STARTED: databricks-workers')
    logging.debug("bucket: %s", save_bucket)

    databricks_username = os.environ.get("DATABRICKS_USERNAME", None)
    databricks_password = os.environ.get("DATABRICKS_PASSWORD", None)
    if databricks_username is None or databricks_password is None:
        logging.info("Missing databricks_username, databricks_password")
        return None
    logging.debug("databricks_username: %s", databricks_username)
    # SECURITY: never log password content — the previous code emitted the
    # first 3 characters, which leaks secret material into log storage.
    # Only record that a password was supplied.
    logging.debug("databricks_password: provided")

    logging.info("Using AWS storage")
    storage = S3()

    # Snapshot the clock once so the year and week-number components cannot
    # disagree if this runs exactly at a year/week boundary.
    now = datetime.now()
    upload_directory = "databricks/workers/%s/%s" % (now.strftime("%Y"),
                                                     now.strftime("%W"))
    logging.info("Upload directory: %s", upload_directory)

    databricks_usage = DatabricksWorkersUsage(databricks_username, databricks_password)
    try:
        logging.info("Connecting to Databricks API")
        databricks_workers = databricks_usage.get()
        logging.debug("databricks_workers: %s", databricks_workers)
    except requests.exceptions.ConnectionError:
        logging.info("Unable to connect to Databricks API")
        return False

    # Earlier runs in the same week leave a history.json behind; download it
    # so this run's samples can be appended rather than overwriting the week.
    downloaded_history = storage.download(save_bucket, upload_directory + "/history.json")
    logging.debug("downloaded_history: %s", downloaded_history)

    if downloaded_history is None:
        # First run of the week: publish the directory listing and the
        # static graph page alongside the data files.
        logging.info("Uploading indexes to AWS: %s / %s", save_bucket, upload_directory)
        storage.upload_index(save_bucket, upload_directory)
        index_html = resource_string("usagereports", "html/graph/index.html")
        storage.upload(save_bucket, "%s/index.html" % upload_directory, index_html)
        # Graph data from this run's samples only.
        databricks_graph = DatabricksWorkersGraph()
        storage.upload(save_bucket, "%s/graph-data.js" % upload_directory,
                       databricks_graph.create(usage_list=databricks_workers))
        history_list = transform_history_dict(databricks_workers)
        storage.upload(save_bucket, "%s/history.json" % upload_directory,
                       _history_json(history_list))
    else:
        history_dict = json.loads(downloaded_history)
        # Graph data combining stored history with this run's samples.
        databricks_graph = DatabricksWorkersGraph()
        storage.upload(save_bucket, "%s/graph-data.js" % upload_directory,
                       databricks_graph.create(usage_list=databricks_workers,
                                               history_list=history_dict))
        # Append this run's samples before re-uploading the history.
        history_dict.extend(databricks_workers)
        logging.debug("history_dict: %s", history_dict)
        history_list = transform_history_dict(history_dict)
        storage.upload(save_bucket, "%s/history.json" % upload_directory,
                       _history_json(history_list))

    logging.info('FINISHED: databricks-workers')