def download_all_logs(ip, port):

    # default
    log_location = h2o.download_all_logs()
    assert os.path.exists(
        log_location
    ), "Expected h2o logs to be saved in {0}, but they weren't".format(
        log_location)
    os.remove(log_location)

    # dirname and filename
    log_location = h2o.download_all_logs(".", "h2o_logs.txt")
    assert os.path.exists(
        log_location
    ), "Expected h2o logs to be saved in {0}, but they weren't".format(
        log_location)
    os.remove(log_location)

    # dirname
    log_location = h2o.download_all_logs(dirname=".")
    assert os.path.exists(
        log_location
    ), "Expected h2o logs to be saved in {0}, but they weren't".format(
        log_location)
    os.remove(log_location)

    # filename
    log_location = h2o.download_all_logs(filename="h2o_logs.txt")
    assert os.path.exists(
        log_location
    ), "Expected h2o logs to be saved in {0}, but they weren't".format(
        log_location)
    os.remove(log_location)
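
The calls above exercise every combination of download_all_logs's dirname and filename arguments and check that the returned path exists. For context, a minimal standalone invocation looks like this (connecting to a local cluster here is an assumption; the test above relies on a connection made elsewhere):

import os
import h2o

h2o.init()  # connect to (or start) a local H2O cluster
path = h2o.download_all_logs(dirname=".", filename="h2o_logs.zip")
print(os.path.abspath(path))  # download_all_logs returns the saved file's path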
Example #2
def download_logs():
    results_dir = pyunit_utils.locate("results")

    logs_path = h2o.download_all_logs()
    assert os.path.exists(logs_path)

    logs_path_explicit = h2o.download_all_logs(dirname=results_dir,
                                               filename="logs.zip")
    assert logs_path_explicit == os.path.join(results_dir, "logs.zip")
    assert os.path.exists(logs_path_explicit)
Example #3
def save_artifacts(automl, dataset, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = output_subdir("models", config)
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            if all_models_se and mformat == 'mojo':
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)
                models_archive = os.path.join(models_dir, "models.zip")
                zip_path(models_dir, models_archive)

                def delete(path, isdir):
                    if path != models_archive and os.path.splitext(
                            path)[1] in ['.json', '.zip']:
                        os.remove(path)

                walk_apply(models_dir, delete, max_depth=0)

        if 'models_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                save_predictions(model,
                                 test,
                                 dataset=dataset,
                                 config=config,
                                 predictions_file=os.path.join(
                                     predictions_dir, mid, 'predictions.csv'),
                                 preview=False)
            zip_path(predictions_dir,
                     os.path.join(predictions_dir, "models_predictions.zip"))

            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)

            walk_apply(predictions_dir, delete, max_depth=0)

        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            h2o.download_all_logs(dirname=logs_dir)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)
Example #4
def hadoop_download_logs():
    # Check if we are running inside the H2O network by seeing if we can touch
    # the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible()

    if hadoop_namenode_is_accessible:
        results_dir = pyunit_utils.locate("results")
        zip_file = h2o.download_all_logs()
        extracted_dir = os.path.abspath("extracted")
        print("Logs extracted into: " + extracted_dir)
        if os.path.isdir(extracted_dir):
            shutil.rmtree(extracted_dir)
        zip_ref = zipfile.ZipFile(zip_file, 'r')
        zip_ref.extractall(extracted_dir)
        zip_ref.close()
        nodes_log_dir = os.path.join(extracted_dir, os.listdir(extracted_dir)[0])
        nodes_log_file_names = os.listdir(nodes_log_dir)

        for f in expected_log_files(nodes_log_dir, nodes_log_file_names, "INFO"):
            print("Checking if file " + f + " exists")
            # check that all expected files exist
            assert os.path.isfile(f)
    else:
        raise EnvironmentError("Hadoop namenode is not accessible")
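
expected_log_files is another helper that lives outside this snippet. A hedged sketch, assuming it selects the per-node log files whose names mention the requested level (the filtering rule here is illustrative, not H2O's actual test code):

import os

def expected_log_files(nodes_log_dir, file_names, level):
    # Yield full paths for log files whose names mention the given level
    # (e.g. "INFO"); this naming assumption is made for illustration only.
    for name in file_names:
        if level.lower() in name.lower():
            yield os.path.join(nodes_log_dir, name)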
Example #5
def h2odownload_all_logs():
    """
    Python API test: h2o.download_all_logs(dirname=u'.', filename=None)
    """
    training_data = h2o.import_file(
        pyunit_utils.locate("smalldata/logreg/benign.csv"))
    Y = 3
    X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]

    model = H2OGeneralizedLinearEstimator(family="binomial",
                                          alpha=0,
                                          Lambda=1e-5)
    model.train(x=X, y=Y, training_frame=training_data)
    try:
        results_dir = pyunit_utils.locate(
            "results")  # find directory path to results folder
        filename = "logs.csv"
        dir_path = h2o.download_all_logs(results_dir,
                                         filename)  # save logs under the name logs.csv
        full_path_filename = os.path.join(results_dir, filename)
        assert dir_path == full_path_filename, "h2o.download_all_logs() command is not working."
        assert os.path.isfile(
            full_path_filename
        ), "h2o.download_all_logs() command is not working."
    except Exception as e:
        if 'File not found' in e.args[0]:
            print(
                "Directory is not writable.  h2o.download_all_logs() command is not tested."
            )
        else:
            assert False, "h2o.download_all_logs() command is not working."
Example #6
def get_all_variables_csv(i):
    ivd = {}
    try:
        iv = pd.read_csv(i, header=None)
    except Exception:
        logging.critical('read csv error')
        h2o.download_all_logs(dirname=logs_path, filename=logfile)
        h2o.cluster().shutdown()
        sys.exit(10)
    # the first row holds the variable names, the second their data types
    col = iv.values.tolist()[0]
    dt = iv.values.tolist()[1]
    for name, dtype in zip(col, dt):
        ivd[name.strip()] = dtype.strip()
    return ivd
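
For reference, here is what the function produces for a hypothetical two-row input file (both the file name and its contents are made up for illustration):

# variables.csv (hypothetical):
#   age,income,label
#   int,real,enum
ivd = get_all_variables_csv("variables.csv")
# ivd == {'age': 'int', 'income': 'real', 'label': 'enum'}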
Example #7
def hadoop_download_logs():

    # Check if we are running inside the H2O network by seeing if we can touch
    # the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible()

    if hadoop_namenode_is_accessible:
        results_dir = pyunit_utils.locate("results")
        zip_file = h2o.download_all_logs()
        extracted_dir = os.path.abspath("extracted")
        print("Logs extracted into: " + extracted_dir)
        if os.path.isdir(extracted_dir):
            shutil.rmtree(extracted_dir)
        zip_ref = zipfile.ZipFile(zip_file, 'r')
        zip_ref.extractall(extracted_dir)
        zip_ref.close()
        nodes_log_dir = os.path.join(extracted_dir, os.listdir(extracted_dir)[0])
        nodes_log_file_names = os.listdir(nodes_log_dir)

        for f in expected_log_files(nodes_log_dir, nodes_log_file_names,
                                    "INFO"):
            print("Checking if file " + f + " exists")
            # check that all expected files exist
            assert os.path.isfile(f)
    else:
        raise EnvironmentError("Hadoop namenode is not accessible")
Example #8
def test_download_txt_logs():
    marker = "txt-logs-marker"
    results_dir = pyunit_utils.locate(
        "results")  # find directory path to results folder

    # log something unique so that we can try to search for it in the downloaded logs
    h2o.log_and_echo(marker)

    log_path = h2o.download_all_logs(results_dir,
                                     filename="plain_text_logs.log_ignore",
                                     container="LOG")

    assert not zipfile.is_zipfile(log_path)
    # logs are trimmed (we can only check smaller files)
    assert find_marker(log_path,
                       marker) or os.path.getsize(log_path) > 10 * 1024 * 1024

    # Now make sure we get a zip file if we don't specify the container format
    zip_path = h2o.download_all_logs(results_dir, filename="zip_logs.zip")
    assert zipfile.is_zipfile(zip_path)
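
find_marker is defined elsewhere in the test module. A minimal sketch, assuming it simply scans the plain-text log line by line (hypothetical implementation):

def find_marker(log_path, marker):
    # Return True if any line of the downloaded text log contains the marker.
    with open(log_path, "r", errors="ignore") as f:
        return any(marker in line for line in f)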
Example #9
def download_all_logs(ip, port):
    # Connect to h2o
    h2o.init(ip, port)

    # default
    log_location = h2o.download_all_logs()
    assert os.path.exists(log_location), "Expected h2o logs to be saved in {0}, but they weren't".format(log_location)
    os.remove(log_location)

    # dirname and filename
    log_location = h2o.download_all_logs(".","h2o_logs.txt")
    assert os.path.exists(log_location), "Expected h2o logs to be saved in {0}, but they weren't".format(log_location)
    os.remove(log_location)

    # dirname
    log_location = h2o.download_all_logs(dirname=".")
    assert os.path.exists(log_location), "Expected h2o logs to be saved in {0}, but they weren't".format(log_location)
    os.remove(log_location)

    # filename
    log_location = h2o.download_all_logs(filename="h2o_logs.txt")
    assert os.path.exists(log_location), "Expected h2o logs to be saved in {0}, but they weren't".format(log_location)
    os.remove(log_location)
Example #10
def save_artifacts(automl, dataset, config):
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = make_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = make_subdir("models", config)
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            if all_models_se:
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)

        if 'models_predictions' in artifacts:
            predictions_dir = make_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                save_predictions(model,
                                 test,
                                 dataset=dataset,
                                 config=config,
                                 predictions_file=os.path.join(
                                     predictions_dir, mid, 'predictions.csv'))

        if 'logs' in artifacts:
            logs_dir = make_subdir("logs", config)
            h2o.download_all_logs(dirname=logs_dir)
    except Exception:
        log.debug("Error when saving artifacts.", exc_info=True)
Example #11
def hadoop_download_logs():
    zip_file = h2o.download_all_logs()
    extracted_dir = os.path.abspath("extracted")
    print("Logs extracted into: " + extracted_dir)
    if os.path.isdir(extracted_dir):
        shutil.rmtree(extracted_dir)
    zip_ref = zipfile.ZipFile(zip_file, 'r')
    zip_ref.extractall(extracted_dir)
    zip_ref.close()
    nodes_log_dir = os.path.join(extracted_dir, os.listdir(extracted_dir)[0])
    nodes_log_file_names = os.listdir(nodes_log_dir)

    for f in expected_log_files(nodes_log_dir, nodes_log_file_names, "INFO"):
        print("Checking if file " + f + " exists")
        # check that all expected files exist
        assert os.path.isfile(f)
Example #12
def h2odownload_all_logs():
    """
    Python API test: h2o.download_all_logs(dirname=u'.', filename=None)
    """
    training_data = h2o.import_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
    Y = 3
    X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]

    model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
    model.train(x=X, y=Y, training_frame=training_data)
    try:
        results_dir = pyunit_utils.locate("results")    # find directory path to results folder
        filename = "logs.csv"
        dir_path = h2o.download_all_logs(results_dir, filename)       # save logs under the name logs.csv
        full_path_filename = os.path.join(results_dir, filename)
        assert dir_path == full_path_filename, "h2o.download_all_logs() command is not working."
        assert os.path.isfile(full_path_filename), "h2o.download_all_logs() command is not working."
    except Exception as e:
        if 'File not found' in e.args[0]:
            print("Directory is not writable.  h2o.download_all_logs() command is not tested.")
        else:
            assert False, "h2o.download_all_logs() command is not working."
Example #13
  log_file = run_id + '.log'
  log_file = os.path.join(run_dir, log_file)
  logging.basicConfig(filename=log_file, level=logging.INFO,
                      format="%(asctime)s:%(levelname)s:%(message)s")
  logging.info(start)

  # 65535 is the highest valid port number
  port_no = random.randint(5555, 55555)

  #  h2o.init(strict_version_check=False,min_mem_size_GB=min_mem_size,port=port_no) # start h2o
  try:
    h2o.init(strict_version_check=False, min_mem_size_GB=min_mem_size,
             max_mem_size_GB=max_mem_size, port=port_no)  # start h2o
  except Exception:
    logging.critical('h2o.init')
    h2o.download_all_logs(dirname=logs_path, filename=logfile)
    h2o.cluster().shutdown()
    sys.exit(2)
  
  
  # meta data
  meta_data = set_meta_data(no_rows, analysis, run_id, server_path, data_path,
                            test_path, model_path, target, run_time,
                            classification, scale, max_models, balance_y,
                            balance_threshold, name, run_dir, nthreads,
                            min_mem_size, orig_path)
  
  
  # predictions only
  
  if model_path is not None: