示例#1
0
logger.debug('Now parsing configuration file esk302_histogram_filler_plotter.')

#########################################################################################
# --- Minimal analysis information: fetch the shared configuration object from the
#     process manager and register this macro's analysis name and version on it.
settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'esk302_histogram_filler_plotter'
settings['version'] = 0

#########################################################################################
# --- Tell the user where the report output ends up.
#     The {path} placeholder is presumably filled in by the logger from the keyword
#     argument of the same name.

msg = (
    '\n\n'
    'The plots and latex files produced by link hist_summary can be found in dir:\n'
    '{path}\n'
)
logger.info(
    msg,
    path='/'.join((settings['resultsDir'], settings['analysisName'], 'data/v0/report/')),
)

# --- Analysis configuration flags and values.
#     Flags like these can be used to switch chains or links on and off.

settings['do_loop'] = True

chunk_size = 400

#########################################################################################
# --- Input files: the same fixture is listed twice, so two entries are available.

input_files = [
    resources.fixture('mock_accounts.csv.gz'),
    resources.fixture('mock_accounts.csv.gz'),
]

# --- Describe the set-up and point to the persisted data and configuration directories.
msg = (
    '\n\n'
    'The setup consists of three simple chains that add progressively more information to the datastore.\n'
    'In the examples the datastore gets persisted after the execution of each chain, and can be picked\n'
    'up again as input for the next chain.\n'
    '\n'
    '- The pickled datastore(s) can be found in the data directory:\n'
    '{data_path}\n'
    '\n'
    '- The pickled configuration object(s) and backed-up configuration file can be found in:\n'
    '{conf_path}\n'
)
logger.info(
    msg,
    data_path='/'.join((settings['resultsDir'], settings['analysisName'], 'data/v0/')),
    conf_path='/'.join((settings['resultsDir'], settings['analysisName'], 'config/v0/')),
)

# --- Dummy payloads for this macro; the names f, g and h are kept as-is because the
#     chain set-up that follows refers to them.
f = dict(hello='world', v=[3, 1, 4, 1, 5], n_favorite=7)
g = dict(a=1, b=2, c=3)
h = [2, 7]

#########################################################################################
# --- Set up the chains and links.

# First chain.
ch = Chain('chain1')
示例#3
0
#########################################################################################
# --- Minimal analysis information: register this macro's name and version on the
#     shared configuration object.

settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'esk411_weibull_predictive_maintenance'
settings['version'] = 0

#########################################################################################
# --- Analysis values, settings, helper functions and configuration flags.

# Announce where the report is written; the directory comes from the persistence helper.
msg = (
    '\n\n'
    'The plots and latex report produced by link WsUtils can be found in dir:\n'
    '{path}\n'
)
logger.info(
    msg,
    path=persistence.io_path('results_data', 'report'),
)

# Step flags, one per line so each can be toggled individually.
settings['generate'] = True
# settings['read_data'] = not settings['generate']
settings['model'] = True
settings['process'] = True
settings['fit_plot'] = True
settings['summary'] = True

# Analysis values read further down in the macro.
fitpdf = 'sum3pdf'
n_percentile_bins = 300

#########################################################################################
# --- now set up the chains and links based on configuration flags

if settings['model']:
示例#4
0
###############################################################################
# --- minimal analysis information
settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'tutorial_5'
settings['version'] = 0

###############################################################################
# --- Create, compile and load the pdf model. It is either built on the fly
#     or picked up from a shared library produced by an earlier build.
pdf_name = 'MyPdf'
pdf_lib_base = pdf_name + '_cxx'
pdf_lib_ext = '.so'
pdf_lib_name = pdf_lib_base + pdf_lib_ext

# TSystem::Load returns 0 on success, 1 when the library was already loaded,
# and a negative code on failure (missing library, error, version mismatch).
# Only a negative code means the library is unavailable and must be built;
# testing "!= 0" would needlessly regenerate and recompile the class when the
# library is already loaded in this session.
if ROOT.gSystem.Load(pdf_lib_name) < 0:
    logger.info('Building and compiling RooFit pdf {name}.', name=pdf_name)
    # Generate the source of a RooFit pdf class named after pdf_name, with
    # observable x and parameters A and B; the last string supplies the
    # analytical integral over x.
    ROOT.RooClassFactory.makePdf(
        pdf_name, "x,A,B", "", "A*fabs(x)+pow(x-B,2)", True, False,
        "x:(A/2)*(pow(x.max(rangeName),2)+pow(x.min(rangeName),2))"
        "+(1./3)*(pow(x.max(rangeName)-B,3)-pow(x.min(rangeName)-B,3))")
    # Compile the generated class and load it into ROOT on the fly.
    ROOT.gROOT.ProcessLineSync(".x {}.cxx+".format(pdf_name))

# --- check existence of class MyPdf in ROOT
logger.info('Now checking existence of ROOT class {name}.', name=pdf_name)
cl = ROOT.TClass.GetClass(pdf_name)
if not cl:
    # NOTE(review): nothing visible here stops the macro after this message;
    # confirm whether logger.fatal aborts or an explicit exit should follow.
    logger.fatal(
        'Could not find ROOT class {name}. Did you build and compile it correctly?',
        name=pdf_name)
# --- minimal analysis information
#     Fetch the shared configuration object from the process manager and
#     register this macro's analysis name and version on it.
settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'esk601_spark_configuration'
settings['version'] = 0

##########################################################################
# --- get Spark Manager to start/stop Spark
sm = process_manager.service(SparkManager)

##########################################################################
# --- METHOD 1: configuration file
#     The session is created from the Eskapade settings object, and the
#     resulting Spark configuration is logged for inspection.

spark = sm.create_session(eskapade_settings=settings)
sc = spark.sparkContext

logger.info('---> METHOD 1: configuration file')
logger.info(str(sc.getConf().getAll()))

##########################################################################
# --- METHOD 2: link
#     The Spark settings are given as (key, value) pairs on a
#     SparkConfigurator link, which is then added to a chain.

conf_link = SparkConfigurator(name='SparkConfigurator', log_level='WARN')
# The application name is derived from the analysis name with a '_link' suffix.
conf_link.spark_settings = [('spark.app.name',
                             settings['analysisName'] + '_link'),
                            ('spark.master', 'local[42]'),
                            ('spark.driver.host', '127.0.0.1')]

# Chain that holds the configuration link.
config = Chain('Config')
config.add(conf_link)

logger.info('---> METHOD 2: link')
示例#6
0
import pandas as pd

from eskapade import analysis, process_manager, visualization, ConfigObject, Chain
from eskapade.logger import Logger

logger = Logger()

#########################################################################################
# --- Remind the user to fetch the input dataset before running this macro.

msg = (
    '\n\n'
    'Be sure to download the input dataset:\n'
    '\n'
    '$ wget https://s3-eu-west-1.amazonaws.com/kpmg-eskapade-share/data/LAozone.data\n'
)
logger.info(msg)

#########################################################################################
# --- Minimal analysis information: register the analysis name on the shared
#     configuration object.

settings = process_manager.service(ConfigObject)
settings['analysisName'] = 'Tutorial_1'

#########################################################################################
# --- Analysis values, settings, helper functions and configuration flags.

# Human-readable label per variable name (presumably used for plot labels).
VAR_LABELS = {
    'doy': 'Day of year',
    'date': 'Date',
    'vis': 'Visibility',
    'vis_km': 'Visibility',
}
# Unit string per variable name, for the variables that have one.
VAR_UNITS = {
    'vis': 'mi',
    'vis_km': 'km',
}


def comp_date(day):