Exemplo n.º 1
0
import time
from awrams.models.model import Model
from .clustered import build_sim_pickle, launch_sim_from_pickle
import shutil
import os
from awrams.utils import config_manager
from awrams.cluster.support import build_mpi_call_str, build_pbs_header, \
                                   build_full_pbs_file, get_pbs_header_options,\
                                   RemoteJobSpec

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('server')


class SimulationServer:
    def __init__(self, model, sys_settings=None):

        if not isinstance(model, Model):
            raise TypeError("model must be of type awrams.models.model.Model")
        ### defaults

        if sys_settings is None:
            sys_settings = config_manager.get_system_profile().get_settings()

        self.sys_settings = sys_settings

        sim_settings = sys_settings['SIMULATION']

        self.spatial_chunk = sim_settings['SPATIAL_CHUNK']
        self.time_chunk = sim_settings['TIME_CHUNK']
        self.min_cells_per_worker = sim_settings['MIN_CELLS_PER_WORKER']
Exemplo n.º 2
0
import h5py
import numpy as np
from collections import OrderedDict
import types

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('awrams.utils.io')

#from awrams.utils.settings import VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION#pylint: disable=no-name-in-module

from awrams.utils.config_manager import get_system_profile

sys_settings = get_system_profile().get_settings()
VAR_CHUNK_CACHE_SIZE = sys_settings['IO_SETTINGS']['VAR_CHUNK_CACHE_SIZE']
VAR_CHUNK_CACHE_NELEMS = sys_settings['IO_SETTINGS']['VAR_CHUNK_CACHE_NELEMS']
VAR_CHUNK_CACHE_PREEMPTION = sys_settings['IO_SETTINGS'][
    'VAR_CHUNK_CACHE_PREEMPTION']

# Module-level HDF5 file-access property list, configured once at import.
propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
# Copy the current cache settings into a list so entries can be overridden
# before being written back with set_cache() below.
settings = list(propfaid.get_cache())
#settings[1]        # size of hash table
# Chunk cache size in bytes is set to 0 here. A previous value of 2**17
# (=131072) was noted as big enough for 5x (75, 1, 50) chunks; the default
# is 2**20 (=1048576).
settings[2] = 0
settings[3] = 1.  # preemption 1 suited to whole chunk read/write
propfaid.set_cache(*settings)
propfaid.set_fapl_sec2()
propfaid.set_sieve_buf_size(0)
propfaid.set_fclose_degree(h5py.h5f.CLOSE_STRONG)
#propfaid.set_fapl_stdio()
Exemplo n.º 3
0
import os

import awrams.utils.datetools as dt
from awrams.utils.io.general import h5py_cleanup_nc_mess
from awrams.utils.datetools import resample_dti, truncate_resample_dti, truncate_dti
from awrams.utils.processing.time_conversion import resample_data

from awrams.utils.awrams_log import get_module_logger

logger = get_module_logger('daily_monthly_sched')


def process(var_map, out_path, period, to_freq, method='mean', file_mode='w'):
    '''

    :param var_map: {var_name:file_name(with wildcard)
    :param period: pandas DatetimeIndex
    :param to_freq: monthly or annual
    :param method: mean or sum
    :param file_mode: w to replace existing or a to append
    '''

    if to_freq.lower() == 'monthly':
        to_freq = 'M'
    elif to_freq.lower() == 'annual':
        to_freq = 'A'

    for variable in var_map:
        h5py_cleanup_nc_mess()
        try:
            logger.info("Converting to %s: %s",
Exemplo n.º 4
0
import json
import numpy as np
import datetime
import dateutil
from awrams.utils import mapping_types as mt
from awrams.utils.messaging.buffer_group import DataSpec
from awrams.utils.io.data_mapping import FlatFileManager, AnnualSplitFileManager
from awrams.utils.mapping_types import extent_to_spatial_coords, CoordinateSet
from awrams.utils.extents import default

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('nodes')


def callable_to_funcspec(c):
    """
    Describe a callable as a plain dict with 'name' and 'module' entries.

    The module is taken from the callable's own ``__module__`` when it has
    one, otherwise from the ``__module__`` of its class.

    :param c: object exposing ``__name__``
    :return: dict of the form {'name': ..., 'module': ...}
    """
    spec = {'name': c.__name__}
    owner = c if hasattr(c, '__module__') else c.__class__
    spec['module'] = owner.__module__
    return spec


def funcspec_to_callable(func_spec):
    """
    Resolve a {'module': ..., 'name': ...} dict back to the named attribute.

    :param func_spec: mapping with 'module' (importable module path) and
                      'name' (attribute to fetch from that module)
    :return: the attribute looked up on the imported module
    """
    from importlib import import_module
    return getattr(import_module(func_spec['module']), func_spec['name'])


class GraphNode:
Exemplo n.º 5
0
import numpy as np
from awrams.utils import geo
from awrams.utils.helpers import Indexer as _ix
from awrams.utils.helpers import index
from awrams.utils.geo import get_geounit, get_geopoint
from copy import deepcopy
from awrams.utils.awrams_log import get_module_logger
from awrams.utils import config_manager
import os

logger = get_module_logger('awrams.utils.extents')
import warnings


class Extent:
    def __init__(self,
                 parent_ref,
                 lat_offset=0,
                 lon_offset=0,
                 nlats=None,
                 nlons=None,
                 mask=False,
                 areas=None,
                 area_sum=None):
        if nlats is None:
            nlats = parent_ref.nlats
        if nlons is None:
            nlons = parent_ref.nlons

        if mask is not False:
            if (nlats, nlons) != mask.shape:
Exemplo n.º 6
0
import awrams.utils.datetools as dt
from awrams.utils.fs import FileMatcher
from awrams.utils.io.netcdf_wrapper import geospatial_reference_from_nc, set_chunk_cache, start_date, end_date, epoch_from_nc
from awrams.utils.ts.time_series_infilling import FailOnDataGaps, FillWithZeros
from awrams.utils.awrams_log import get_module_logger
from awrams.utils.io import db_open_with
import pandas as pd
from awrams.utils.helpers import aquantize
from awrams.utils.settings import VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION, DB_OPEN_WITH #pylint: disable=no-name-in-module
from awrams.utils.io.input_buffer import InputReader
#from db_helper import mdc, _h5py
import re
from awrams.utils.messaging.general import NULL_CHUNK
from collections import Iterable

logger = get_module_logger('climate_data')

def isiterable(obj):
    """
    Return True when ``obj`` is an instance of the ``Iterable`` ABC.

    :param obj: any object
    :return: bool
    """
    # Take the ABC from collections.abc: the ``collections.Iterable`` alias
    # was deprecated in Python 3.3 and removed in Python 3.10.
    from collections.abc import Iterable
    return isinstance(obj, Iterable)

class BridgedDataSet:
    def connect_reader_bridge(self,bridge):
        '''
        Attach a reader bridge and reset the chunk/period cursors.

        :param bridge: stored on the instance as ``self.bridge``
        '''
        self.bridge = bridge
        # No chunk is loaded yet, and both cursors start at -1
        self.cur_chunk = NULL_CHUNK
        self.cur_chunk_idx = self.cur_period_idx = -1

    def set_active_period(self,period_idx):
        self.cur_period_idx += 1
        self.cur_chunk_idx = -1
        self.cur_chunk = NULL_CHUNK
Exemplo n.º 7
0
import matplotlib.pylab as plt
import numpy as np
import pandas as pd

import awrams.utils.datetools as dt
from awrams.utils.metatypes import ObjectDict, New

from .stats import build_stats_df, standard_percentiles
from .utils import valid_only, infer_freq, resample_to_months_df, resample_to_years_df
from .model import Selector
import awrams.benchmarking.config as cfg

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('comparison')

SAMPLE_RATE = {
    'd': 1,
    'daily': 1,
    'm': 2,
    'monthly': 2,
    'y': 3,
    'yearly': 3,
    'annually': 3
}


class ComparisonSet(object):
    def __init__(self,
                 obs_df,
                 ref_name,
                 var_name,
Exemplo n.º 8
0
import multiprocessing as mp

try:
    # Best-effort: select the 'forkserver' start method and ask the fork
    # server to preload the listed modules.
    mp.set_start_method('forkserver')

    mp.set_forkserver_preload([
        'numpy', 'pandas', 'WIP.robust', 'Support.Interaction.datetools',
        'netCDF4'
    ])

except Exception:
    # Deliberately ignored, e.g. RuntimeError when the start method has
    # already been set or ValueError where 'forkserver' is unavailable.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    pass

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('time_conversion')


def resample_data(in_path,
                  in_pattern,
                  variable,
                  period,
                  out_path,
                  to_freq,
                  method,
                  mode='w',
                  enforce_mask=True,
                  extent=None,
                  use_weights=False):
    '''
    method is 'sum' or 'mean'
    if no extent is supplied then the full (unmasked) input will be used
Exemplo n.º 9
0
from awrams.utils.messaging.robust import PollingChild, SharedMemClient, Chunk, to_chunks
from awrams.utils.messaging.general import message
from awrams.utils.nodegraph import graph
from awrams.utils.mapping_types import gen_coordset
from awrams.utils import datetools as dt

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('writer')

import time

class OutputGraphRunner(PollingChild,SharedMemClient):
    def __init__(self,qin,qout,buffers,extents,periods,mapping):
        '''
        Initialise both base classes and reset the runner's progress state.

        :param qin: forwarded to PollingChild.__init__
        :param qout: forwarded to PollingChild.__init__
        :param buffers: forwarded to SharedMemClient.__init__
        :param extents: passed, together with periods, to self._set_chunks
        :param periods: passed, together with extents, to self._set_chunks
        :param mapping: stored as self.mapping
        '''
        # Each base is initialised explicitly with its own arguments
        PollingChild.__init__(self,qin,qout)
        SharedMemClient.__init__(self,buffers)

        self._set_chunks(extents,periods)
        self.mapping = mapping
        # Progress-tracking state: no current chunk, nothing counted/completed
        self.cur_chunk = None
        self.cur_chunk_count = 0
        self.completed = 0
        self.finished = False

        self.daemon = True

    def run_setup(self):
        # import os
        # self.pid = os.getpid()
        # print("writer pid: %d"%self.pid,flush=True)
        #logger.info("writer pid: %d",self.pid)
        self.rebuild_buffers()
Exemplo n.º 10
0
# import scipy.stats as stats
import numpy as np
import pandas as pd

from .utils import valid_only

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('stats')


def nse(observed,modeled):
    '''
    Return the Nash-Sutcliffe efficiency of observed/modelled series.

    Computed as ``1 - sum((obs - mod)**2) / sum((obs - mean(obs))**2)``.

    :param observed: observed values (supporting elementwise arithmetic)
    :param modeled: modelled values, aligned with ``observed``
    :return: the efficiency score, or None when the denominator is zero
             (the observed values do not vary) and the score is undefined
    '''

    obs_mean = np.mean(observed)
    n = sum((observed - modeled)**2.0)
    d = sum((observed - obs_mean)**2.0)

    # numpy scalars do not raise ZeroDivisionError on division by zero (they
    # produce inf/nan plus a warning), so test the denominator explicitly.
    # Only scalar denominators are checked; array-valued ones divide
    # elementwise exactly as before.
    if np.ndim(d) == 0 and d == 0:
        return None

    try:
        return 1 - (n/d)
    except ZeroDivisionError:
        # Retained for non-numpy numeric types that do raise
        return None


class StatsPair:
    def __init__(self,observed,predicted,drop_nan=True,o_name='Observed',p_name='Predicted',v_name=''):
        if drop_nan:
            self.observed = valid_only(observed)
            self.predicted = valid_only(predicted)
        else:
Exemplo n.º 11
0
#sys.path.append("LocalPackages")
import netCDF4 as nc
import numpy as np
import datetime as dt
import tempfile
from awrams.utils.ts import gridded_time_series
import re
import pandas as pd
from calendar import isleap
from awrams.utils.ts.gridded_time_series import ClimateDataSet, FileMatcher, NoMatchingFilesException
from nose.tools import nottest, with_setup, assert_almost_equal, assert_equal, assert_true, assert_tuple_equal, raises
from numpy.testing import assert_array_equal
import os
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('test_gridded_time_series')
TEST_DATA_NUM_ROWS = 50
TEST_DATA_NUM_COLS = 100


def days_in_year(year):
    """Return 366 when ``year`` is a leap year, otherwise 365."""
    return 366 if isleap(year) else 365


def days_upto(year):
    """
    Return the number of days from the beginning of the test period to the
    beginning of the year specified
    """
Exemplo n.º 12
0
import cffi
import numpy as np
#from .template import _SOURCE_FN,_SOURCE_T_FN,_HEADER_FN,_HEADER_T_FN,_LIB_FN
from numbers import Number
import os
import shutil
import tempfile
from awrams.models.model import ModelRunner
from awrams.utils import config_manager
from hashlib import md5
from awrams.utils.awrams_log import get_module_logger

from .template import gen_templates, BASE_TEMPLATE
from awrams.utils import templates

logger = get_module_logger()

TYPEMAP = {np.float64: "double *", np.float32: "float *", np.dtype('float64'): "double *", np.dtype('float32'): "float *"}

def ccast(ndarr,ffi,to_type=np.float64,promote=True):
    """
    Cast the data address of a numpy array to a C pointer via ``ffi.cast``.

    :param ndarr: numpy array whose ``ctypes.data`` address is cast
    :param ffi: object providing ``cast(type_string, address)``
    :param to_type: required dtype; must be a key of TYPEMAP
    :param promote: when the dtype differs, convert with ``astype`` if True,
                    otherwise raise
    :return: result of ``ffi.cast`` with the C type string for ``to_type``
    :raises Exception: dtype mismatch while ``promote`` is False
    """
    dtype_matches = not (ndarr.dtype != to_type)
    if not dtype_matches:
        if not promote:
            raise Exception("Incorrect dtype",ndarr.dtype,to_type)
        # NOTE(review): the converted copy is only referenced locally while
        # the returned value is built from its raw address - confirm callers
        # do not rely on the pointer outliving that copy.
        ndarr = ndarr.astype(to_type)

    c_type = TYPEMAP[to_type]
    return ffi.cast(c_type,ndarr.ctypes.data)

def build_model(build_str):
    import subprocess
Exemplo n.º 13
0
'''
Provides functionality for filling gaps in gridded time series using, for example,
precomputed climatologies.
'''

import numpy as np
import awrams.utils.datetools as dt
from awrams.utils.fs import md5_file

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('climate_data_infilling')

FILL_VALUE = -999.0


class ClimateDataGapFiller(object):
    def any_nans(self, series):
        # Optimisation for rapid detection of nans
        # See http://stackoverflow.com/a/6736970
        return np.isnan(np.sum(series))

    def has_gaps(self, series, location):
        if self.any_nans(series):
            msg = "NaNs in Series at %s"
            logger.debug(msg, str(location))
            return msg % str(location)

        if (series == FILL_VALUE).any():
            msg = "Series masked at %s"
            logger.debug(msg, str(location))
            return msg % str(location)
Exemplo n.º 14
0
import multiprocessing as mp
from awrams.utils.messaging import message
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('mp_parent')


class MultiprocessingParent(object):
    """
    Parent for classes that need to manage multiple, concurrent workers
    using Python multiprocessing
    """
    def __init__(self):
        self.control_q = mp.Queue()

        self.child_procs = {}
        self.acknowledgements = {}

    def add_child_proc(self, process, msg_q):
        '''
        Register a child process to ensure correct termination
        when it has finished working; wait on this message before terminating
        '''
        if not process.is_alive():
            process.start()

        self.child_procs[process.pid] = {'process': process, 'msg_q': msg_q}

    def terminate_children(self):
        '''
        Terminate the simulation; close any open subprocesses
        '''
Exemplo n.º 15
0
from collections import OrderedDict
from numbers import Number
from .nodes import DataSpec, InputNode, ProcessNode, get_expanded, get_flattened, static
import time
from awrams.utils.mapping_types import gen_coordset
import pandas as pd

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('graph')


def find_heads(nodes):
    '''
    Split nodes into heads (no inputs) and tails (one or more inputs).

    :param nodes: mapping of key -> node, each node exposing ``inputs``
    :return: (heads, tails); heads is an OrderedDict in iteration order of
             ``nodes``, tails is a plain dict
    :raises Exception: when any node value is None
    '''
    heads = OrderedDict()
    tails = {}
    for key, node in nodes.items():
        if node is None:
            raise Exception("Node value unspecified", key)
        bucket = heads if len(node.inputs) == 0 else tails
        bucket[key] = node
    return heads, tails


def find_endpoint_keys(nodes):
    '''
Exemplo n.º 16
0
from multiprocessing import Process
from multiprocessing.queues import Empty, Full
from awrams.utils.messaging.general import message

from awrams.utils.awrams_log import get_module_logger

logger = get_module_logger('robust')


class ControlInterrupt(Exception):
    """Exception type named for control interrupts; adds nothing to Exception."""


class ChunksComplete(Exception):
    """Exception type named for chunk completion; adds nothing to Exception."""


def chunk_message(chunk_idx, period_idx, data=None):
    """
    Build a 'chunk' message carrying chunk/period indices and a data payload.

    :param chunk_idx: stored under content['chunk_idx']
    :param period_idx: stored under content['period_idx']
    :param data: stored under content['data']; a fresh empty dict when None
    :return: the message object returned by ``message('chunk')``
    """
    msg = message('chunk')
    payload = msg['content']
    payload['chunk_idx'] = chunk_idx
    payload['period_idx'] = period_idx
    # A new dict per call avoids sharing one default between messages
    payload['data'] = {} if data is None else data
    return msg

Exemplo n.º 17
0
from awrams.utils.messaging.robust import *
from awrams.utils.nodegraph import graph
from awrams.utils import mapping_types as mt
from copy import deepcopy

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('reader')


class InputGraphRunner(PollingChild,SharedMemClient):
    '''
    Runs an ExecutionGraph of input nodes and streams their
    outputs via shared memory.
    '''

    def __init__(self,inq,outq,buffers,extents,periods,mapping,state_keys):
        PollingChild.__init__(self,inq,outq)
        SharedMemClient.__init__(self,buffers)

        # self.finished = False
        self._set_chunks(extents,periods)
        self.mapping = mapping
        self.state_period = dict([(i,-1) for i in range(len(extents))])
        self.state_buffers = dict([(i,None) for i in range(len(extents))])
        self.state_keys = state_keys
        self.recycle_states = False
        self.cur_chunk = None
        self.finished = False
        
        self.daemon = True
Exemplo n.º 18
0
import numpy as np
import sys
import traceback
import os
import pickle
import multiprocessing as mp
import awrams.utils.datetools as dt
import awrams.utils.extents as extents

from awrams.utils.messaging.general import *
from awrams.utils.profiler import Profiler
from awrams.utils.io.db_helper import _nc as db_opener
# from awrams.utils.io.db_helper import _h5py as db_opener

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('input_reader')


def to_chunk_idx(cell, c_shape):
    """
    Return the 2d index of the chunk that contains ``cell``.

    Floor division is used so that the result is an integer index; true
    division would produce fractional floats under Python 3.

    :param cell: 2-sequence of cell indices
    :param c_shape: 2-sequence giving the chunk size along each axis
    :return: tuple (cell[0] // c_shape[0], cell[1] // c_shape[1])
    """
    return (cell[0] // c_shape[0], cell[1] // c_shape[1])


def offset_slice(in_slice, offset):
    """
    Return a new slice with start and stop shifted by ``offset``.

    Only ``start`` and ``stop`` are carried over; the step of ``in_slice``
    is not used.

    :param in_slice: slice with numeric start and stop
    :param offset: amount added to both bounds
    """
    shifted_start = in_slice.start + offset
    shifted_stop = in_slice.stop + offset
    return slice(shifted_start, shifted_stop)


def build_chunk_map(ref_extent, subset_extent, c_shape):
    '''
    Returns a pair of chunk maps in the coordinates of both the reference and subset extents
    Assumes just 2d (spatial) chunking; ignore time
    '''
Exemplo n.º 19
0
import csv
import numpy as np
import pandas as pd

import awrams.utils.datetools as dt
from awrams.utils.helpers import sanitize_cell
import awrams.utils.extents as extents

from awrams.utils import config_manager
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('utils')

system_profile = config_manager.get_system_profile().get_settings()

BENCHMARK_SITES = system_profile['BENCHMARKING']['BENCHMARK_SITES']
MONTHLY_REJECTION_THRESHOLD = system_profile['BENCHMARKING'][
    'MONTHLY_REJECTION_THRESHOLD']
ANNUAL_REJECTION_THRESHOLD = system_profile['BENCHMARKING'][
    'ANNUAL_REJECTION_THRESHOLD']


def infer_freq(df):
    """
    Classify the index frequency of ``df`` as 'm', 'y' or 'd'.

    :param df: object whose ``index.inferred_freq`` is a frequency alias
               string or None
    :return: 'y' for annual aliases, 'm' for monthly aliases, otherwise 'd'
             (also used when no frequency could be inferred)
    """
    freq = df.index.inferred_freq
    if freq is None:
        # No regular frequency could be inferred; use the same default as
        # any other unrecognised frequency.
        return 'd'

    # Look only at the part before any anchor suffix ('A-MAR', 'W-SAT', ...)
    # so letters in month/day anchor names are not mistaken for the
    # frequency itself.
    base = freq.split('-')[0]

    # Annual aliases contain 'A' (e.g. 'A', 'AS', 'BA') or 'Y' ('YE', 'YS')
    if 'A' in base or 'Y' in base:
        return 'y'
    if 'M' in base:
        return 'm'
    return 'd'
Exemplo n.º 20
0
import pandas as pd

from awrams.models import awral
from awrams.simulation.ondemand import OnDemandSimulator
from .evaluators import Evaluator
from .objectives import NSE

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('calibrate')

MODEL = None
input_map = None
objective = NSE


def set_model(model=awral):
    """
    Select the model used by this module and store its default mapping.

    Rebinds the module-level ``MODEL`` and ``input_map`` globals.

    :param model: object providing ``get_default_mapping()``; defaults to
                  ``awral``
    """
    global MODEL, input_map
    MODEL = model
    # MODEL is rebound before the mapping is fetched
    input_map = model.get_default_mapping()


set_model()


def get_parameter_df(mapping):
    params = [(k, v) for (k, v) in mapping.items() if 'Min' in v.properties]
    params = [{
        'Name': k,
        'Min': v.properties['Min'],
        'Max': v.properties['Max'],
        'Value': v.args['value']
Exemplo n.º 21
0
import numpy as np
import pandas as pd
from awrams.utils.awrams_log import get_module_logger
import multiprocessing as mp
from awrams.utils.messaging.binding import MessageHandler, MultiprocessingParent, QueueChild, bound_proxy
from awrams.utils.messaging.general import message
import time

logger = get_module_logger('SCE')


class ShuffledOptimizer:
    def __init__(self,
                 complex_sz,
                 n_complexes,
                 parameters,
                 eval_fac,
                 min_complexes=1):
        '''
        s : pop_size (initial population)
        m : complex size
        p : number of complexes
        pmin : minimum number of complexes
        '''
        self.complex_sz = complex_sz
        self.n_complexes = n_complexes
        self._n_complexes = n_complexes
        self.pop_size = complex_sz * n_complexes
        self.parameters = parameters
        self.min_complexes = min_complexes
Exemplo n.º 22
0
import re
import sys
import numpy as np
import datetime as dt

from awrams.utils.metatypes import ObjectDict
from awrams.utils.helpers import iround

# +++ Should probably stop logging from this file...
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('netcdf_wrapper')

from awrams.utils.settings import DEFAULT_CHUNKSIZE, VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION, VARIABLE_PRECISION, DEFAULT_PRECISION  #pylint: disable=no-name-in-module


def set_chunk_cache(dataset, variable, **params):
    """
    Apply chunk-cache settings to one variable of ``dataset``.

    Defaults come from the module-level VAR_CHUNK_CACHE_* settings; any of
    ``var_chunk_cache_size``, ``var_chunk_cache_nelems`` and
    ``var_chunk_cache_preemption`` may be overridden through ``params``.

    :param dataset: object exposing ``variables[variable]``
    :param variable: key of the variable to configure
    """
    cache_opts = {
        'var_chunk_cache_size': VAR_CHUNK_CACHE_SIZE,
        'var_chunk_cache_nelems': VAR_CHUNK_CACHE_NELEMS,
        'var_chunk_cache_preemption': VAR_CHUNK_CACHE_PREEMPTION,
    }
    cache_opts.update(params)

    target = dataset.variables[variable]
    target.set_var_chunk_cache(
        size=cache_opts['var_chunk_cache_size'],
        nelems=cache_opts['var_chunk_cache_nelems'],
        preemption=cache_opts['var_chunk_cache_preemption'])


def dtype_for_variable(var):
    if var in VARIABLE_PRECISION:
        return np.dtype(VARIABLE_PRECISION[var])
    else:
Exemplo n.º 23
0
import numpy as np
import datetime as dt
from awrams.utils.ts.time_series_infilling import FillWithZeros, FillWithClimatology
from .test_gridded_time_series import create_mock_data, create_dummy_dataset
from nose.tools import with_setup, raises, assert_list_equal, assert_equal
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('test_data_infilling')


def setup_gappy():
    """Bind the module-level ``dataset`` to a dummy dataset opened with gaps."""
    global dataset
    gappy_dataset = create_dummy_dataset(opener=mock_open_gappy_data)
    dataset = gappy_dataset


def mock_open_gappy_data(self, fn):
    """
    Opener replacement that returns gappy mock data for ``fn``.

    :param self: unused; present so the function has a method-style signature
    :param fn: passed through to ``create_gappy_data``
    """
    gappy = create_gappy_data(fn)
    return gappy


def create_gappy_data(fn):
    # insert_gaps method used because broadcasting doesn't seem to work
    # with diskless netCDF files with python-netCDF4
    def insert_gaps(array):
        array[5, :, :] = np.nan
        array[10, 20, 30] = np.nan
        array[15:30, 20, 30] = np.nan
        array[50, 10:18, 30] = np.nan

    data = create_mock_data(fn, data_modifier=insert_gaps)
    #    data.variables['temp_min'][5,:,:] = np.nan
    #    data.variables['temp_min'][10,20,30] = np.nan
    #    data.variables['temp_min'][15:30,20,30] = np.nan