import time
from awrams.models.model import Model
from .clustered import build_sim_pickle, launch_sim_from_pickle
import shutil
import os
from awrams.utils import config_manager
from awrams.cluster.support import build_mpi_call_str, build_pbs_header, \
    build_full_pbs_file, get_pbs_header_options, \
    RemoteJobSpec

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('server')


class SimulationServer:

    def __init__(self, model, sys_settings=None):

        if not isinstance(model, Model):
            raise TypeError("model must be of type awrams.models.model.Model")

        ### defaults
        if sys_settings is None:
            sys_settings = config_manager.get_system_profile().get_settings()

        self.sys_settings = sys_settings

        sim_settings = sys_settings['SIMULATION']

        self.spatial_chunk = sim_settings['SPATIAL_CHUNK']
        self.time_chunk = sim_settings['TIME_CHUNK']
        self.min_cells_per_worker = sim_settings['MIN_CELLS_PER_WORKER']
import h5py
import numpy as np
from collections import OrderedDict
import types

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('awrams.utils.io')

#from awrams.utils.settings import VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION #pylint: disable=no-name-in-module
from awrams.utils.config_manager import get_system_profile

sys_settings = get_system_profile().get_settings()

VAR_CHUNK_CACHE_SIZE = sys_settings['IO_SETTINGS']['VAR_CHUNK_CACHE_SIZE']
VAR_CHUNK_CACHE_NELEMS = sys_settings['IO_SETTINGS']['VAR_CHUNK_CACHE_NELEMS']
VAR_CHUNK_CACHE_PREEMPTION = sys_settings['IO_SETTINGS']['VAR_CHUNK_CACHE_PREEMPTION']

propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
settings = list(propfaid.get_cache())
#settings[1]  # size of hash table
settings[2] = 0   # size of chunk cache in bytes (e.g. 2**17 = 131072, big enough
                  # for 5 x (75, 1, 50) chunks); default is 2**20 = 1048576
settings[3] = 1.  # preemption of 1 is suited to whole-chunk read/write
propfaid.set_cache(*settings)
propfaid.set_fapl_sec2()
propfaid.set_sieve_buf_size(0)
propfaid.set_fclose_degree(h5py.h5f.CLOSE_STRONG)
#propfaid.set_fapl_stdio()
import os

import awrams.utils.datetools as dt
from awrams.utils.io.general import h5py_cleanup_nc_mess
from awrams.utils.datetools import resample_dti, truncate_resample_dti, truncate_dti
from awrams.utils.processing.time_conversion import resample_data

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('daily_monthly_sched')


def process(var_map, out_path, period, to_freq, method='mean', file_mode='w'):
    '''
    :param var_map: {var_name: file_name (with wildcard)}
    :param period: pandas DatetimeIndex
    :param to_freq: 'monthly' or 'annual'
    :param method: 'mean' or 'sum'
    :param file_mode: 'w' to replace existing or 'a' to append
    '''
    if to_freq.lower() == 'monthly':
        to_freq = 'M'
    elif to_freq.lower() == 'annual':
        to_freq = 'A'

    for variable in var_map:
        h5py_cleanup_nc_mess()

        try:
            logger.info("Converting to %s: %s",
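# Example (illustrative sketch only): aggregating a daily variable to monthly means
# with process(). The variable name, file pattern and output path below are
# hypothetical, not taken from the package.
import pandas as pd

var_map = {'rain_day': '/data/climate/rain_day_*.nc'}
period = pd.date_range('2010-01-01', '2010-12-31', freq='D')
process(var_map, '/data/monthly/', period, to_freq='monthly', method='mean')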
import json
import numpy as np
import datetime
import dateutil

from awrams.utils import mapping_types as mt
from awrams.utils.messaging.buffer_group import DataSpec
from awrams.utils.io.data_mapping import FlatFileManager, AnnualSplitFileManager
from awrams.utils.mapping_types import extent_to_spatial_coords, CoordinateSet
from awrams.utils.extents import default

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('nodes')


def callable_to_funcspec(c):
    func_spec = dict(name=c.__name__)
    if hasattr(c, '__module__'):
        func_spec['module'] = c.__module__
    else:
        func_spec['module'] = c.__class__.__module__
    return func_spec


def funcspec_to_callable(func_spec):
    import importlib
    m = importlib.import_module(func_spec['module'])
    c = getattr(m, func_spec['name'])
    return c


class GraphNode:
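# Example (sketch): round-tripping a callable through a funcspec dict using the
# helpers above. A standard-library function keeps it self-contained; on POSIX
# systems os.path.join resolves to the 'posixpath' module.
from os.path import join

spec = callable_to_funcspec(join)      # e.g. {'name': 'join', 'module': 'posixpath'}
restored = funcspec_to_callable(spec)
assert restored is join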
import numpy as np
from awrams.utils import geo
from awrams.utils.helpers import Indexer as _ix
from awrams.utils.helpers import index
from awrams.utils.geo import get_geounit, get_geopoint
from copy import deepcopy
from awrams.utils.awrams_log import get_module_logger
from awrams.utils import config_manager
import os

logger = get_module_logger('awrams.utils.extents')

import warnings


class Extent:

    def __init__(self, parent_ref, lat_offset=0, lon_offset=0, nlats=None, nlons=None,
                 mask=False, areas=None, area_sum=None):

        if nlats is None:
            nlats = parent_ref.nlats
        if nlons is None:
            nlons = parent_ref.nlons

        if mask is not False:
            if (nlats, nlons) != mask.shape:
import awrams.utils.datetools as dt
from awrams.utils.fs import FileMatcher
from awrams.utils.io.netcdf_wrapper import geospatial_reference_from_nc, set_chunk_cache, start_date, end_date, epoch_from_nc
from awrams.utils.ts.time_series_infilling import FailOnDataGaps, FillWithZeros
from awrams.utils.awrams_log import get_module_logger
from awrams.utils.io import db_open_with
import pandas as pd
from awrams.utils.helpers import aquantize
from awrams.utils.settings import VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION, DB_OPEN_WITH #pylint: disable=no-name-in-module
from awrams.utils.io.input_buffer import InputReader
#from db_helper import mdc, _h5py
import re
from awrams.utils.messaging.general import NULL_CHUNK
from collections import Iterable

logger = get_module_logger('climate_data')


def isiterable(obj):
    return isinstance(obj, Iterable)


class BridgedDataSet:

    def connect_reader_bridge(self, bridge):
        self.bridge = bridge
        self.cur_chunk = NULL_CHUNK
        self.cur_chunk_idx = -1
        self.cur_period_idx = -1

    def set_active_period(self, period_idx):
        self.cur_period_idx += 1
        self.cur_chunk_idx = -1
        self.cur_chunk = NULL_CHUNK
import matplotlib.pylab as plt
import numpy as np
import pandas as pd

import awrams.utils.datetools as dt
from awrams.utils.metatypes import ObjectDict, New
from .stats import build_stats_df, standard_percentiles
from .utils import valid_only, infer_freq, resample_to_months_df, resample_to_years_df
from .model import Selector
import awrams.benchmarking.config as cfg

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('comparison')

SAMPLE_RATE = {
    'd': 1,
    'daily': 1,
    'm': 2,
    'monthly': 2,
    'y': 3,
    'yearly': 3,
    'annually': 3
}


class ComparisonSet(object):

    def __init__(self, obs_df, ref_name, var_name,
import multiprocessing as mp

try:
    mp.set_start_method('forkserver')
    mp.set_forkserver_preload([
        'numpy', 'pandas', 'WIP.robust', 'Support.Interaction.datetools', 'netCDF4'
    ])
except:
    pass

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('time_conversion')


def resample_data(in_path, in_pattern, variable, period, out_path, to_freq, method,
                  mode='w', enforce_mask=True, extent=None, use_weights=False):
    '''
    method is 'sum' or 'mean'
    if no extent is supplied then the full (unmasked) input will be used
from awrams.utils.messaging.robust import PollingChild, SharedMemClient, Chunk, to_chunks
from awrams.utils.messaging.general import message
from awrams.utils.nodegraph import graph
from awrams.utils.mapping_types import gen_coordset
from awrams.utils import datetools as dt

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('writer')

import time


class OutputGraphRunner(PollingChild, SharedMemClient):

    def __init__(self, qin, qout, buffers, extents, periods, mapping):
        PollingChild.__init__(self, qin, qout)
        SharedMemClient.__init__(self, buffers)

        self._set_chunks(extents, periods)
        self.mapping = mapping

        self.cur_chunk = None
        self.cur_chunk_count = 0
        self.completed = 0

        self.finished = False
        self.daemon = True

    def run_setup(self):
        # import os
        # self.pid = os.getpid()
        # print("writer pid: %d" % self.pid, flush=True)
        #logger.info("writer pid: %d", self.pid)
        self.rebuild_buffers()
# import scipy.stats as stats
import numpy as np
import pandas as pd

from .utils import valid_only

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('stats')


def nse(observed, modeled):
    '''
    Return the Nash-Sutcliffe efficiency of an observed/modelled pair of series
    '''
    obs_mean = np.mean(observed)
    n = sum((observed - modeled) ** 2.0)
    d = sum((observed - obs_mean) ** 2.0)
    try:
        return 1 - (n / d)
    except ZeroDivisionError:
        return None


class StatsPair:

    def __init__(self, observed, predicted, drop_nan=True, o_name='Observed', p_name='Predicted', v_name=''):
        if drop_nan:
            self.observed = valid_only(observed)
            self.predicted = valid_only(predicted)
        else:
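# Example (sketch): NSE for a perfect prediction is 1.0 and decreases as the
# modelled series diverges from the observations. The arrays are illustrative only.
import numpy as np

obs = np.array([1.0, 2.0, 3.0, 4.0])
nse(obs, obs)                              # 1.0
nse(obs, np.array([1.1, 1.9, 3.2, 3.8]))   # 0.98, slightly below 1.0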
#sys.path.append("LocalPackages")
import netCDF4 as nc
import numpy as np
import datetime as dt
import tempfile
from awrams.utils.ts import gridded_time_series
import re
import pandas as pd
from calendar import isleap
from awrams.utils.ts.gridded_time_series import ClimateDataSet, FileMatcher, NoMatchingFilesException
from nose.tools import nottest, with_setup, assert_almost_equal, assert_equal, assert_true, assert_tuple_equal, raises
from numpy.testing import assert_array_equal
import os

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('test_gridded_time_series')

TEST_DATA_NUM_ROWS = 50
TEST_DATA_NUM_COLS = 100


def days_in_year(year):
    if isleap(year):
        return 366
    return 365


def days_upto(year):
    """
    Return the number of days from the beginning of the test period
    to the beginning of the year specified
    """
import cffi
import numpy as np
#from .template import _SOURCE_FN,_SOURCE_T_FN,_HEADER_FN,_HEADER_T_FN,_LIB_FN
from numbers import Number
import os
import shutil
import tempfile
from awrams.models.model import ModelRunner
from awrams.utils import config_manager
from hashlib import md5
from awrams.utils.awrams_log import get_module_logger
from .template import gen_templates, BASE_TEMPLATE
from awrams.utils import templates

logger = get_module_logger()

TYPEMAP = {
    np.float64: "double *",
    np.float32: "float *",
    np.dtype('float64'): "double *",
    np.dtype('float32'): "float *"
}


def ccast(ndarr, ffi, to_type=np.float64, promote=True):
    if ndarr.dtype != to_type:
        if promote:
            ndarr = ndarr.astype(to_type)
        else:
            raise Exception("Incorrect dtype", ndarr.dtype, to_type)
    typestr = TYPEMAP[to_type]
    return ffi.cast(typestr, ndarr.ctypes.data)


def build_model(build_str):
    import subprocess
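# Example (sketch): casting a numpy buffer to a cffi pointer with ccast(), as
# might be done before handing an array to compiled model code. The array and
# dtype are illustrative only.
import cffi
import numpy as np

ffi = cffi.FFI()
arr = np.arange(4, dtype=np.float64)
ptr = ccast(arr, ffi)   # cdata 'double *' pointing at arr's buffer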
'''
Provides functionality for filling gaps in gridded time series using,
for example, precomputed climatologies.
'''

import numpy as np

import awrams.utils.datetools as dt
from awrams.utils.fs import md5_file

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('climate_data_infilling')

FILL_VALUE = -999.0


class ClimateDataGapFiller(object):

    def any_nans(self, series):
        # Optimisation for rapid detection of NaNs
        # See http://stackoverflow.com/a/6736970
        return np.isnan(np.sum(series))

    def has_gaps(self, series, location):
        if self.any_nans(series):
            msg = "NaNs in Series at %s"
            logger.debug(msg, str(location))
            return msg % str(location)
        if (series == FILL_VALUE).any():
            msg = "Series masked at %s"
            logger.debug(msg, str(location))
            return msg % str(location)
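# Example (sketch): the any_nans() optimisation relies on NaN propagating through
# np.sum, so a single reduction detects gaps without building a boolean mask.
# The arrays below are illustrative only.
import numpy as np

clean = np.ones(10)
gappy = clean.copy()
gappy[3] = np.nan

np.isnan(np.sum(clean))   # False
np.isnan(np.sum(gappy))   # True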
import multiprocessing as mp

from awrams.utils.messaging import message

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('mp_parent')


class MultiprocessingParent(object):
    """
    Parent for classes that need to manage multiple, concurrent workers
    using Python multiprocessing
    """

    def __init__(self):
        self.control_q = mp.Queue()
        self.child_procs = {}
        self.acknowledgements = {}

    def add_child_proc(self, process, msg_q):
        '''
        Register a child process to ensure correct termination when it has
        finished working; wait on this message before terminating
        '''
        if not process.is_alive():
            process.start()
        self.child_procs[process.pid] = {'process': process, 'msg_q': msg_q}

    def terminate_children(self):
        '''
        Terminate the simulation; close any open subprocesses
        '''
from collections import OrderedDict
from numbers import Number

from .nodes import DataSpec, InputNode, ProcessNode, get_expanded, get_flattened, static
import time
from awrams.utils.mapping_types import gen_coordset
import pandas as pd

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('graph')


def find_heads(nodes):
    '''
    Separate nodes into no-upstream-dependency (heads) and others (tails)
    '''
    #heads = []
    heads = OrderedDict()
    tails = {}
    for k, n in nodes.items():
        if n is None:
            raise Exception("Node value unspecified", k)
        if len(n.inputs) == 0:
            heads[k] = n
            #heads.append(k)
        else:
            tails[k] = n
    return heads, tails


def find_endpoint_keys(nodes):
    '''
from multiprocessing import Process
from multiprocessing.queues import Empty, Full

from awrams.utils.messaging.general import message

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('robust')


class ControlInterrupt(Exception):
    pass


class ChunksComplete(Exception):
    pass


def chunk_message(chunk_idx, period_idx, data=None):
    chunk_msg = message('chunk')
    content = chunk_msg['content']
    content['chunk_idx'] = chunk_idx
    content['period_idx'] = period_idx
    if data is None:
        data = {}
    content['data'] = data
    return chunk_msg
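# Example (sketch): chunk_message(), as defined above, wraps a 'chunk' message and
# fills in its content; the indices below are illustrative only.
msg = chunk_message(chunk_idx=3, period_idx=0)
msg['content']['chunk_idx']   # 3
msg['content']['period_idx']  # 0
msg['content']['data']        # {} when no payload is supplied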
from awrams.utils.messaging.robust import *
from awrams.utils.nodegraph import graph
from awrams.utils import mapping_types as mt
from copy import deepcopy

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('reader')


class InputGraphRunner(PollingChild, SharedMemClient):
    '''
    Runs an ExecutionGraph of input nodes and streams their outputs
    via shared memory.
    '''

    def __init__(self, inq, outq, buffers, extents, periods, mapping, state_keys):
        PollingChild.__init__(self, inq, outq)
        SharedMemClient.__init__(self, buffers)

        # self.finished = False

        self._set_chunks(extents, periods)
        self.mapping = mapping

        self.state_period = dict([(i, -1) for i in range(len(extents))])
        self.state_buffers = dict([(i, None) for i in range(len(extents))])
        self.state_keys = state_keys
        self.recycle_states = False

        self.cur_chunk = None

        self.finished = False
        self.daemon = True
import numpy as np
import sys
import traceback
import os
import pickle
import multiprocessing as mp

import awrams.utils.datetools as dt
import awrams.utils.extents as extents
from awrams.utils.messaging.general import *
from awrams.utils.profiler import Profiler
from awrams.utils.io.db_helper import _nc as db_opener
# from awrams.utils.io.db_helper import _h5py as db_opener

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('input_reader')


def to_chunk_idx(cell, c_shape):
    # Floor division so the chunk index is an integer pair
    return (cell[0] // c_shape[0], cell[1] // c_shape[1])


def offset_slice(in_slice, offset):
    return slice(in_slice.start + offset, in_slice.stop + offset)


def build_chunk_map(ref_extent, subset_extent, c_shape):
    '''
    Returns a pair of chunk maps in the coordinates of both the reference
    and subset extents
    Assumes just 2d (spatial) chunking; ignores time
    '''
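# Example (sketch): mapping a cell to its chunk index and shifting a slice between
# reference and subset coordinates, using the helpers above. Values are illustrative.
to_chunk_idx((150, 430), (50, 50))   # (3, 8)
offset_slice(slice(10, 20), 5)       # slice(15, 25)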
import csv

import numpy as np
import pandas as pd

import awrams.utils.datetools as dt
from awrams.utils.helpers import sanitize_cell
import awrams.utils.extents as extents
from awrams.utils import config_manager

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('utils')

system_profile = config_manager.get_system_profile().get_settings()

BENCHMARK_SITES = system_profile['BENCHMARKING']['BENCHMARK_SITES']
MONTHLY_REJECTION_THRESHOLD = system_profile['BENCHMARKING']['MONTHLY_REJECTION_THRESHOLD']
ANNUAL_REJECTION_THRESHOLD = system_profile['BENCHMARKING']['ANNUAL_REJECTION_THRESHOLD']


def infer_freq(df):
    if 'M' in df.index.inferred_freq:
        return 'm'
    elif 'A' in df.index.inferred_freq:
        return 'y'
    elif 'D' in df.index.inferred_freq:
        return 'd'
    else:
        return 'd'
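# Example (sketch): infer_freq() keys off pandas' inferred index frequency; the
# frames below are illustrative only.
import pandas as pd

monthly = pd.DataFrame(index=pd.date_range('2000-01-31', periods=12, freq='M'))
daily = pd.DataFrame(index=pd.date_range('2000-01-01', periods=31, freq='D'))
infer_freq(monthly)  # 'm'
infer_freq(daily)    # 'd'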
import pandas as pd

from awrams.models import awral
from awrams.simulation.ondemand import OnDemandSimulator
from .evaluators import Evaluator
from .objectives import NSE

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('calibrate')

MODEL = None
input_map = None
objective = NSE


def set_model(model=awral):
    global MODEL, input_map
    MODEL = model
    input_map = model.get_default_mapping()

set_model()


def get_parameter_df(mapping):
    params = [(k, v) for (k, v) in mapping.items() if 'Min' in v.properties]
    params = [{
        'Name': k,
        'Min': v.properties['Min'],
        'Max': v.properties['Max'],
        'Value': v.args['value']
import numpy as np
import pandas as pd
from awrams.utils.awrams_log import get_module_logger
import multiprocessing as mp
from awrams.utils.messaging.binding import MessageHandler, MultiprocessingParent, QueueChild, bound_proxy
from awrams.utils.messaging.general import message
import time

logger = get_module_logger('SCE')


class ShuffledOptimizer:

    def __init__(self, complex_sz, n_complexes, parameters, eval_fac, min_complexes=1):
        '''
        s    : pop_size (initial population)
        m    : complex size
        p    : number of complexes
        pmin : minimum number of complexes
        '''
        self.complex_sz = complex_sz
        self.n_complexes = n_complexes
        self._n_complexes = n_complexes
        self.pop_size = complex_sz * n_complexes
        self.parameters = parameters
        self.min_complexes = min_complexes
import re
import sys

import numpy as np
import datetime as dt

from awrams.utils.metatypes import ObjectDict
from awrams.utils.helpers import iround

# +++ Should probably stop logging from this file...
from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('netcdf_wrapper')

from awrams.utils.settings import DEFAULT_CHUNKSIZE, VAR_CHUNK_CACHE_SIZE, VAR_CHUNK_CACHE_NELEMS, VAR_CHUNK_CACHE_PREEMPTION, VARIABLE_PRECISION, DEFAULT_PRECISION #pylint: disable=no-name-in-module


def set_chunk_cache(dataset, variable, **params):
    p = dict(var_chunk_cache_size=VAR_CHUNK_CACHE_SIZE,
             var_chunk_cache_nelems=VAR_CHUNK_CACHE_NELEMS,
             var_chunk_cache_preemption=VAR_CHUNK_CACHE_PREEMPTION)
    p.update(**params)
    dataset.variables[variable].set_var_chunk_cache(
        size=p['var_chunk_cache_size'],
        nelems=p['var_chunk_cache_nelems'],
        preemption=p['var_chunk_cache_preemption'])


def dtype_for_variable(var):
    if var in VARIABLE_PRECISION:
        return np.dtype(VARIABLE_PRECISION[var])
    else:
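# Example (sketch): applying a non-default chunk cache to one variable of an open
# netCDF4 dataset via set_chunk_cache(); the file and variable names are hypothetical.
import netCDF4 as nc

ds = nc.Dataset('/data/climate/temp_min_2010.nc')
set_chunk_cache(ds, 'temp_min', var_chunk_cache_size=2 ** 20)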
import numpy as np
import datetime as dt

from awrams.utils.ts.time_series_infilling import FillWithZeros, FillWithClimatology
from .test_gridded_time_series import create_mock_data, create_dummy_dataset
from nose.tools import with_setup, raises, assert_list_equal, assert_equal

from awrams.utils.awrams_log import get_module_logger
logger = get_module_logger('test_data_infilling')


def setup_gappy():
    global dataset
    dataset = create_dummy_dataset(opener=mock_open_gappy_data)


def mock_open_gappy_data(self, fn):
    return create_gappy_data(fn)


def create_gappy_data(fn):
    # insert_gaps is used because broadcasting doesn't seem to work
    # with diskless netCDF files in python-netCDF4
    def insert_gaps(array):
        array[5, :, :] = np.nan
        array[10, 20, 30] = np.nan
        array[15:30, 20, 30] = np.nan
        array[50, 10:18, 30] = np.nan

    data = create_mock_data(fn, data_modifier=insert_gaps)
    # data.variables['temp_min'][5,:,:] = np.nan
    # data.variables['temp_min'][10,20,30] = np.nan
    # data.variables['temp_min'][15:30,20,30] = np.nan