def __importConfigFile(self, file):
    """Import a configuration (.ini) file and register its objects.

    The file is looked up as given first and, if that fails, relative
    to the workspace base path.

    Args:
        file: path of the configuration file.

    Returns:
        True if the file was found and processed, False if it does not
        exist in either location.
    """
    # locate the file; the workspace path is only consulted as fallback
    configFile = None
    if os.path.isfile(file):
        configFile = file
    else:
        candidate = self.__basepath['workspace'] + file
        if os.path.isfile(candidate):
            configFile = candidate
    if configFile is None:
        nemoa.log('warning',
                  "configuration file '%s' does not exist!" % (file))
        return False

    nemoa.log("parsing configuration file: '" + configFile + "'")
    nemoa.setLog(indent='+1')

    # register every object first, then check the whole batch
    objConfList = configFileImporter(self).load(configFile)
    for objConf in objConfList:
        self.__addObjToStore(objConf)
    self.__check(objConfList)

    nemoa.setLog(indent='-1')
    return True
def _is_compatible_lff(self): """Test compatibility to layered feed forward networks. Returns: Boolean value which is True if the following conditions are satisfied: (1) All layers of the network are not empty (2) The first and last layer of the network are visible, the layers between them are hidden """ # test if network contains empty layers for layer in self._get_layers(): if not len(self._get_layer(layer)['nodes']) > 0: return nemoa.log( 'error', """Feedforward networks do not allow empty layers.""") # test if and only if the first and the last layer are visible for layer in self._get_layers(): if not self._get_layer(layer)['visible'] \ == (layer in [self._get_layers()[0], self._get_layers()[-1]]): return nemoa.log( 'error', """The first and the last layer of a Feedforward network have to be visible, middle layers have to be hidden!""") return True
def stratifyData(self, algorithm = 'auto'):
    """Stratify data.

    Keyword arguments:
    algorithm -- name of algorithm used for stratification
        'none': the fraction of each source is set to
            number of all samples / number of samples in source
        'auto': fractions are left as they are (hierarchical
            distribution as defined in the configuration)
        'equal': the fraction of each source is set to
            1 / number of sources

    Returns True for a known algorithm, False otherwise.
    """
    nemoa.log("stratify data using '%s'" % (algorithm))
    mode = algorithm.lower()

    if mode == 'none':
        total = sum(self.data[src]['array'].shape[0] for src in self.data)
        for src in self.data:
            count = self.data[src]['array'].shape[0]
            self.data[src]['fraction'] = float(total) / float(count)
        return True

    if mode == 'auto':
        return True

    if mode == 'equal':
        share = 1.0 / float(len(self.data))
        for src in self.data:
            self.data[src]['fraction'] = share
        return True

    return False
def __importModelFromFile(self, file):
    """Return a new model instance restored from a model file.

    If 'file' does not exist as given, it is looked up as
    '<models path><file>.mp' in the workspace.

    Args:
        file: path or name of the model file.

    Returns:
        The model instance on success. If the file cannot be found the
        result of the nemoa.log error call is returned; if no valid
        model instance could be created None is returned.
    """
    nemoa.log('import model from file')
    nemoa.setLog(indent = '+1')
    # try/finally: the log indent has to be reset on every exit path,
    # including the early error returns
    try:
        # check file
        if not os.path.exists(file):
            candidate = nemoa.workspace.path('models') + file + '.mp'
            if not os.path.exists(candidate):
                return nemoa.log(
                    'error',
                    " could not load model '%s': file does not exist."
                    % file)
            file = candidate

        # load model parameters and configuration from file
        nemoa.log("load model: '%s'" % file)
        modelDict = nemoa.common.dictFromFile(file)
        model = self.__getModelInstance(
            name = modelDict['config']['name'],
            config = modelDict['config'],
            dataset = modelDict['dataset']['cfg'],
            network = modelDict['network']['cfg'],
            system = modelDict['system']['config'])

        if not nemoa.type.isModel(model):
            return None
        model._set(modelDict)
        return model
    finally:
        nemoa.setLog(indent = '-1')
def get(self, type=None, name=None, merge=['params'], params=None, id=None):
    """Return configuration as dictionary for given object.

    The returned dictionary is a deep copy, so merging 'params' into it
    (or modifying it in the caller) never alters the stored
    configuration.

    Args:
        type: object class; has to be a key of the configuration store.
        name: name of the object within its class (preferred lookup).
        merge: list of keys describing the nested sub dictionary that
            'params' is merged into. The default list is never mutated.
        params: optional dictionary of values to merge.
        id: object id, used for the lookup if no name is given.

    Returns:
        Configuration dictionary on success; False if 'name' is not
        known; None if the configuration is empty; the result of the
        nemoa.log warning call for an unknown class, an unknown id or
        if neither 'name' nor 'id' is given.
    """
    import copy

    if type not in self.__store:
        return nemoa.log(
            'warning',
            " could not get configuration: object class '%s' is not known "
            % type)

    entries = self.__store[type]
    cfg = None

    if name:
        # get configuration from type and name
        if name not in entries:
            return False
        cfg = copy.deepcopy(entries[name])
    elif id:
        # get configuration from type and id (last match wins)
        for entry in entries.values():
            if entry['id'] == id:
                cfg = copy.deepcopy(entry)
        if cfg is None:
            return nemoa.log(
                'warning',
                " could not get configuration: no %s with id %i could be found "
                % (type, id))
    else:
        # could not identify configuration
        return nemoa.log(
            'warning',
            "could not get configuration: 'id' or 'name' of object is needed!")

    if not cfg:
        return None

    # optionally merge 'params' into the sub dictionary addressed by
    # the list of keys in 'merge'
    if not isinstance(params, dict) or not nemoa.common.isList(merge):
        return cfg
    subMerge = cfg
    for key in merge:
        if not isinstance(subMerge, dict):
            return cfg
        if key not in subMerge:
            subMerge[key] = {}
        subMerge = subMerge[key]
    for key in params:
        subMerge[key] = params[key]
        # every merged parameter contributes to the id of the result
        cfg['id'] += self.__getObjIDByName(
            '.'.join(merge) + '.' + key, params[key])

    return cfg
def __importModelFromFile(self, file):
    """Return a new model instance restored from a model file.

    If 'file' does not exist as given, it is looked up as
    '<models path><file>.mp' in the workspace.

    Args:
        file: path or name of the model file.

    Returns:
        The model instance on success. If the file cannot be found the
        result of the nemoa.log error call is returned; if no valid
        model instance could be created None is returned.
    """
    nemoa.log('import model from file')
    nemoa.setLog(indent='+1')
    # try/finally: the log indent has to be reset on every exit path,
    # including the early error returns
    try:
        # check file
        if not os.path.exists(file):
            candidate = nemoa.workspace.path('models') + file + '.mp'
            if not os.path.exists(candidate):
                return nemoa.log(
                    'error',
                    " could not load model '%s': file does not exist."
                    % file)
            file = candidate

        # load model parameters and configuration from file
        nemoa.log("load model: '%s'" % file)
        modelDict = nemoa.common.dictFromFile(file)
        model = self.__getModelInstance(
            name=modelDict['config']['name'],
            config=modelDict['config'],
            dataset=modelDict['dataset']['cfg'],
            network=modelDict['network']['cfg'],
            system=modelDict['system']['config'])

        if not nemoa.type.isModel(model):
            return None
        model._set(modelDict)
        return model
    finally:
        nemoa.setLog(indent='-1')
def save(network, path=None, filetype=None, plot=None, **kwargs):
    """Create a plot of a network and save it to a file.

    Args:
        network: network instance handed to the plot's create().
        path: file path handed to the plot's save().
        filetype: has to be contained in filetypes().
        plot: name of the plot type; the plot class is looked up by the
            title-cased name in this function's module.
            Defaults to 'graph'.
        **kwargs: passed to the constructor of the plot class.

    Returns:
        'path', or the result of the nemoa.log error call if the
        filetype or the plot type is not supported.
    """
    # test if filetype is supported
    if filetype not in filetypes():
        return nemoa.log(
            'error',
            "could not create plot: filetype '%s' is not supported."
            % filetype)

    # resolve the plot class from the (title-cased) plot name
    if not plot:
        plot = 'graph'
    class_name = plot.lower().title()
    try:
        module = importlib.import_module(save.__module__)
    except ImportError:
        module = None
    if module is None or not hasattr(module, class_name):
        return nemoa.log(
            'error',
            "could not plot network '%s': plot type '%s' is not supported."
            % (network.name, plot))

    # create plot and save it to file
    figure = getattr(module, class_name)(**kwargs)
    if figure.create(network):
        figure.save(path)

    # clear figures and release memory
    figure.release()

    return path
def _isNetworkDBNCompatible(self, network=None):
    """Check if a network is compatible to deep belief networks.

    Keyword Arguments:
        network -- nemoa network instance

    Description:
        Return True if the following conditions are satisfied:
        (1) The network is MLP compatible
        (2) The network contains an odd number of layers
        (3) The hidden layers are symmetric to the central layer
            related to their number of nodes
        If (1) fails False is returned, otherwise the result of the
        nemoa.log error call.
    """
    if not nemoa.type.isNetwork(network):
        return nemoa.log(
            "error", "could not test network: invalid network instance given!")
    if not self._isNetworkMLPCompatible(network):
        return False

    layers = network.layers()
    size = len(layers)
    if not size % 2 == 1:
        return nemoa.log(
            "error",
            "DBN / Autoencoder networks expect an odd number of layers!")

    # compare hidden layer 'lid' with its mirror layer '-lid - 1';
    # floor division keeps the range bound an integer on Python 3
    for lid in range(1, (size - 1) // 2):
        symmetric = len(network.layer(layers[lid])["nodes"]) \
            == len(network.layer(layers[-lid - 1])["nodes"])
        if not symmetric:
            return nemoa.log(
                "error",
                "DBN / Autoencoder networks expect a symmetric number "
                "of hidden nodes, related to their central layer!")

    return True
def getFormatedData(self, data, cols = '*', output = 'array'):
    """Return data in given format.

    Keyword Arguments:
        data -- numpy record array; the formats 'recarray', 'rows' and
            'list' read its field 'label'
        cols -- column labels to select; the default value '*' selects
            all columns returned by getColLabels()
        output -- output format or tuple of output formats:
            'array': selected columns viewed as '<f8' and reshaped to
                (data.size, number of columns)
            'recarray': record array with 'label' and selected columns
            'cols': list with the part after the first ':' of each
                column label
            'rows', 'list': list of the row labels

    Returns:
        The formatted data if 'output' is a string, a tuple with one
        entry per format if 'output' is a tuple. On invalid columns or
        formats the result of the nemoa.log error call is returned.
    """
    # check columns
    if cols == '*':
        cols = self.getColLabels()
    elif not len(cols) == len(set(cols)):
        return nemoa.log('error',
            'could not retrieve data: columns are not unique!')
    else:
        known = self.getColLabels()
        if [c for c in cols if c not in known]:
            return nemoa.log('error',
                'could not retrieve data: unknown columns!')
    # field selection and list concatenation below need a list
    cols = [c for c in cols]

    # check format
    if isinstance(output, str):
        fmtTuple = (output, )
    elif isinstance(output, tuple):
        fmtTuple = output
    else:
        return nemoa.log('error',
            "could not retrieve data: invalid 'format' argument!")

    # format data
    retTuple = ()
    for fmtStr in fmtTuple:
        if fmtStr == 'array':
            retTuple += (
                data[cols].view('<f8').reshape(data.size, len(cols)), )
        elif fmtStr == 'recarray':
            retTuple += (data[['label'] + cols], )
        elif fmtStr == 'cols':
            retTuple += ([col.split(':')[1] for col in cols], )
        elif fmtStr in ['rows', 'list']:
            retTuple += (data['label'].tolist(), )
        else:
            # an unknown format would otherwise be dropped silently and
            # lead to an IndexError or a misaligned result tuple
            return nemoa.log('error',
                "could not retrieve data: unknown output format '%s'!"
                % (fmtStr, ))

    if isinstance(output, str):
        return retTuple[0]
    return retTuple
def _set(self, dict):
    """Set configuration, parameters and data of model from dictionary.

    The dictionary is converted with importConfigFromDict(); network and
    dataset are updated in place, the system is created anew from the
    system configuration and then updated.

    Returns:
        The model itself on success; the result of the nemoa.log error
        call if the model lacks a dataset, network or system instance.
    """
    # get config from dict
    config = self.importConfigFromDict(dict)

    # the model needs dataset, network and system instances
    if not nemoa.type.isDataset(self.dataset):
        return nemoa.log(
            'error',
            'could not configure dataset: model does not contain dataset instance!')
    if not nemoa.type.isNetwork(self.network):
        return nemoa.log(
            'error',
            'could not configure network: model does not contain network instance!')
    if not nemoa.type.isSystem(self.system):
        return nemoa.log(
            'error',
            'could not configure system: model does not contain system instance!')

    self.__config = config['config'].copy()
    self.network._set(**config['network'])
    self.dataset._set(**config['dataset'])

    # make sure the system configuration contains an 'update' entry
    systemConf = config['system']['config']
    if 'update' not in systemConf:
        systemConf['update'] = {'A': False}

    # 2do: system._set(...) shall create the system itself
    # and do something like self.configure ...

    # create system
    self.system = nemoa.system.new(
        config = systemConf.copy(),
        network = self.network,
        dataset = self.dataset)
    self.system._set(**config['system'])

    return self
def __addObjToStore(self, objConf):
    """Link an object configuration to the object store and the index.

    Args:
        objConf: dictionary with the keys 'class', 'name', 'config'
            and 'project'.

    Returns:
        The id of the registered object, or False if 'objConf' is not
        a dictionary or its class is not a key of the store.
    """
    if not isinstance(objConf, dict):
        return False

    objType = objConf['class']
    objName = objConf['name']
    config = objConf['config']

    nemoa.log('adding %s: %s' % (objType, objName))

    if objType not in self.__store:
        nemoa.log(
            'error',
            "could not register object '%s': not supported object type '%s'!"
            % (objName, objType))
        return False

    section = self.__store[objType]
    key = self.__getNewKey(section, objName)
    objID = self.__getObjIDByName(objType, key)

    # add configuration to object tree
    section[key] = config
    section[key]['id'] = objID

    # add entry to index
    self.__index[objID] = {
        'type': objType,
        'name': key,
        'project': objConf['project']}

    return objID
def _get_default(self, key=None, *args, **kwargs): """Get default value.""" import copy if not key: return copy.deepcopy(self._config['default']) elif not key in self._config['default']: return nemoa.log( 'error', """could not get default value: key '%s' is not valid.""" % key) retval = self._config['default'][key] parent = key while args: if not isinstance(args[0], str) \ or not args[0] in retval: return nemoa.log( 'error', """could not get default value: '%s' does not contain key '%s'.""" % (parent, args[0])) parent = args[0] retval = retval[args[0]] args = args[1:] if isinstance(retval, dict): return copy.deepcopy(retval) return retval
def __importConfigFile(self, file):
    """Import a configuration (.ini) file and register its objects.

    The file is looked up as given first and, if that fails, relative
    to the workspace base path.

    Args:
        file: path of the configuration file.

    Returns:
        True if the file was found and processed, False if it does not
        exist in either location.
    """
    # locate the file; the workspace path is only consulted as fallback
    configFile = None
    if os.path.isfile(file):
        configFile = file
    else:
        candidate = self.__basepath['workspace'] + file
        if os.path.isfile(candidate):
            configFile = candidate
    if configFile is None:
        nemoa.log('warning',
                  "configuration file '%s' does not exist!" % (file))
        return False

    nemoa.log("parsing configuration file: '" + configFile + "'")
    nemoa.setLog(indent = '+1')

    # register every object first, then check the whole batch
    objConfList = configFileImporter(self).load(configFile)
    for objConf in objConfList:
        self.__addObjToStore(objConf)
    self.__check(objConfList)

    nemoa.setLog(indent = '-1')
    return True
def _isNetworkMLPCompatible(self, network=None):
    """Check if a network is compatible to multilayer perceptrons.

    Keyword Arguments:
        network -- nemoa network instance

    Description:
        Return True if the following conditions are satisfied:
        (1) The network contains at least three layers
        (2) All layers of the network are not empty
        (3) The first and last layer of the network are visible,
            all middle layers of the network are hidden
        Otherwise the result of the nemoa.log error call is returned.
    """
    if not nemoa.type.isNetwork(network):
        return nemoa.log(
            "error", "could not test network: invalid network instance given!")
    if len(network.layers()) < 3:
        return nemoa.log(
            "error", "Multilayer networks need at least three layers!")

    for name in network.layers():
        nodeCount = len(network.layer(name)["nodes"])
        if not nodeCount > 0:
            return nemoa.log(
                "error", "Feedforward networks do not allow empty layers!")

        # a layer is visible if and only if it is the first or last one
        isVisible = network.layer(name)["visible"]
        isOuter = name in [network.layers()[0], network.layers()[-1]]
        if not isVisible == isOuter:
            return nemoa.log(
                "error",
                "The first and the last layer of a multilayer "
                "feedforward network have to be visible, middle "
                "layers have to be hidden!")

    return True
def _graph_decode(graph): # no decoding if not 'coding' in graph.graph \ or not graph.graph['coding'] \ or graph.graph['coding'].lower() == 'none': return graph # base64 decoding elif graph.graph['coding'] == 'base64': graph.graph['params'] = nemoa.common.compress.loads( graph.graph['params'], encode='base64') for node in graph.nodes(): graph.node[node]['params'] = nemoa.common.compress.loads( graph.node[node]['params'], encode='base64') for edge in graph.edges(): graph.edges[edge]['params'] = nemoa.common.compress.loads( graph.edges[edge]['params'], encode='base64') graph.graph['coding'] == 'none' return graph else: nemoa.log( 'error', """could not decode graph parameters: unsupported coding '%s'.""" % coding) return {}
def csvGetData(self, name):
    """Import the data of a table from its CSV source file.

    The source configuration is read from
    self.cfg['table'][name]['source']. The first line of the file is
    skipped. Unless the configuration contains a false 'rows' entry,
    column 0 of the file is additionally imported as field 'label'.

    Args:
        name: key of the table in self.cfg['table'].

    Returns:
        Structured numpy array with one '<f8' field per column label,
        or None if the file could not be imported.
    """
    conf = self.cfg['table'][name]['source']
    file = conf['file']
    delim = conf['delimiter'] if 'delimiter' in conf \
        else nemoa.common.csvGetDelimiter(file)
    cols = conf['usecols']
    names = tuple(self.getColLabels())
    formats = ('<f8', ) * len(names)

    if 'rows' not in conf or conf['rows']:
        # prepend the label column; tuple() also accepts a list
        cols = (0, ) + tuple(cols)
        names = ('label', ) + names
        formats = ('<U12', ) + formats
    dtype = {'names': names, 'formats': formats}

    nemoa.log("import data from csv file: " + file)

    try:
        data = numpy.loadtxt(file, skiprows = 1, delimiter = delim,
            usecols = cols, dtype = dtype)
    except Exception:
        # do not swallow KeyboardInterrupt / SystemExit
        nemoa.log('error', 'could not import data from file!')
        return None

    return data
def _is_compatible_dbn(self): """Test compatibility to deep beliefe networks. Returns: Boolean value which is True if the following conditions are satisfied: (1) The network is compatible to multilayer feedforward networks: see function _is_compatible_mlff (2) The network contains an odd number of layers (3) The hidden layers are symmetric to the central middle layer related to their number of nodes """ # test if network is MLFF compatible if not self._is_compatible_mlff(): return False # test if the network contains odd number of layers if not len(self._get_layers()) % 2 == 1: return nemoa.log( 'error', """DBNs expect an odd number of layers.""") # test if the hidden layers are symmetric layers = self._get_layers() size = len(layers) for lid in range(1, (size - 1) / 2): symmetric = len(self._get_layer(layers[lid])['nodes']) \ == len(self._get_layer(layers[-lid-1])['nodes']) if not symmetric: return nemoa.log( 'error', """DBNs expect a symmetric number of hidden nodes, related to their central middle layer!""") return True
def __confDataset(self, dataset = None, network = None, **kwargs):
    """Configure model.dataset to given dataset and network.

    Keyword Arguments:
        dataset -- dataset instance; if valid it replaces self.dataset
        network -- network instance used to configure the dataset;
            falls back to self.network if not given

    Returns:
        False if no valid dataset or no network is available, True if
        the dataset is empty, otherwise the result of
        self.dataset.configure().
    """
    # NOTE: the arguments were previously overwritten with self.dataset
    # and self.network right away, which made them ineffective

    # link dataset instance
    if nemoa.type.isDataset(dataset):
        self.dataset = dataset

    # check if dataset instance is valid
    if not nemoa.type.isDataset(self.dataset):
        nemoa.log('error',
            'could not configure dataset: no dataset instance available!')
        return False

    # check if dataset is empty
    if self.dataset.isEmpty():
        return True

    # a network is needed, either as argument or from the model
    if not network and not self.network:
        nemoa.log('error',
            'could not configure dataset: no network instance available!')
        return False

    return self.dataset.configure(
        network = network if network is not None else self.network)
def initialize(self):
    """Initialize model parameters.

    Dataset, system and network are initialized in this order; all
    three steps are executed even if an earlier one fails.

    Returns:
        True if all three initializations returned a true value, False
        otherwise. If dataset, network or system is not valid the
        result of the nemoa.log error call is returned.
    """
    if not nemoa.common.type.isdataset(self.dataset):
        return nemoa.log(
            'error', "could not initialize model: dataset is not valid.")
    if not nemoa.common.type.isnetwork(self.network):
        return nemoa.log(
            'error', "could not initialize model: network is not valid.")
    if not nemoa.common.type.issystem(self.system):
        return nemoa.log(
            'error', "could not initialize model: system is not valid.")

    results = [
        # initialize dataset to system including normalization
        bool(self.dataset.initialize(self.system)),
        # initialize system parameters by using statistics from dataset
        bool(self.system.initialize(self.dataset)),
        # initialize network parameters with system parameters
        bool(self.network.initialize(self.system)),
    ]

    return all(results)
def getUnitRelation(self, dataset, stat = 10000, units = None,
    relation = 'correlation()', format = 'array', **kwargs):
    """Return a relation between units as array or dictionary.

    Args:
        dataset: dataset instance providing getData().
        stat: not used by this method.
        units: passed through self._assertUnitTuple().
        relation: string with the name of the relation ('correlation',
            'interaction' or 'knockout') and optional parameters, as
            parsed by nemoa.common.strSplitParams().
        format: 'array' returns the relation matrix, 'dict' converts
            it with nemoa.common.dictFromArray().
        **kwargs: merged with the parameters parsed from 'relation'.
            The parameter 'transform' is a Python expression which is
            evaluated with the relation matrix as 'M' and, if the
            expression contains 'C', the unit correlation matrix as 'C'.

    Returns:
        The relation in the requested format, None for an unknown
        format, or the result of the nemoa.log error call for an
        unknown relation or an invalid transformation.
    """
    # get mapping, data and units
    mapping = self.getMapping()
    data = dataset.getData(cols = (mapping[0], mapping[-1]))
    units = self._assertUnitTuple(units)

    # get method and method specific parameters
    method, ukwargs = nemoa.common.strSplitParams(relation)
    params = nemoa.common.dictMerge(ukwargs, kwargs)

    # get relation as numpy array
    if method == 'correlation':
        M = self.getUnitCorrelation(
            units = units, data = data, **params)
    elif method == 'interaction':
        M = self.getUnitInteraction(
            units = units, data = data, mapping = mapping, **params)
    elif method == 'knockout':
        M = self.getUnitKnockout(
            units = units, data = data, mapping = mapping, **params)
    else:
        return nemoa.log('error',
            "could not evaluate unit relations: unknown relation '%s'"
            % (method))

    # transform matrix
    if 'transform' in params:
        if 'C' in params['transform']:
            # 'C' is only referenced by the evaluated expression
            C = self.getUnitCorrelationMatrix(units = units, data = data)
        try:
            # SECURITY: eval() executes arbitrary code; 'transform'
            # must never come from an untrusted source. The local
            # names 'M' and 'C' have to stay visible to the expression.
            M = eval(params['transform'])
        except Exception:
            return nemoa.log('error',
                'could not transform unit relation matrix: invalid syntax!')

    # create formatted output
    if format == 'array':
        return M
    if format == 'dict':
        return nemoa.common.dictFromArray(M, units)
    return None
def loadProject(self, project):
    """Import configuration files from user project.

    Args:
        project: name of the project; has to be contained in the list
            of user workspaces.

    Returns:
        True if the project has been loaded, False if the project
        could not be found.
    """
    nemoa.log('import private resources')
    nemoa.setLog(indent='+1')
    # try/finally: the log indent has to be reset on every exit path,
    # including the early return for unknown projects
    try:
        # check if project exists
        if project not in self.__listUserWorkspaces():
            nemoa.log(
                'warning',
                " could not open project '%s': project folder could "
                "not be found in '%s'! "
                % (project, self.__basepath['user']))
            return False

        # set project
        self.__project = project

        # update paths
        self.__updatePaths(base='user')

        # update path of cache
        self.__updateCachePaths()

        # update logger to current logfile
        nemoa.initLogger(logfile=self.__path['logfile'])

        # import object configurations for current project
        self.__scanForConfigFiles()
        self.__scanForScripts()
        self.__scanForNetworks()

        return True
    finally:
        nemoa.setLog(indent='-1')
def _isNetworkDBNCompatible(self, network = None):
    """Check if a network is compatible to deep belief networks.

    Keyword Arguments:
        network -- nemoa network instance

    Description:
        Return True if the following conditions are satisfied:
        (1) The network is MLP compatible
        (2) The network contains an odd number of layers
        (3) The hidden layers are symmetric to the central layer
            related to their number of nodes
        If (1) fails False is returned, otherwise the result of the
        nemoa.log error call.
    """
    if not nemoa.type.isNetwork(network):
        return nemoa.log('error',
            'could not test network: invalid network instance given!')
    if not self._isNetworkMLPCompatible(network):
        return False

    layers = network.layers()
    size = len(layers)
    if not size % 2 == 1:
        return nemoa.log('error',
            'DBN / Autoencoder networks expect an odd number of layers!')

    # compare hidden layer 'lid' with its mirror layer '-lid - 1';
    # floor division keeps the range bound an integer on Python 3
    for lid in range(1, (size - 1) // 2):
        symmetric = len(network.layer(layers[lid])['nodes']) \
            == len(network.layer(layers[-lid - 1])['nodes'])
        if not symmetric:
            return nemoa.log('error',
                "DBN / Autoencoder networks expect a symmetric number "
                "of hidden nodes, related to their central layer!")

    return True
def _isDatasetGaussNormalized(self, dataset = None):
    """Test if a dataset contains gauss normalized data.

    Keyword Arguments:
        dataset -- nemoa dataset instance

    Description:
        Return True if the following conditions are satisfied for
        data requested with dataset.getData(100000):
        (1) The absolute mean value is below 0.05
        (2) The standard deviation is below 1.05
        Otherwise the result of the nemoa.log error call is returned.
    """
    if not nemoa.type.isDataset(dataset):
        return nemoa.log('error',
            'could not test dataset: invalid dataset instance given!')

    sampleSize = 100000 # number of samples
    meanLimit = 0.05    # allowed maximum for absolute mean value
    sdevLimit = 1.05    # allowed maximum for standard deviation

    samples = dataset.getData(sampleSize)

    # (1) mean value
    mean = samples.mean()
    if numpy.abs(mean) >= meanLimit:
        return nemoa.log('error',
            " The dataset does not contain gauss normalized data: "
            "The mean value is %.3f!" % (mean))

    # (2) standard deviation
    sdev = samples.std()
    if sdev >= sdevLimit:
        return nemoa.log('error',
            " The dataset does not contain gauss normalized data: "
            "The standard deviation is %.3f!" % (sdev))

    return True
def get_plot(dataset, func=None, plot=None, **kwargs):
    """Create a plot of a dataset evaluation function.

    Args:
        dataset: dataset instance providing get('algorithm', name),
            a 'name' attribute and the data for the plot.
        func: name of the evaluation function. Defaults to 'sample'.
        plot: name of the plot type. Defaults to the 'plot' entry of
            the evaluation function and finally to 'histogram'. The
            plot class is looked up by the title-cased name in the
            module of 'save'.
        **kwargs: passed to the constructor of the plot class.

    Returns:
        The plot instance if it could be created, None if its creation
        failed, or the result of the nemoa.log error call if the
        function or the plot type is not supported.
    """
    import importlib

    # get evaluation function
    fname = func if func else 'sample'
    fdict = dataset.get('algorithm', fname)
    if not isinstance(fdict, dict):
        return nemoa.log(
            'error',
            "could not plot dataset '%s': "
            "dataset evaluation function '%s' "
            "is not supported." % (dataset.name, fname))

    # get plot type
    plot = plot or fdict.get('plot', None) or 'histogram'

    # resolve the plot class from the (title-cased) plot name
    cname = plot.lower().title()
    try:
        module = importlib.import_module(save.__module__)
    except ImportError:
        module = None
    if module is None or not hasattr(module, cname):
        return nemoa.log(
            'error',
            "could not plot dataset '%s': "
            "plot type '%s' is not supported." % (dataset.name, plot))

    # create plot; release its resources if creation fails
    figure = getattr(module, cname)(func=fname, **kwargs)
    if figure.create(dataset):
        return figure

    figure.release()
    return None
def _isNetworkMLPCompatible(self, network = None):
    """Check if a network is compatible to multilayer perceptrons.

    Keyword Arguments:
        network -- nemoa network instance

    Description:
        Return True if the following conditions are satisfied:
        (1) The network contains at least three layers
        (2) All layers of the network are not empty
        (3) The first and last layer of the network are visible,
            all middle layers of the network are hidden
        Otherwise the result of the nemoa.log error call is returned.
    """
    if not nemoa.type.isNetwork(network):
        return nemoa.log('error',
            'could not test network: invalid network instance given!')
    if len(network.layers()) < 3:
        return nemoa.log('error',
            'Multilayer networks need at least three layers!')

    for name in network.layers():
        nodeCount = len(network.layer(name)['nodes'])
        if not nodeCount > 0:
            return nemoa.log('error',
                'Feedforward networks do not allow empty layers!')

        # a layer is visible if and only if it is the first or last one
        isVisible = network.layer(name)['visible']
        isOuter = name in [network.layers()[0], network.layers()[-1]]
        if not isVisible == isOuter:
            return nemoa.log('error',
                "The first and the last layer of a multilayer "
                "feedforward network have to be visible, middle "
                "layers have to be hidden!")

    return True
def loadProject(self, project):
    """Import configuration files from user project.

    Args:
        project: name of the project; has to be contained in the list
            of user workspaces.

    Returns:
        True if the project has been loaded, False if the project
        could not be found.
    """
    nemoa.log('import private resources')
    nemoa.setLog(indent = '+1')
    # try/finally: the log indent has to be reset on every exit path,
    # including the early return for unknown projects
    try:
        # check if project exists
        if project not in self.__listUserWorkspaces():
            nemoa.log('warning',
                " could not open project '%s': project folder could "
                "not be found in '%s'! "
                % (project, self.__basepath['user']))
            return False

        # set project
        self.__project = project

        # update paths
        self.__updatePaths(base = 'user')

        # update path of cache
        self.__updateCachePaths()

        # update logger to current logfile
        nemoa.initLogger(logfile = self.__path['logfile'])

        # import object configurations for current project
        self.__scanForConfigFiles()
        self.__scanForScripts()
        self.__scanForNetworks()

        return True
    finally:
        nemoa.setLog(indent = '-1')
def __checkSystemConf(self, objConf):
    """Check and update system configuration.

    The configuration needs the parameters 'package' and 'class', and
    the module 'nemoa.system.<package>' has to be importable. The
    'description' is stripped of newlines or, if missing, set to the
    configured name (falling back to the object name).

    Args:
        objConf: dictionary with the keys 'name' and 'config'.

    Returns:
        'objConf' with updated configuration, or the result of the
        nemoa.log warning call if the system has to be skipped.
    """
    import importlib

    name = objConf['name']
    conf = objConf['config']

    for required in ('package', 'class'):
        if required not in conf:
            return nemoa.log(
                'warning',
                "skipping system '" + name
                + "': missing parameter '" + required + "'!")

    if 'description' in conf:
        conf['description'] = conf['description'].replace('\n', '')
    else:
        conf['description'] = conf.get('name', name)

    # check if system exists; import_module replaces the former
    # 'exec "import ..."' statement, which was Python 2 only and
    # executed a string taken from the configuration
    try:
        importlib.import_module('nemoa.system.' + conf['package'])
    except Exception:
        return nemoa.log(
            'warning',
            "skipping system '%s': package 'system.%s' could not be found!"
            % (name, conf['package']))

    return objConf
def normalizeData(self, algorithm = 'gauss'):
    """Normalize stratified data.

    Keyword arguments:
    algorithm -- name of algorithm used for data normalization
        'gauss', 'z-trans': Gaussian normalization
            (aka z-transformation)

    Every column of every source array, except the first field, is
    shifted by the column mean and scaled by the column standard
    deviation of the reference data.

    Returns True for a known algorithm, False otherwise.
    """
    nemoa.log("normalize data using '%s'" % (algorithm))

    if algorithm.lower() not in ['gauss', 'z-trans']:
        return False

    # get data for calculation of mean and variance
    # for single source datasets take all data
    # for multi source datasets take a big bunch of stratified data
    if len(self.data) > 1:
        data = self.getData(size = 1000000, output = 'recarray')
    else:
        # list() is needed since dict.keys() is not indexable on
        # Python 3
        data = self.getSingleSourceData(source = list(self.data)[0])

    # update the sources column by column
    for src in self.data:
        array = self.data[src]['array']
        # identity test: '== None' on a numpy array compares
        # element-wise
        if array is None:
            continue
        for col in array.dtype.names[1:]:
            array[col] = (array[col] - data[col].mean()) / data[col].std()

    return True
def load(self, path):
    """Return network configuration as dictionary.

    Args:
        path: configuration file used to generate network
            configuration dictionary.

    Returns:
        The result of self._parse_layer_network(path) for the network
        types 'layer.MultiLayer', 'layer.Shallow' and 'layer.Factor'.
        For an invalid file or an unsupported network type the result
        of the nemoa.log error call is returned.
    """
    # a first pass only reads the network type
    structure = {'network': {'type': 'str'}}
    network = nemoa.common.inifile.load(path, structure)

    valid = bool(network) \
        and 'network' in network \
        and 'type' in network['network']
    if not valid:
        return nemoa.log(
            'error',
            "could not import network: configuration file '%s' is not valid."
            % path)

    layerTypes = ['layer.MultiLayer', 'layer.Shallow', 'layer.Factor']
    if network['network']['type'] in layerTypes:
        return self._parse_layer_network(path)

    return nemoa.log(
        'error',
        "could not import network configuration file '%s' "
        "contains unsupported network type '%s'."
        % (path, network['network']['type']))
def __createNewModel(self, name, config=None, dataset=None, network=None,
                     system=None, configure=True, initialize=True):
    """Create a new model instance.

    Args:
        name, config, dataset, network, system: passed on to
            __getModelInstance().
        configure: if True, model.configure() is called.
        initialize: if True, model.initialize() is called.

    Returns:
        The model instance, or False if no valid model instance could
        be created.
    """
    nemoa.log('create new model')
    nemoa.setLog(indent='+1')

    model = self.__getModelInstance(
        name=name, config=config, dataset=dataset,
        network=network, system=system)

    if nemoa.type.isModel(model):
        # configure model
        if configure:
            model.configure()
        # initialize model parameters
        if initialize:
            model.initialize()
        result = model
    else:
        nemoa.log('error', 'could not create new model!')
        result = False

    nemoa.setLog(indent='-1')
    return result
def _install_pkg(self, pkg=None):
    """Try to install bioconductor base or a bioconductor package.

    The remote R script biocLite.R is sourced through rpy2 while
    sys.stdout is redirected to a NullDevice; the installation itself
    is executed with self._exec_rcmd().

    Args:
        pkg: name of the bioconductor package. If omitted, the
            bioconductor base is installed.
            NOTE(review): 'pkg' is inserted into an R command string
            without escaping - assumed to be a trusted value.

    Returns:
        The result of self._exec_rcmd(), or the result of the
        nemoa.log error call if the remote script could not be
        evaluated.
    """
    if not pkg:
        nemoa.log('note', "trying to install bioconductor base")
    else:
        nemoa.log(
            'note',
            "trying to install bioconductor package: '%s'" % pkg)

    # try to evaluate the remote R script biocLite()
    bioclite = "https://bioconductor.org/biocLite.R"
    stdout = sys.stdout
    failed = False
    try:
        sys.stdout = NullDevice()
        from rpy2.robjects.packages import importr
        base = importr('base')
        base.source(bioclite)
        base.require('biocLite')
    except Exception:
        # do not swallow KeyboardInterrupt / SystemExit
        failed = True
    finally:
        # stdout is restored before anything is logged
        sys.stdout = stdout
    if failed:
        return nemoa.log(
            'error',
            "could not evaluate remote R script: '%s'" % bioclite)

    # try to install bioconductor packages with biocLite()
    if not pkg:
        return self._exec_rcmd("biocLite()")
    return self._exec_rcmd("biocLite('%s')" % pkg)
def convert(list, input, output = None, filter = False, quiet = True):
    """Convert the entries of a list between annotation formats.

    Args:
        list: list or numpy array with the entries to convert.
        input: input format, either a generic class ('number',
            'string', 'float') or '<class>:<format>'.
        output: output format in the same notation. If omitted, the
            class of the input format is used and no format is set.
        filter: passed to the converter of the annotation module.
        quiet: passed to the converter of the annotation module.

    Returns:
        Tuple (converted entries, lost entries), as lists or numpy
        arrays according to the type of 'list'. None if a format is
        unknown or input and output class do not match.
    """
    genericClasses = ['number', 'string', 'float']

    if isinstance(list, (numpy.ndarray)):
        list = list.tolist()
        inputDtype = 'nparray'
    else:
        inputDtype = 'list'

    # 'input'
    if input in genericClasses:
        inputClass = 'generic'
        inputFormat = input
    elif ':' in input:
        inputClass = input.lower().split(':')[0].strip()
        inputFormat = input.lower().split(':')[1].strip()
    else:
        nemoa.log('warning',
            'could not convert list: unknown input format "' + input + '"!')
        return None

    # 'output'
    if output in genericClasses:
        outputClass = 'generic'
        outputFormat = output
    elif output is None:
        outputClass = inputClass
        outputFormat = None
    elif ':' in output:
        # the separator has to be looked up in 'output', not 'input'
        outputClass = output.lower().split(':')[0].strip()
        outputFormat = output.lower().split(':')[1].strip()
    else:
        nemoa.log('warning',
            'could not convert list: unknown output format "' + output + '"!')
        return None

    # 'input' vs 'output'
    if inputClass != outputClass:
        nemoa.log('warning', '"' + inputClass
            + '" can not be converted to "' + outputClass + '"')
        return None

    # trivial cases
    if inputClass == 'generic' or inputFormat == outputFormat:
        if inputDtype == 'nparray':
            return numpy.asarray(list), numpy.asarray([])
        return list, []

    # import annotation module; the converter class has the same name
    # as its module
    modName = inputClass.lower()
    import importlib
    module = importlib.import_module('nemoa.annotation.' + modName)
    converter = getattr(module, modName)()
    outList, outLost = converter.convert_list(
        list, inputFormat, outputFormat, filter, quiet = quiet)
    if inputDtype == 'nparray':
        return numpy.asarray(outList), numpy.asarray(outLost)
    return outList, outLost
def interactive():
    """Start the interactive nemoa shell with ipython.

    Runs 'ipython nemoa.interactive.py -i'. If the ipython executable
    cannot be started an error is logged.
    """
    try:
        call(['ipython', 'nemoa.interactive.py', '-i'])
    except OSError:
        # raised if the executable is missing; other exceptions (e.g.
        # KeyboardInterrupt) are no longer reported as missing ipython
        nemoa.log(
            'error',
            " could not start interactive nemoa shell: "
            "you have to install ipython!")
def getdelim(path, delimiters = [',', ';', '\t', ' '],
    minprobesize = 3, maxprobesize = 100):
    """Get delimiter from CSV file.

    Args:
        path (string): file path to CSV file.
        delimiters (list, optional): list of strings containing
            delimiter candidates to search for. The default list is
            never mutated.
        minprobesize (integer, optional): minimum number of non
            comment, non empty lines used to detect csv delimiter.
        maxprobesize (integer, optional): maximum number of non
            comment, non empty lines used to detect csv delimiter.

    Returns:
        String containing delimiter of CSV file. If the file does not
        exist or the delimiter could not be detected, the result of
        the nemoa.log call is returned.
    """
    # check file
    if not os.path.isfile(path):
        return nemoa.log('error',
            "could not determine delimiter: file '%s' does not exist."
            % path)

    delimiter = None
    with open(path, 'r') as csvfile:
        lines = 1
        probe = ''
        for line in csvfile:

            # check termination criteria
            if bool(delimiter) or lines > maxprobesize:
                break

            # check exclusion criteria: comments and empty lines
            sline = line.lstrip(' ')
            if sline.startswith('#') or sline in ['\n', '\r\n']:
                continue

            # increase probe
            probe += line
            lines += 1

            # try to detect delimiter of probe
            if lines > minprobesize:
                try:
                    dialect = csv.Sniffer().sniff(probe, delimiters)
                except csv.Error:
                    # not detectable yet: retry with a larger probe
                    continue
                delimiter = dialect.delimiter

    if not delimiter:
        return nemoa.log('warning',
            "could not determine delimiter of csv file '%s'!" % path)

    return delimiter
def interactive():
    """Start the interactive nemoa shell with ipython.

    Runs 'ipython nemoa.interactive.py -i'. If the ipython executable
    cannot be started an error is logged.
    """
    try:
        call(["ipython", "nemoa.interactive.py", "-i"])
    except OSError:
        # raised if the executable is missing; other exceptions (e.g.
        # KeyboardInterrupt) are no longer reported as missing ipython
        nemoa.log(
            "error",
            " could not start interactive nemoa shell: "
            "you have to install ipython!",
        )
def _get_edge(self, edge): if not isinstance(edge, tuple): return nemoa.log( 'error', """could not get edge: edge '%s' is unkown.""" % (edge)) if not edge in self._graph.edges: return nemoa.log( 'error', """could not get edge: edge ('%s', '%s') is unkown.""" % edge) return self._graph.edges[edge]
def __checkNetworkConf(self, objConf):
    """Check and update network configuration.

    A 'layers' entry of the form ['name=type', ...] is replaced by the
    lists 'layer', 'visible' and 'hidden'. If the configuration has a
    'source', the source file is resolved with getPath(), its format
    is determined and the configuration returned by __registerNetwork()
    is merged into the network configuration.

    Args:
        objConf: dictionary with the keys 'class', 'name' and 'config'.

    Returns:
        'objConf' with updated configuration, or the result of the
        nemoa.log warning call if the network has to be skipped.
    """
    objClass = objConf['class']  # not used, but the key is required
    name = objConf['name']
    conf = objConf['config']

    # create 'layer', 'visible' and 'hidden' from 'layers'
    if 'layers' in conf:
        conf['layer'] = []
        conf['visible'] = []
        conf['hidden'] = []
        for layer in conf['layers']:
            if '=' in layer:
                parts = layer.split('=')
                layerName = parts[0].strip()
                layerType = parts[1].strip().lower()
            else:
                # layers without type are visible
                layerName, layerType = layer.strip(), 'visible'
            conf['layer'].append(layerName)
            group = 'visible' if layerType == 'visible' else 'hidden'
            conf[group].append(layerName)
        del conf['layers']

    if 'source' not in conf:
        return objConf

    # get config from source file
    if 'file' not in conf['source']:
        return nemoa.log(
            'warning',
            "skipping network '" + name + "': "
            "missing source information! (parameter: 'source:file')")

    file = conf['source']['file']
    if not self.getPath(file, check=True):
        return nemoa.log(
            'warning',
            "skipping network '%s': file '%s' not found!" % (name, file))

    source = objConf['config']['source']
    source['file'] = self.getPath(file)
    if 'file_format' not in conf['source']:
        source['file_format'] = nemoa.common.getFileExt(file)
    format = source['file_format']

    # get network config; the network is registered with the file
    # name as configured, not with the resolved path
    networkCfg = self.__registerNetwork(file, format)
    if not networkCfg:
        return nemoa.log('warning', "skipping network '%s'" % (name))
    for key in networkCfg:
        objConf['config'][key] = networkCfg[key]

    return objConf
def __triggerKeyEvent(self):
    """Check keyboard for an abort request.

    Reads one key via nemoa.common.getch(). If it is the string 'q', the
    configured number of updates is set to the current epoch and the
    abort flag of the state is raised.

    Returns:
        Always True.

    """

    key = nemoa.common.getch()
    abort_requested = isinstance(key, str) and key == "q"

    if abort_requested:
        nemoa.log("note", "... aborting optimization")
        # the current epoch becomes the configured number of updates
        optimize_conf = self.__system._config["optimize"]
        optimize_conf["updates"] = self.__state["epoch"]
        self.__state["abort"] = True

    return True
def __init__(self):
    """Import rpy2.robjects and keep the module as self.robjects.

    While the import is running sys.stdout is replaced by a NullDevice
    instance. The original stream is restored in every case, also if the
    import is interrupted. A failing import is logged as error.

    """

    stdout = sys.stdout
    try:
        sys.stdout = NullDevice()
        import rpy2.robjects
        self.robjects = rpy2.robjects
    except Exception:
        # restore the stream first, so that the log message is not
        # written to the null device
        sys.stdout = stdout
        nemoa.log('error', "could not import python package rpy2!")
    finally:
        # also reached for KeyboardInterrupt / SystemExit, which are no
        # longer swallowed
        sys.stdout = stdout
def __getObjByID(self, id):
    """Return object from store by id.

    Args:
        id: key of the object in the index.

    Returns:
        Dictionary with the keys 'class', 'name', 'project' and 'config',
        where 'config' is a (shallow) copy of the stored configuration.
        None if the id is not found in the index.

    """

    if id not in self.__index:
        nemoa.log('warning', '2DO')
        return None

    entry = self.__index[id]
    obj_class = entry['type']
    obj_name = entry['name']
    obj_project = entry['project']
    obj_config = self.__store[obj_class][obj_name].copy()

    return {
        'class': obj_class,
        'name': obj_name,
        'project': obj_project,
        'config': obj_config}
def optimize(self, schedule = None, **kwargs):
    """Optimize system parameters.

    Args:
        schedule (string, optional): name of the optimization schedule.
            Without a name '<system type>.default' is used; a name
            without '.' is prefixed with the system type.
        **kwargs: passed to nemoa.workspace.getConfig.

    Returns:
        The model instance (self). This also holds for empty models and
        invalid schedules. False if the model is not yet configured.

    """

    nemoa.log('optimize model')
    nemoa.setLog(indent = '+1')

    # check if model is empty
    if self.isEmpty():
        nemoa.log('warning', "empty models can not be optimized!")
        nemoa.setLog(indent = '-1')
        return self

    # check if model is configured
    if not self.__isConfigured():
        nemoa.log('error',
            'could not optimize model: model is not yet configured!')
        nemoa.setLog(indent = '-1')
        return False

    # get optimization schedule
    if schedule is None:
        schedule = self.system.getType() + '.default'
    elif '.' not in schedule:
        schedule = self.system.getType() + '.' + schedule
    schedule = nemoa.workspace.getConfig(
        type = 'schedule', config = schedule,
        merge = ['params', self.system.getType()], **kwargs)
    if not schedule:
        nemoa.log('error', """ could not optimize system parameters: optimization schedule is not valid!""")
        nemoa.setLog(indent = '-1')
        return self

    # optimization of system parameters
    nemoa.log("starting optimization schedule: '%s'" % (schedule['name']))
    nemoa.setLog(indent = '+1')

    # 2DO: find better solution for multistage optimization
    if 'stage' in schedule and len(schedule['stage']) > 0:
        # the stages are taken from the schedule; the former lookup in
        # the undefined name 'config' raised a NameError
        for params in schedule['stage']:
            self.system.optimizeParams(self.dataset, **params)
    elif 'params' in schedule:
        self.system.optimizeParams(
            dataset = self.dataset, schedule = schedule)

    nemoa.setLog(indent = '-1')

    # update network
    self.system.updateNetwork(self.network)

    nemoa.setLog(indent = '-1')
    return self
def __triggerKeyEvent(self):
    """Check keyboard and abort the optimization on key 'q'.

    One key is read with nemoa.common.getch(). For the string 'q' a note
    is logged, the configured number of updates is replaced by the
    current epoch and the 'abort' entry of the state is set.

    Returns:
        Always True.

    """

    pressed = nemoa.common.getch()

    # anything but the string 'q' leaves the state untouched
    if not (isinstance(pressed, str) and pressed == 'q'):
        return True

    nemoa.log('note', '... aborting optimization')
    current_epoch = self.__state['epoch']
    self.__system._config['optimize']['updates'] = current_epoch
    self.__state['abort'] = True

    return True
def convert(list, input, output = None, filter = False):
    """Convert a list of labels from one format to another.

    Formats are either generic ('number', 'string', 'float') or given as
    '<class>:<format>'. Conversion is only possible within one class and
    is delegated to the class of the same name in the module
    'nemoa.dataset.commons.labels.<class>'.

    Args:
        list (list or numpy.ndarray): labels to convert.
        input (string): format of the given labels.
        output (string, optional): requested format. Without output the
            class of the input format is used with no format.
        filter (bool, optional): passed to the converter.

    Returns:
        Tuple (converted, lost). Both entries are numpy arrays if the
        labels were given as numpy array, otherwise the converted labels
        are a list. For generic formats and identical input and output
        formats the labels are returned unchanged with an empty second
        entry. For unknown or incompatible formats the value returned by
        nemoa.log is passed through.

    """

    generic_types = ['number', 'string', 'float']

    if isinstance(list, numpy.ndarray):
        list = list.tolist()
        input_dtype = 'nparray'
    else:
        input_dtype = 'list'

    # 'input'
    if input in generic_types:
        input_class = 'generic'
        input_format = input
    elif ':' in input:
        input_parts = input.lower().split(':')
        input_class = input_parts[0].strip()
        input_format = input_parts[1].strip()
    else:
        return nemoa.log('warning', """could not convert list: unknown input format '%s'.""" % input)

    # 'output'
    if output in generic_types:
        output_class = 'generic'
        output_format = output
    elif not output:
        output_class = input_class
        output_format = None
    elif ':' in output:
        # the separator has to be searched in 'output' (not in 'input'),
        # otherwise output formats without ':' raise an IndexError
        output_parts = output.lower().split(':')
        output_class = output_parts[0].strip()
        output_format = output_parts[1].strip()
    else:
        return nemoa.log('warning', """could not convert list: unknown output format '%s'.""" % output)

    # 'input' vs 'output'
    if input_class != output_class:
        return nemoa.log('warning', "'%s' can not be converted to '%s'"
            % (input_class, output_class))

    # trivial cases
    if input_class == 'generic' or input_format == output_format:
        if input_dtype == 'nparray':
            return numpy.asarray(list), numpy.asarray([])
        return list, []

    # import annotation module
    module_name = input_class.lower()
    module = importlib.import_module(
        'nemoa.dataset.commons.labels.' + module_name)
    converter = getattr(module, module_name)()
    output_list, output_lost = converter.convert_list(
        list, input_format, output_format, filter)

    if input_dtype == 'nparray':
        return numpy.asarray(output_list), numpy.asarray(output_lost)
    return output_list, output_lost
def getlabelcolumn(path, delimiter = None):
    """Get column id for column containing row labels from CSV file.

    The second non comment, non empty line of the file is split at the
    delimiter. The first field of this line which can not be converted
    to a float is regarded as the label column.

    Args:
        path (string): file path to CSV file.
        delimiter (string, optional): string containing CSV delimiter.
            Without delimiter it is determined by getdelim.

    Returns:
        Integer containing first index of CSV data column containing
        strings, or False if every field is numeric. In case of errors
        the value returned by nemoa.log is passed through.

    """

    # check file
    if not os.path.isfile(path):
        return nemoa.log('error', """could not get csv row label column id: file '%s' does not exist.""" % path)

    # get delimiter
    if not delimiter:
        delimiter = getdelim(path)
    if not delimiter:
        return nemoa.log('error', """could not get csv row label column id: unknown delimiter.""")

    # collect first and second non comment and non empty line
    content = []
    with open(path, 'r') as csvfile:
        for line in csvfile:
            stripped = line.lstrip(' ')
            is_comment = stripped.startswith('#')
            is_empty = stripped in ['\n', '\r\n']
            if is_comment or is_empty:
                continue
            content.append(line)
            if len(content) == 2:
                break

    if len(content) < 2:
        return nemoa.log('error', """could not get csv row label column id: file '%s' is not valid.""" % (path))

    # search the first non numeric field of the second line
    fields = content[1].split(delimiter)
    for colid, field in enumerate(fields):
        value = field.strip('\"\' \n')
        try:
            float(value)
        except ValueError:
            return colid

    return False
def __initState(self):
    """Initialize the state dictionary of the optimization.

    The state starts with the current time as 'startTime', epoch 0, no
    inspection and a cleared abort flag. Depending on the inspect and
    estimate settings further entries are added.

    Returns:
        Always True.

    """

    now = time.time()
    nemoa.log("note", "press 'q' if you want to abort the optimization")

    state = {
        "startTime": now,
        "epoch": 0,
        "inspection": None,
        "abort": False}
    self.__state = state

    if self.__inspect:
        state["inspectTime"] = now
    if self.__estimate:
        state.update({"estimateStarted": False, "estimateEnded": False})

    return True
def __getInstance(self, type = None, config = None, empty = False, **kwargs):
    """Return new instance of given object type and configuration.

    Args:
        type (string, optional): object type; the module 'nemoa.<type>'
            is imported to create the instance.
        config (optional): configuration (or its name), resolved with
            nemoa.workspace.getConfig.
        empty (bool, optional): if True the instance is created with the
            function empty() of the module, otherwise with new().
        **kwargs: passed to nemoa.workspace.getConfig and to new().

    Returns:
        The new instance, or None if the configuration is unknown or the
        created instance does not match the given type.

    """

    prefix = ' empty' if empty else ''
    nemoa.log('create%s %s instance' % (prefix, type))
    nemoa.setLog(indent = '+1')

    # import module
    module = importlib.import_module('nemoa.' + str(type))

    # get objects configuration as dictionary
    config = nemoa.workspace.getConfig(
        type = type, config = config, **kwargs)
    if not isinstance(config, dict):
        nemoa.log('error', """could not create %s instance: unknown configuration!""" % (type))
        nemoa.setLog(indent = '-1')
        return None

    # create and initialize new instance of given class
    if empty:
        instance = module.empty()
    else:
        instance = module.new(config = config, **kwargs)

    # check instance class
    if not nemoa.type.isInstanceType(instance, type):
        nemoa.log('error', """could not create %s instance: invalid configuration!""" % (type))
        nemoa.setLog(indent = '-1')
        return None

    nemoa.log('name of %s is: \'%s\'' % (type, instance.name()))
    nemoa.setLog(indent = '-1')
    return instance
def __checkNetworkConf(self, objConf):
    """Check and update network configuration.

    Entries of a 'layers' list have the form '<name>' or
    '<name> = <type>'. They are sorted into the lists 'layer' (all
    names), 'visible' and 'hidden'; the 'layers' list is removed
    afterwards. For configurations with a 'source' section the file path
    is resolved, a missing 'file_format' is derived from the file name
    and the registered network configuration is copied into the object
    configuration.

    Args:
        objConf (dict): object configuration containing 'class', 'name'
            and 'config'; modified in place.

    Returns:
        objConf after the update, or the value returned by nemoa.log if
        the network is skipped.

    """

    # all three keys are required, although 'class' is not used further
    kind = objConf['class']
    label = objConf['name']
    settings = objConf['config']

    # create 'layer', 'visible' and 'hidden' from 'layers'
    if 'layers' in settings:
        every = settings['layer'] = []
        visible = settings['visible'] = []
        hidden = settings['hidden'] = []
        for item in settings['layers']:
            tokens = item.split('=') if '=' in item else [item, 'visible']
            title = tokens[0].strip()
            role = tokens[1].strip().lower()
            every.append(title)
            if role == 'visible':
                visible.append(title)
            else:
                hidden.append(title)
        del settings['layers']

    # get config from source file
    if 'source' in settings:
        if not 'file' in settings['source']:
            return nemoa.log('warning',
                "skipping network '" + label + "': "
                "missing source information! (parameter: 'source:file')")

        given = settings['source']['file']
        if not self.getPath(given, check = True):
            return nemoa.log('warning',
                "skipping network '%s': file '%s' not found!" % (label, given))

        target = objConf['config']['source']
        target['file'] = self.getPath(given)
        if not 'file_format' in settings['source']:
            # derived from the file name as it was given
            target['file_format'] = nemoa.common.getFileExt(given)

        # get network config
        found = self.__registerNetwork(given, target['file_format'])
        if not found:
            return nemoa.log('warning', "skipping network '%s'" % (label))
        for key in found:
            objConf['config'][key] = found[key]

    return objConf
def save_graph(self, file = None, format = 'gml'):
    """Save a copy of the graph to a file.

    Args:
        file (string): path of the file to write. A missing directory is
            created.
        format (string, optional): file format. Only 'gml' is written
            (with networkx.write_gml); for other formats no file is
            created.

    Returns:
        None. Without file a critical message is logged and the value
        returned by nemoa.log is passed through.

    """

    if file is None:
        # stop here: the path operations below can not handle None
        return nemoa.log('critical', "no save path was given")

    # create path if not available; plain file names have no directory
    # part and os.makedirs('') would fail
    directory = os.path.dirname(file)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)

    # everything seems to be fine
    # nemoa.log("saving graph to %s" % (file))

    if format == 'gml':
        G = self.graph.copy()
        networkx.write_gml(G, file)
def configure(self):
    """Configure model.

    Dataset, network and system are configured in this order. The result
    of each step is stored in the 'check' entry of the model
    configuration; the first failing step stops the configuration.

    Returns:
        True if dataset, network and system could be configured,
        otherwise False.

    """

    nemoa.log('configure model \'%s\'' % (self.name()))
    nemoa.setLog(indent = '+1')

    if 'check' not in self.__config:
        # the keys have to match the lowercase keys used below
        # (formerly 'System' was initialized, but 'system' was used)
        self.__config['check'] = {
            'dataset': False, 'network': False, 'system': False}

    self.__config['check']['dataset'] = \
        self.dataset.configure(network = self.network)
    if not self.__config['check']['dataset']:
        nemoa.log('error', """could not configure model: dataset could not be configured!""")
        nemoa.setLog(indent = '-1')
        return False

    self.__config['check']['network'] = \
        self.network.configure(dataset = self.dataset, system = self.system)
    if not self.__config['check']['network']:
        nemoa.log('error', """could not configure model: network could not be configured!""")
        nemoa.setLog(indent = '-1')
        return False

    self.__config['check']['system'] = \
        self.system.configure(network = self.network, dataset = self.dataset)
    if not self.__config['check']['system']:
        nemoa.log('error', """could not configure model: system could not be configured!""")
        nemoa.setLog(indent = '-1')
        return False

    nemoa.setLog(indent = '-1')
    return True
def __delObjConf(self, objConf):
    """Unlink object configuration from object dictionary.

    Args:
        objConf (dict): object configuration with the keys 'class' and
            'name', used to look up the object id.

    Returns:
        True if the entry was removed from the index. False if no
        configuration was given or the id is not found in the index.

    """

    if not objConf:
        return False

    obj_id = self.__getObjIDByName(objConf['class'], objConf['name'])
    known = obj_id in self.__index
    if not known:
        nemoa.log('warning', '2do')
        return False

    # delete configuration from tree

    # delete entry in index
    self.__index.pop(obj_id)

    return True
def _assertUnitTuple(self, units=None):
    """Return tuple with lists of valid input and output units.

    Args:
        units (sequence, optional): pair (input, output). Each entry is
            either the name of a unit group, which is replaced by the
            first entry of self.getUnits(group=name), or a list, which
            is kept as it is. Without units the first and the last group
            of self.getMapping() are used.

    Returns:
        Pair of input and output units. A given list is updated in place
        and returned (as before); for other sequences (e.g. tuples) a
        new tuple is returned. If an entry is neither a string nor a
        list, an error is logged and the value returned by nemoa.log is
        passed through.

    """

    mapping = self.getMapping()
    if units is None:
        return (self.getUnits(group=mapping[0])[0],
            self.getUnits(group=mapping[-1])[0])

    # resolve both entries before anything is modified, so that invalid
    # input is rejected as a whole (formerly the first entry was not
    # validated at all)
    resolved = []
    for entry in (units[0], units[1]):
        if isinstance(entry, str):
            resolved.append(self.getUnits(group=entry)[0])
        elif isinstance(entry, list):
            # 2do: test if units are valid
            resolved.append(entry)
        else:
            return nemoa.log(
                "error",
                """could not evaluate unit relations: parameter units has invalid format!""",
            )

    # lists are updated in place, as callers may rely on it; immutable
    # sequences can not be updated and are returned as new tuple
    if isinstance(units, list):
        units[0], units[1] = resolved
        return units
    return tuple(resolved)
def getDataMean(data, norm="M"):
    """Return mean of data along the first axis.

    Keyword Arguments:
        data -- numpy array containing data
        norm -- data mean norm (case insensitive)
            'M': Arithmetic Mean of Values
            'ME': Mean of Errors (mean of absolute values)
            'MSE': Mean of Squared Errors
            'RMSE', 'L2': Root Mean of Squared Errors / L2 Norm
            default: 'M'

    For an unknown norm an error is logged and the value returned by
    nemoa.log is passed through.

    """

    kind = norm.upper()

    # one reducer per norm; evaluated only for the selected norm
    reducers = {
        "M": lambda x: numpy.mean(x, axis=0),
        "ME": lambda x: numpy.mean(numpy.abs(x), axis=0),
        "MSE": lambda x: numpy.mean(x ** 2, axis=0),
        "RMSE": lambda x: numpy.sqrt(numpy.mean(x ** 2, axis=0)),
    }
    reducers["L2"] = reducers["RMSE"]

    reducer = reducers.get(kind)
    if reducer is None:
        return nemoa.log("error", "unknown data mean norm '%s'" % (kind))

    return reducer(data)
def getDataSum(data, norm="S"):
    """Return sum of data along the first axis.

    Keyword Arguments:
        data -- numpy array containing data
        norm -- data sum norm (case insensitive)
            'S': Sum of Values
            'SE', 'L1': Sum of Errors / L1 Norm (sum of absolute values)
            'SSE': Sum of Squared Errors
            'RSSE': Root Sum of Squared Errors
            default: 'S'

    For an unknown norm an error is logged and the value returned by
    nemoa.log is passed through.

    """

    kind = norm.upper()

    if kind == "S":
        return numpy.sum(data, axis=0)

    if kind == "SE" or kind == "L1":
        absolute = numpy.abs(data)
        return numpy.sum(absolute, axis=0)

    if kind == "SSE" or kind == "RSSE":
        squared_sum = numpy.sum(data ** 2, axis=0)
        # the root is only taken for the RSSE norm
        return numpy.sqrt(squared_sum) if kind == "RSSE" else squared_sum

    return nemoa.log("error", "unknown data sum norm '%s'" % (kind))
def getPath(self, str, check = False, create = False):
    """Resolve path.

    Surrounding whitespace is removed, then the unix home directory,
    nemoa environment variables (self.__expandPath) and unix environment
    variables are expanded in this order.

    Args:
        str (string): path to resolve.
        check (bool, optional): if True the resolved path has to exist.
        create (bool, optional): if True the directory part of the
            resolved path is created if it does not exist.

    Returns:
        String containing the resolved path. If check is True and the
        path does not exist, a warning is logged and the value returned
        by nemoa.log is passed through.

    """

    # clean up input string
    path = str.strip()

    # expand unix home directory
    path = os.path.expanduser(path)

    # expand nemoa environment variables
    path = self.__expandPath(path)

    # expand unix environment variables
    path = os.path.expandvars(path)

    # create directory; paths without directory part (plain file names)
    # need no directory and os.makedirs('') would fail
    if create:
        directory = os.path.dirname(path)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)

    # check path
    if check and not os.path.exists(path):
        return nemoa.log('warning',
            "directory '%s' does not exist!" % (path))

    return path
def getConfig(type = None, config = None, merge = ['params'], **kwargs):
    """Return object configuration as dictionary.

    Args:
        type (string, optional): type of the requested configuration.
        config (dict or string, optional): a dictionary is returned as
            deep copy; a string is split into name and parameters and
            searched in the shared configuration.
        merge (list, optional): passed to the configuration lookup.
        **kwargs: a dictionary given as 'params' is merged with the
            parameters found in the config string.

    Returns:
        Dictionary containing the configuration: an empty dictionary if
        no config is given, False if config or type have an invalid
        type. If no configuration is found, an error is logged and the
        value returned by nemoa.log is passed through.

    """

    if config is None:
        return {}
    if isinstance(config, dict):
        return copy.deepcopy(config)
    if not (isinstance(config, str) and isinstance(type, str)):
        return False

    name, params = nemoa.common.strSplitParams(config)
    extra = kwargs.get('params') if 'params' in kwargs else None
    if isinstance(extra, dict):
        params = nemoa.common.dictMerge(extra, params)

    def candidates(base):
        # plain name, name within project, default and base variant
        return [base, '%s.%s' % (project(), base),
            base + '.default', 'base.' + base]

    # the names derived from the split name are tried first, followed by
    # those of the complete config string
    search = candidates(name) + candidates(config)

    # NOTE(review): the fallback name is only used for the error message
    # below, the search list is already built -- confirm this is intended
    if not name:
        name = project() + '.default'

    # get config
    for candidate in search:
        found = __shared['config'].get(
            type = type, name = candidate,
            merge = merge, params = params)
        if isinstance(found, dict):
            return found

    return nemoa.log('error', """ could not get configuration: no %s with name '%s' could be found""" % (type, name))
def getCorruptedData(self, data, type = None, factor = 0.5):
    """Return numpy array with (partly) corrupted data.

    Keyword Arguments:
        data -- numpy array containing data
        type -- string describing algorithm for corruption
            None, 'none': No corruption, the data is returned as it is
            'mask': Masking Noise
                The data is multiplied with samples of a binomial
                distribution (n = 1, p = 1 - factor)
            'gauss': Gaussian Noise
                Additive Gaussian noise with mean 0.0 and standard
                deviation given by factor
            'salt': Salt-and-pepper noise
                Not implemented yet, treated as unknown type
        factor -- float describing the strength of the corruption
            The influence of the parameter depends on the type of the
            corruption

    For an unknown type an error is logged and the value returned by
    nemoa.log is passed through.

    """

    if type is None or type == 'none':
        return data

    if type == 'mask':
        mask = numpy.random.binomial(
            size = data.shape, n = 1, p = 1 - factor)
        return data * mask

    if type == 'gauss':
        noise = numpy.random.normal(
            size = data.shape, loc = 0.0, scale = factor)
        return data + noise

    #2do!: implement salt and pepper noise

    return nemoa.log('error',
        "unkown data corruption type '%s'!" % (type))