def __importModelFromFile(self, file):
    """Return new model instance with configuration and parameters from file.

    Keyword Arguments:
        file -- path of a model file or a name which is looked up as
            '<name>.mp' below nemoa.workspace.path('models')

    Returns the model instance on success, the return value of
    nemoa.log if the file does not exist and None if no valid model
    instance could be created. The log indentation is restored on
    every return path."""

    nemoa.log('import model from file')
    nemoa.setLog(indent='+1')

    # check file: fall back to the models folder of the workspace
    if not os.path.exists(file):
        workspaceFile = nemoa.workspace.path('models') + file + '.mp'
        if not os.path.exists(workspaceFile):
            retVal = nemoa.log('error',
                " could not load model '%s': file does not exist." % file)
            # undo the indentation set above before leaving
            nemoa.setLog(indent='-1')
            return retVal
        file = workspaceFile

    # load model parameters and configuration from file
    nemoa.log("load model: '%s'" % file)
    modelDict = nemoa.common.dictFromFile(file)

    model = self.__getModelInstance(
        name=modelDict['config']['name'],
        config=modelDict['config'],
        dataset=modelDict['dataset']['cfg'],
        network=modelDict['network']['cfg'],
        system=modelDict['system']['config'])

    # only a valid model instance takes over the stored settings
    if nemoa.type.isModel(model):
        model._set(modelDict)
    else:
        model = None

    nemoa.setLog(indent='-1')
    return model
def loadProject(self, project):
    """Import configuration files from user project.

    Keyword Arguments:
        project -- name of a user project; it has to be contained in
            the list returned by self.__listUserWorkspaces()

    Returns True if the project has been loaded and False if the
    project could not be found. The log indentation is restored on
    both return paths."""

    nemoa.log('import private resources')
    nemoa.setLog(indent='+1')

    # check if project exists
    if project not in self.__listUserWorkspaces():
        nemoa.log('warning',
            " could not open project '%s': "
            "project folder could not be found in '%s'! "
            % (project, self.__basepath['user']))
        # undo the indentation set above before leaving
        nemoa.setLog(indent='-1')
        return False

    # set project
    self.__project = project

    # update paths
    self.__updatePaths(base='user')

    # update path of cache
    self.__updateCachePaths()

    # update logger to current logfile
    nemoa.initLogger(logfile=self.__path['logfile'])

    # import object configurations for current project
    self.__scanForConfigFiles()
    self.__scanForScripts()
    self.__scanForNetworks()

    nemoa.setLog(indent='-1')
    return True
def __importConfigFile(self, file):
    """Parse a configuration (.ini) file and register its objects.

    Keyword Arguments:
        file -- path of the configuration file, either as given or
            relative to self.__basepath['workspace']

    Returns True after the objects have been registered and checked
    and False if the file could not be found."""

    # locate definition file: given path first, then workspace path
    if os.path.isfile(file):
        configFile = file
    else:
        configFile = self.__basepath['workspace'] + file
        if not os.path.isfile(configFile):
            nemoa.log('warning',
                "configuration file '%s' does not exist!" % (file))
            return False

    # logger info
    nemoa.log("parsing configuration file: '%s'" % (configFile))
    nemoa.setLog(indent='+1')

    # register all objects first, the check runs afterwards on the list
    objConfList = configFileImporter(self).load(configFile)
    for objConf in objConfList:
        self.__addObjToStore(objConf)
    self.__check(objConfList)

    nemoa.setLog(indent='-1')
    return True
def configure(self):
    """Configure model.

    Configures dataset, network and system (in this order) and stores
    the result of each step in self.__config['check'].

    Returns True if all three steps succeeded and False as soon as
    one step fails (later steps are not executed)."""

    nemoa.log("configure model '%s'" % (self.name()))
    nemoa.setLog(indent='+1')

    # the initial keys have to match the lowercase keys written below
    if 'check' not in self.__config:
        self.__config['check'] = {
            'dataset': False, 'network': False, 'system': False}

    # configure dataset to network
    self.__config['check']['dataset'] = \
        self.dataset.configure(network=self.network)
    if not self.__config['check']['dataset']:
        nemoa.log('error', 'could not configure model: '
            'dataset could not be configured!')
        nemoa.setLog(indent='-1')
        return False

    # configure network to dataset and system
    self.__config['check']['network'] = \
        self.network.configure(dataset=self.dataset, system=self.system)
    if not self.__config['check']['network']:
        nemoa.log('error', 'could not configure model: '
            'network could not be configured!')
        nemoa.setLog(indent='-1')
        return False

    # configure system to network and dataset
    self.__config['check']['system'] = \
        self.system.configure(network=self.network, dataset=self.dataset)
    if not self.__config['check']['system']:
        nemoa.log('error', 'could not configure model: '
            'system could not be configured!')
        nemoa.setLog(indent='-1')
        return False

    nemoa.setLog(indent='-1')
    return True
def loadProject(self, project):
    """Import configuration files from user project.

    Keyword Arguments:
        project -- name of a user project as listed by
            self.__listUserWorkspaces()

    Returns False (after a warning) if the project is not listed and
    True after paths, logger and object configurations have been
    updated. The log indentation is restored in both cases."""

    nemoa.log('import private resources')
    nemoa.setLog(indent='+1')

    # check if project exists
    projectExists = project in self.__listUserWorkspaces()
    if not projectExists:
        nemoa.log('warning',
            " could not open project '%s': "
            "project folder could not be found in '%s'! "
            % (project, self.__basepath['user']))
        # without this reset every failed call would shift the log
        nemoa.setLog(indent='-1')
        return False

    # set project and update the paths which depend on it
    self.__project = project
    self.__updatePaths(base='user')
    self.__updateCachePaths()

    # update logger to current logfile
    nemoa.initLogger(logfile=self.__path['logfile'])

    # import object configurations for current project
    self.__scanForConfigFiles()
    self.__scanForScripts()
    self.__scanForNetworks()

    nemoa.setLog(indent='-1')
    return True
def __importConfigFile(self, file):
    """Import configuration (.ini) file and register contained objects.

    Keyword Arguments:
        file -- configuration file, given as existing path or as path
            relative to self.__basepath['workspace']

    Returns False if no such file exists, otherwise True."""

    # search definition file
    configFile = file
    if not os.path.isfile(configFile):
        configFile = self.__basepath['workspace'] + file
        if not os.path.isfile(configFile):
            nemoa.log('warning',
                "configuration file '%s' does not exist!" % (file))
            return False

    nemoa.log("parsing configuration file: '%s'" % (configFile))
    nemoa.setLog(indent='+1')

    # import and register objects without testing,
    # the imported list is checked as a whole afterwards
    importer = configFileImporter(self)
    importedObjects = importer.load(configFile)
    for importedObject in importedObjects:
        self.__addObjToStore(importedObject)
    self.__check(importedObjects)

    nemoa.setLog(indent='-1')
    return True
def __importModelFromFile(self, file):
    """Return new model instance and set configuration and parameters
    from file.

    Keyword Arguments:
        file -- existing path of a model file or a name; a name is
            resolved to nemoa.workspace.path('models') + name + '.mp'

    Returns the model instance, the return value of nemoa.log if the
    file does not exist or None if the created object is no model.
    Every return path resets the log indentation."""

    nemoa.log('import model from file')
    nemoa.setLog(indent='+1')

    # check file
    if not os.path.exists(file):
        modelFile = nemoa.workspace.path('models') + file + '.mp'
        if os.path.exists(modelFile):
            file = modelFile
        else:
            logVal = nemoa.log('error',
                " could not load model '%s': file does not exist." % file)
            # reset indentation, otherwise it leaks to later messages
            nemoa.setLog(indent='-1')
            return logVal

    # load model parameters and configuration from file
    nemoa.log("load model: '%s'" % file)
    modelDict = nemoa.common.dictFromFile(file)

    model = self.__getModelInstance(
        name=modelDict['config']['name'],
        config=modelDict['config'],
        dataset=modelDict['dataset']['cfg'],
        network=modelDict['network']['cfg'],
        system=modelDict['system']['config'])

    if not nemoa.type.isModel(model):
        # reset indentation, otherwise it leaks to later messages
        nemoa.setLog(indent='-1')
        return None

    model._set(modelDict)

    nemoa.setLog(indent='-1')
    return model
def scripts(project):
    """Print list of scripts to standard output.

    Keyword Arguments:
        project -- name of the project, passed to nemoa.open

    The project is opened with log mode 'silent'. The previous log
    mode is restored afterwards, even if opening the project raises
    an exception."""

    logParams = nemoa.getLog()
    nemoa.setLog(mode='silent')
    try:
        workspace = nemoa.open(project)
    finally:
        # never leave the logger in silent mode
        nemoa.setLog(mode=logParams['mode'])

    scriptList = workspace.list(type='script',
        namespace=workspace.project())

    # single argument print calls work as statement and as function
    print('Scripts in project %s:\n' % (project))
    for script in scriptList:
        print(' %s' % (script))
    print('')
def scripts(project):
    """Print list of scripts to standard output.

    Keyword Arguments:
        project -- project name which is opened by nemoa.open

    While the project is opened the log mode is set to 'silent'.
    A try/finally block guarantees that the previous log mode is
    set again, also if nemoa.open raises an exception."""

    previousMode = nemoa.getLog()["mode"]
    nemoa.setLog(mode="silent")
    try:
        workspace = nemoa.open(project)
    finally:
        nemoa.setLog(mode=previousMode)

    # list scripts of the namespace of the opened project
    namespace = workspace.project()
    scriptNames = workspace.list(type="script", namespace=namespace)

    # print with a single argument is valid as statement and function
    print("Scripts in project %s:\n" % (project))
    for scriptName in scriptNames:
        print(" %s" % (scriptName))
    print("")
def __getModelInstance(self, name=None, config=None,
    dataset=None, network=None, system=None):
    """Return new model instance.

    Keyword Arguments:
        name -- name of the model; if None, the name is joined from
            the names of dataset, network and system
        config -- model configuration, passed to self.__getInstance
        dataset -- dataset instance or dataset configuration
        network -- network instance or network configuration;
            None is replaced by {'type': 'auto'}
        system -- system instance or system configuration

    Returns the object created by self.__getInstance(type = 'model')
    or None if dataset, network or system are invalid."""

    nemoa.log('create model instance')
    nemoa.setLog(indent='+1')

    # create dataset instance (if not given)
    if not nemoa.type.isDataset(dataset):
        dataset = self.__getInstance(type='dataset', config=dataset)
    if not nemoa.type.isDataset(dataset):
        nemoa.log('error',
            'could not create model instance: dataset is invalid!')
        nemoa.setLog(indent='-1')
        return None

    # create network instance (if not given)
    if network is None:
        network = {'type': 'auto'}
    if not nemoa.type.isNetwork(network):
        network = self.__getInstance(type='network', config=network)
    if not nemoa.type.isNetwork(network):
        nemoa.log('error',
            'could not create model instance: network is invalid!')
        nemoa.setLog(indent='-1')
        return None

    # create system instance (if not given)
    if not nemoa.type.isSystem(system):
        system = self.__getInstance(type='system', config=system)
    if not nemoa.type.isSystem(system):
        nemoa.log('error',
            'could not create model instance: system is invalid!')
        nemoa.setLog(indent='-1')
        return None

    # create name string (if not given)
    if name is None:
        name = '-'.join([dataset.name(), network.name(), system.name()])

    # create model instance
    model = self.__getInstance(type='model', config=config, name=name,
        dataset=dataset, network=network, system=system)

    nemoa.setLog(indent='-1')
    return model
def __getInstance(self, type = None, config = None, empty = False, **kwargs):
    """Create an instance of the given object type.

    Keyword Arguments:
        type -- name of the object type; the module 'nemoa.<type>'
            is imported to create the instance
        config -- configuration, resolved to a dictionary by
            nemoa.workspace.getConfig
        empty -- if True the instance is created by module.empty()
            instead of module.new(config, **kwargs)
        **kwargs -- passed to getConfig and to module.new

    Returns the instance or None if the configuration is unknown or
    the created object is not of the requested type."""

    def fail(template):
        # common exit of both error cases: log, reset indent, no result
        nemoa.log('error', template % (type))
        nemoa.setLog(indent = '-1')
        return None

    emptyInfo = ' empty' if empty else ''
    nemoa.log('create%s %s instance' % (emptyInfo, type))
    nemoa.setLog(indent = '+1')

    # import module
    module = importlib.import_module('nemoa.' + str(type))

    # get objects configuration as dictionary
    config = nemoa.workspace.getConfig(
        type = type, config = config, **kwargs)
    if not isinstance(config, dict):
        return fail("""could not create %s instance: unknown configuration!""")

    # create and initialize new instance of given class
    if empty:
        instance = module.empty()
    else:
        instance = module.new(config = config, **kwargs)

    # check instance class
    if not nemoa.type.isInstanceType(instance, type):
        return fail("""could not create %s instance: invalid configuration!""")

    nemoa.log('name of %s is: \'%s\'' % (type, instance.name()))
    nemoa.setLog(indent = '-1')
    return instance
def configure(self, dataset=None, network=None, *args, **kwargs):
    """Configure system and subsystems to network and dataset.

    Keyword Arguments:
        dataset -- dataset, validated by self.checkDataset
        network -- network, validated by self.checkNetwork
        *args, **kwargs -- passed on to self._configure

    Returns True if the class has no callable '_configure', False if
    network or dataset are not valid and otherwise the return value
    of self._configure."""

    # systems without own configuration routine need no configuration
    configureHook = getattr(self.__class__, "_configure", None)
    if not callable(configureHook):
        return True

    nemoa.log("configure system '%s'" % (self.name()))
    nemoa.setLog(indent="+1")

    # validate network first, then dataset
    networkIsValid = self.checkNetwork(network)
    if not networkIsValid:
        nemoa.log("error",
            " system could not be configured: network is not valid!")
        nemoa.setLog(indent="-1")
        return False

    datasetIsValid = self.checkDataset(dataset)
    if not datasetIsValid:
        nemoa.log("error",
            " system could not be configured: dataset is not valid!")
        nemoa.setLog(indent="-1")
        return False

    retVal = self._configure(
        dataset=dataset, network=network, *args, **kwargs)

    nemoa.setLog(indent="-1")
    return retVal
def __getInstance(self, type=None, config=None, empty=False, **kwargs):
    """Return new instance of given object type and configuration.

    Keyword Arguments:
        type -- object type name, also used as name of the
            submodule of nemoa which creates the instance
        config -- configuration which nemoa.workspace.getConfig
            resolves to a dictionary
        empty -- create the instance by module.empty() if True,
            by module.new(config, **kwargs) otherwise
        **kwargs -- additional arguments for getConfig and module.new

    Returns None (after an error message) if no configuration
    dictionary is found or the instance has the wrong type."""

    if empty:
        startMessage = 'create%s %s instance' % (' empty', type)
    else:
        startMessage = 'create%s %s instance' % ('', type)
    nemoa.log(startMessage)
    nemoa.setLog(indent='+1')

    # import module
    moduleName = 'nemoa.' + str(type)
    module = importlib.import_module(moduleName)

    # get objects configuration as dictionary
    config = nemoa.workspace.getConfig(type=type, config=config, **kwargs)

    instance = None
    if not isinstance(config, dict):
        nemoa.log('error',
            'could not create %s instance: unknown configuration!'
            % (type))
    else:
        # create and initialize new instance of given class
        created = module.empty() if empty \
            else module.new(config=config, **kwargs)

        # check instance class
        if nemoa.type.isInstanceType(created, type):
            nemoa.log("name of %s is: '%s'" % (type, created.name()))
            instance = created
        else:
            nemoa.log('error',
                'could not create %s instance: invalid configuration!'
                % (type))

    nemoa.setLog(indent='-1')
    return instance
def __scanForNetworks(self, files=None):
    """Scan for network files in current project.

    Keyword Arguments:
        files -- glob pattern of network definition files;
            None selects all '*.ini' files in self.__path['networks']

    Every file found is passed to self.__registerNetwork.
    Returns True."""

    nemoa.log('scanning for networks')
    nemoa.setLog(indent='+1')

    # are files given?
    if files is None:
        files = self.__path['networks'] + '*.ini'

    # import definition files
    for file in glob.iglob(self.getPath(files)):
        self.__registerNetwork(file)

    nemoa.setLog(indent='-1')
    return True
def __scanForNetworks(self, files = None):
    """Scan for network files in current project.

    Keyword Arguments:
        files -- glob pattern which is expanded by self.getPath;
            default (None) is self.__path['networks'] + '*.ini'

    Registers every matching file by self.__registerNetwork and
    returns True."""

    nemoa.log('scanning for networks')
    nemoa.setLog(indent = '+1')

    # use the default pattern of the project if no files are given
    if files is None:
        files = self.__path['networks'] + '*.ini'

    # import definition files
    pattern = self.getPath(files)
    for networkFile in glob.iglob(pattern):
        self.__registerNetwork(networkFile)

    nemoa.setLog(indent = '-1')
    return True
def __scanForConfigFiles(self, files = None):
    """Import all config files from current project.

    Keyword Arguments:
        files -- glob pattern of configuration files; None selects
            all '*.ini' files in self.__path['workspace']

    Every file found is passed to self.__importConfigFile.
    Returns True."""

    nemoa.log('scanning for configuration files')
    nemoa.setLog(indent = '+1')

    # are files given?
    if files is None:
        files = self.__path['workspace'] + '*.ini'

    # import configuration files
    for file in glob.iglob(self.getPath(files)):
        self.__importConfigFile(file)

    nemoa.setLog(indent = '-1')
    return True
def __getModelInstance(self, name = None, config = None,
    dataset = None, network = None, system = None):
    """Return new model instance.

    Keyword Arguments:
        name -- model name; None creates the name from the names of
            dataset, network and system, joined by '-'
        config -- model configuration for self.__getInstance
        dataset -- dataset instance or configuration of a dataset
        network -- network instance or configuration of a network;
            None stands for the configuration {'type': 'auto'}
        system -- system instance or configuration of a system

    Configurations are converted to instances by self.__getInstance.
    Returns None if one of the three instances is invalid."""

    nemoa.log('create model instance')
    nemoa.setLog(indent = '+1')

    # create dataset instance (if not given)
    if not nemoa.type.isDataset(dataset):
        dataset = self.__getInstance(
            type = 'dataset', config = dataset)
    if not nemoa.type.isDataset(dataset):
        nemoa.log('error',
            'could not create model instance: dataset is invalid!')
        nemoa.setLog(indent = '-1')
        return None

    # create network instance (if not given)
    if network is None:
        network = {'type': 'auto'}
    if not nemoa.type.isNetwork(network):
        network = self.__getInstance(
            type = 'network', config = network)
    if not nemoa.type.isNetwork(network):
        nemoa.log('error',
            'could not create model instance: network is invalid!')
        nemoa.setLog(indent = '-1')
        return None

    # create system instance (if not given)
    if not nemoa.type.isSystem(system):
        system = self.__getInstance(
            type = 'system', config = system)
    if not nemoa.type.isSystem(system):
        nemoa.log('error',
            'could not create model instance: system is invalid!')
        nemoa.setLog(indent = '-1')
        return None

    # create name string (if not given)
    if name is None:
        name = '-'.join(
            [dataset.name(), network.name(), system.name()])

    # create model instance
    model = self.__getInstance(
        type = 'model', config = config, name = name,
        dataset = dataset, network = network, system = system)

    nemoa.setLog(indent = '-1')
    return model
def __scanForConfigFiles(self, files=None):
    """Import all config files from current project.

    Keyword Arguments:
        files -- glob pattern which is expanded by self.getPath;
            default (None) is self.__path['workspace'] + '*.ini'

    Imports every matching file by self.__importConfigFile and
    returns True."""

    nemoa.log('scanning for configuration files')
    nemoa.setLog(indent='+1')

    # use the default pattern of the project if no files are given
    if files is None:
        files = self.__path['workspace'] + '*.ini'

    # import configuration files
    pattern = self.getPath(files)
    for configFile in glob.iglob(pattern):
        self.__importConfigFile(configFile)

    nemoa.setLog(indent='-1')
    return True
def optimizeParams(self, dataset, schedule):
    """Optimize system parameters using data and given schedule.

    Keyword Arguments:
        dataset -- dataset, passed to self._optimizeParams
        schedule -- dictionary of the optimization schedule; the
            entry schedule['params'][<system type>] (if 'params' is
            given) is merged into the optimization configuration

    Returns the return value of nemoa.log if the schedule does not
    include the system type, False if the dataset check fails and
    otherwise the return value of self._optimizeParams."""

    # a schedule with parameters has to include the current system
    scheduleHasParams = "params" in schedule
    if scheduleHasParams and self.getType() not in schedule["params"]:
        return nemoa.log(
            "error",
            "could not optimize model: optimization schedule '%s' "
            "does not include system '%s' "
            % (schedule["name"], self.getType()))

    # merge default, existing and (if available) given schedule
    config = self._default("optimize")
    nemoa.common.dictMerge(self._config["optimize"], config)
    if scheduleHasParams:
        nemoa.common.dictMerge(schedule["params"][self.getType()], config)
    self._config["optimize"] = config

    ################################################################
    # System independent optimization settings                     #
    ################################################################

    # check dataset (the check is active if not configured)
    checkIsActive = "checkDataset" not in config \
        or config["checkDataset"] == True
    if checkIsActive and not self._checkDataset(dataset):
        return False

    # initialize inspector
    inspector = nemoa.system.base.inspector(self)
    if "inspect" in config:
        if not config["inspect"] == False:
            inspector.setTestData(self._getTestData(dataset))

    # optimize system parameters
    algorithm = config["algorithm"].title()
    message = "optimize '%s' (%s) using algorithm '%s'" \
        % (self.name(), self.getType(), algorithm)
    nemoa.log("note", message)
    nemoa.setLog(indent="+1")
    retVal = self._optimizeParams(dataset, schedule, inspector)
    nemoa.setLog(indent="-1")

    return retVal
def plot(self, plot = None, output = 'file', file = None, **kwargs):
    """Create plot of model.

    Keyword Arguments:
        plot -- name of a plot configuration (string, split by
            nemoa.common.strSplitParams into name and parameters),
            plot configuration (dict) or None, which selects
            '<system type>.default'
        output -- 'display' ignores the given file, 'file' creates
            a filename in nemoa.workspace.path('plots') if no file
            is given
        file -- path of the plot file
        **kwargs -- additional plot parameters, only used if plot is
            given as string

    Returns False if the model is not configured, None if no plot
    instance could be created and otherwise the return value of the
    create method of the plot instance. The log indentation is
    restored on every return path."""

    nemoa.log('create plot of model')
    nemoa.setLog(indent = '+1')

    # check if model is configured
    if not self.__isConfigured():
        nemoa.log('error', 'could not create plot of model: '
            'model is not yet configured!')
        nemoa.setLog(indent = '-1')
        return False

    # get plot instance
    nemoa.log('create plot instance')
    nemoa.setLog(indent = '+1')

    if plot is None:
        plot = self.system.getType() + '.default'
    if isinstance(plot, str):
        plotName, plotParams = nemoa.common.strSplitParams(plot)
        # keyword arguments overwrite parameters given in the string
        for param in kwargs:
            plotParams[param] = kwargs[param]
        objPlot = self.__getPlot(name = plotName, params = plotParams)
        if not objPlot:
            nemoa.log('warning',
                "could not create plot: unknown configuration '%s'"
                % (plotName))
    elif isinstance(plot, dict):
        objPlot = self.__getPlot(config = plot)
    else:
        objPlot = self.__getPlot()

    if not objPlot:
        # two indentation levels have been opened at this point
        nemoa.setLog(indent = '-2')
        return None

    # prepare filename
    if output == 'display':
        file = None
    elif output == 'file' and not file:
        file = nemoa.common.getEmptyFile(
            nemoa.workspace.path('plots') + self.name() + '/'
            + objPlot.cfg['name'] + '.' + objPlot.settings['fileformat'])

    # create plot
    retVal = objPlot.create(self, file = file)
    if file is not None:
        nemoa.log('save plot: ' + file)

    nemoa.setLog(indent = '-2')
    return retVal
def preprocessData(self, **kwargs):
    """Preprocess data by stratification, normalization and
    transformation.

    Keyword Arguments:
        stratify -- argument for self.stratifyData
        normalize -- argument for self.normalizeData
        transform -- argument for self.transformData

    Description:
        Only steps with a given keyword argument are executed; the
        order is always stratify, normalize, transform.
        Returns True."""

    nemoa.log('preprocessing data')
    nemoa.setLog(indent = '+1')

    # keyword argument and name of the method which processes it
    steps = (
        ('stratify', 'stratifyData'),
        ('normalize', 'normalizeData'),
        ('transform', 'transformData'))
    for option, methodName in steps:
        if option in kwargs:
            getattr(self, methodName)(kwargs[option])

    nemoa.setLog(indent = '-1')
    return True
def configure(self, dataset, system):
    """Configure network to dataset and system.

    Keyword Arguments:
        dataset -- nemoa dataset instance
        system -- nemoa system instance

    Networks of type 'auto' and 'autolayer' are created from
    dataset, system or layers; all other networks are updated for
    every column group of the dataset which differs from the nodes
    in self.cfg['nodes'].

    Returns True if the network is empty or has been configured and
    False if dataset or system are no valid instances."""

    # check if network instance is empty
    if self.isEmpty():
        nemoa.log('configuration is not needed: network is \'empty\'')
        return True

    # check if dataset instance is available
    if not nemoa.type.isDataset(dataset):
        nemoa.log('error',
            'could not configure network: no valid dataset instance given!')
        return False

    # check if system instance is available
    if not nemoa.type.isSystem(system):
        nemoa.log('error',
            'could not configure network: no valid system instance given!')
        return False

    nemoa.log('configure network: \'%s\'' % (self.name()))
    nemoa.setLog(indent = '+1')

    # type: 'auto' is used for networks
    # which are created by datasets (visible units)
    # and systems (hidden units)
    if self.cfg['type'] == 'auto':
        self.__getVisibleNodesFromDataset(dataset)
        self.__getHiddenNodesFromSystem(system)
        self.__getEdgesFromNodesAndLayers()
        self.updateGraph()
        nemoa.setLog(indent = '-1')
        return True

    # type: 'autolayer' is used for networks
    # which are created by layers and sizes
    if self.cfg['type'] == 'autolayer':
        self.__getNodesFromLayers()
        self.__getEdgesFromNodesAndLayers()
        self.updateGraph()
        nemoa.setLog(indent = '-1')
        return True

    # configure network to dataset: update every group of nodes
    # which is unknown or differs from the columns of the dataset
    groups = dataset.getColGroups()
    for group in groups:
        if group not in self.cfg['nodes'] \
            or not (groups[group] == self.cfg['nodes'][group]):
            self.updateGraph(
                nodelist = {'type': group, 'list': groups[group]})

    nemoa.setLog(indent = '-1')
    return True
def optimizeParams(self, dataset, schedule):
    """Optimize system parameters using data and given schedule.

    Keyword Arguments:
        dataset -- dataset which is checked (optional) and passed
            to self._optimizeParams
        schedule -- optimization schedule (dictionary); without the
            key 'params' only default and existing configuration
            are merged

    Returns the return value of self._optimizeParams. If 'params'
    does not include the type of the system, the return value of
    nemoa.log is returned; if the dataset check fails, False."""

    hasParams = 'params' in schedule

    # check if optimization schedule exists for current system
    if hasParams:
        if not self.getType() in schedule['params']:
            errorMessage = (
                "could not optimize model: optimization schedule '%s' "
                "does not include system '%s' "
                % (schedule['name'], self.getType()))
            return nemoa.log('error', errorMessage)

    # merge default and existing schedule
    config = self._default('optimize')
    nemoa.common.dictMerge(self._config['optimize'], config)

    # merge given schedule
    if hasParams:
        systemParams = schedule['params'][self.getType()]
        nemoa.common.dictMerge(systemParams, config)

    self._config['optimize'] = config

    ################################################################
    # System independent optimization settings                     #
    ################################################################

    # check dataset
    if 'checkDataset' not in config or config['checkDataset'] == True:
        if not self._checkDataset(dataset):
            return False

    # initialize inspector
    inspector = nemoa.system.base.inspector(self)
    if 'inspect' in config and not config['inspect'] == False:
        testData = self._getTestData(dataset)
        inspector.setTestData(testData)

    # optimize system parameters
    algorithm = config['algorithm'].title()
    nemoa.log('note', "optimize '%s' (%s) using algorithm '%s'"
        % (self.name(), self.getType(), algorithm))

    nemoa.setLog(indent = '+1')
    retVal = self._optimizeParams(dataset, schedule, inspector)
    nemoa.setLog(indent = '-1')

    return retVal
def save(self, file = None):
    """Save model settings to file and return filepath.

    Keyword Arguments:
        file -- path of the model file; None selects
            '<model name>.mp' in nemoa.workspace.path('models')

    Returns the path returned by nemoa.common.getEmptyFile, to
    which the settings have been written."""

    nemoa.log('save model to file')
    nemoa.setLog(indent = '+1')

    # get filename
    if file is None:
        fileName = '%s.mp' % (self.name())
        filePath = nemoa.workspace.path('models')
        file = filePath + fileName
    # NOTE(review): applied to given paths as well (like in export
    # of datasets) -- confirm against the original indentation
    file = nemoa.common.getEmptyFile(file)

    # save model parameters and configuration to file
    nemoa.common.dictToFile(self._get(), file)

    # create console message: file name without its extension,
    # independent of the length of the extension
    savedName = os.path.splitext(os.path.basename(file))[0]
    nemoa.log("save model as: '%s'" % (savedName))

    nemoa.setLog(indent = '-1')
    return file
def export(self, file, **kwargs):
    """Export data to file.

    Keyword Arguments:
        file -- path of the export file; the file type is taken
            from the (lowercase) file extension
        **kwargs -- passed to nemoa.common.csvSaveData

    Extensions 'gz' and 'data' are written by self.save, 'csv',
    'tsv', 'tab' and 'txt' by nemoa.common.csvSaveData. Returns the
    return value of the used function or of nemoa.log for
    unsupported file types."""

    file = nemoa.common.getEmptyFile(file)
    fileExt = nemoa.common.getFileExt(file).lower()

    nemoa.log('export data to file')
    nemoa.setLog(indent = '+1')
    nemoa.log("exporting data to file: '%s'" % (file))

    if fileExt in ('gz', 'data'):
        retVal = self.save(file)
    elif fileExt in ('csv', 'tsv', 'tab', 'txt'):
        # the first column gets an empty label
        cols, data = self.getData(output = ('cols', 'recarray'))
        colLabels = [''] + cols
        retVal = nemoa.common.csvSaveData(
            file, data, cols = colLabels, **kwargs)
    else:
        retVal = nemoa.log('error',
            " could not export dataset: unsupported file type '%s'"
            % (fileExt))

    nemoa.setLog(indent = '-1')
    return retVal
def __createNewModel(self, name, config=None,
    dataset=None, network=None, system=None,
    configure=True, initialize=True):
    """Create new model instance and (optionally) configure and
    initialize it.

    Keyword Arguments:
        name, config, dataset, network, system -- passed to
            self.__getModelInstance
        configure -- call model.configure() if True
        initialize -- call model.initialize() if True

    Returns the model instance or False if no valid model instance
    could be created."""

    nemoa.log('create new model')
    nemoa.setLog(indent='+1')

    components = {
        'config': config, 'dataset': dataset,
        'network': network, 'system': system}
    model = self.__getModelInstance(name=name, **components)

    if nemoa.type.isModel(model):
        # configure model
        if configure:
            model.configure()
        # initialize model parameters
        if initialize:
            model.initialize()
        retVal = model
    else:
        nemoa.log('error', 'could not create new model!')
        retVal = False

    nemoa.setLog(indent='-1')
    return retVal
def loadCommon(self):
    """Import configuration files, scripts and networks of all
    common projects.

    The projects are taken from self.__listCommonProjects(). The
    current project is stored before the import and set again
    afterwards. Returns True."""

    nemoa.log('import shared resources')
    nemoa.setLog(indent = '+1')

    # remember current project
    previousProject = self.__project

    commonProjects = self.__listCommonProjects()
    for commonProject in commonProjects:

        # the scans use the paths of the currently set project
        self.__project = commonProject
        self.__updatePaths(base = 'common')

        # import workspaces
        self.__scanForConfigFiles()
        self.__scanForScripts()
        self.__scanForNetworks()

    # reset to previous project
    self.__project = previousProject

    nemoa.setLog(indent = '-1')
    return True
def loadCommon(self):
    """Import common projects.

    For every project in self.__listCommonProjects() the paths are
    updated (base 'common') and configuration files, scripts and
    networks are scanned. Afterwards the project, which has been
    set before the call, is set again. Returns True."""

    nemoa.log('import shared resources')
    nemoa.setLog(indent='+1')

    # get current project
    userProject = self.__project

    # the three scans are executed in this order for every project
    for sharedProject in self.__listCommonProjects():
        self.__project = sharedProject
        self.__updatePaths(base='common')
        self.__scanForConfigFiles()
        self.__scanForScripts()
        self.__scanForNetworks()

    # reset to previous project
    self.__project = userProject

    nemoa.setLog(indent='-1')
    return True
def configure(self, dataset = None, network = None, *args, **kwargs):
    """Configure system and subsystems to network and dataset.

    Keyword Arguments:
        dataset -- dataset, which has to pass self.checkDataset
        network -- network, which has to pass self.checkNetwork
        *args, **kwargs -- additional arguments for self._configure

    Returns True without any action if the class of the system
    provides no callable '_configure'. Returns False if network or
    dataset are not valid, otherwise the result of self._configure."""

    # nothing to do for classes without configuration routine
    systemClass = self.__class__
    hasRoutine = hasattr(systemClass, '_configure') \
        and callable(getattr(systemClass, '_configure'))
    if not hasRoutine:
        return True

    nemoa.log("configure system '%s'" % (self.name()))
    nemoa.setLog(indent = '+1')

    # the network is checked first, the dataset only if it is valid
    retVal = False
    if not self.checkNetwork(network):
        nemoa.log('error',
            " system could not be configured: network is not valid!")
    elif not self.checkDataset(dataset):
        nemoa.log('error',
            " system could not be configured: dataset is not valid!")
    else:
        retVal = self._configure(dataset = dataset, network = network,
            *args, **kwargs)

    nemoa.setLog(indent = '-1')
    return retVal
def __createNewModel(self, name, config = None,
    dataset = None, network = None, system = None,
    configure = True, initialize = True):
    """Return new model instance, configured and initialized on
    request.

    Keyword Arguments:
        name -- name of the model
        config -- configuration of the model
        dataset, network, system -- components of the model; like
            name and config passed to self.__getModelInstance
        configure -- if True, model.configure() is called
        initialize -- if True, model.initialize() is called

    Returns False (after an error message) if the created object is
    no model instance."""

    nemoa.log('create new model')
    nemoa.setLog(indent = '+1')

    newModel = self.__getModelInstance(
        name = name, config = config,
        dataset = dataset, network = network, system = system)

    modelIsValid = nemoa.type.isModel(newModel)
    if not modelIsValid:
        nemoa.log('error', 'could not create new model!')
        nemoa.setLog(indent = '-1')
        return False

    # configuration precedes the initialization of parameters
    if configure:
        newModel.configure()
    if initialize:
        newModel.initialize()

    nemoa.setLog(indent = '-1')
    return newModel
def main(argv):
    """Clear the terminal, set log mode 'shell' and show the welcome
    message of nemoa.

    Keyword Arguments:
        argv -- command line arguments (not used)"""

    # 'cls' clears the terminal on Windows, 'clear' elsewhere
    clearCommand = 'clear'
    if os.name == 'nt':
        clearCommand = 'cls'
    os.system(clearCommand)

    nemoa.setLog(mode='shell')
    nemoa.welcome()
def main(argv):
    """Start nemoa with a cleared terminal in log mode 'shell'.

    Keyword Arguments:
        argv -- list of command line arguments; not evaluated"""

    # select the clear command of the operating system
    isWindows = (os.name == 'nt')
    if isWindows:
        os.system('cls')
    else:
        os.system('clear')

    nemoa.setLog(mode = 'shell')
    nemoa.welcome()
def optimize(self, schedule = None, **kwargs):
    """Optimize system parameters.

    Keyword Arguments:
        schedule -- name of the optimization schedule; None selects
            '<system type>.default', a name without '.' is prefixed
            by the system type
        **kwargs -- passed to nemoa.workspace.getConfig

    Returns self if the model is empty, the schedule is not valid
    or the optimization has been executed and False if the model is
    not yet configured."""

    nemoa.log('optimize model')
    nemoa.setLog(indent = '+1')

    # check if model is empty
    if self.isEmpty():
        nemoa.log('warning', "empty models can not be optimized!")
        nemoa.setLog(indent = '-1')
        return self

    # check if model is configured
    if not self.__isConfigured():
        nemoa.log('error',
            'could not optimize model: model is not yet configured!')
        nemoa.setLog(indent = '-1')
        return False

    # get optimization schedule
    if schedule is None:
        schedule = self.system.getType() + '.default'
    elif '.' not in schedule:
        schedule = self.system.getType() + '.' + schedule
    schedule = nemoa.workspace.getConfig(
        type = 'schedule', config = schedule,
        merge = ['params', self.system.getType()],
        **kwargs)
    if not schedule:
        nemoa.log('error', ' could not optimize system parameters: '
            'optimization schedule is not valid!')
        nemoa.setLog(indent = '-1')
        return self

    # optimization of system parameters
    nemoa.log("starting optimization schedule: '%s'" % (schedule['name']))
    nemoa.setLog(indent = '+1')

    # 2DO: find better solution for multistage optimization
    if 'stage' in schedule and len(schedule['stage']) > 0:
        # the stages are part of the schedule; the name 'config',
        # which was used here before, is not defined in this method
        # NOTE(review): every stage is presumably a dictionary of
        # keyword arguments for optimizeParams -- confirm
        for params in schedule['stage']:
            self.system.optimizeParams(self.dataset, **params)
    elif 'params' in schedule:
        self.system.optimizeParams(
            dataset = self.dataset, schedule = schedule)

    nemoa.setLog(indent = '-1')

    # update network
    self.system.updateNetwork(self.network)

    nemoa.setLog(indent = '-1')
    return self
def transformData(self, algorithm = 'system', system = None,
    mapping = None, **kwargs):
    """Transform dataset.

    Keyword Arguments:
        algorithm -- name of algorithm used for data transformation
            (not case sensitive)
            'system':
                Transform data using nemoa system instance
            'gaussToBinary' (or 'binary'):
                Transform Gauss distributed values to binary values
                in {0, 1}
            'gaussToWeight' (or 'weight'):
                Transform Gauss distributed values to weights
            'gaussToDistance' (or 'distance'):
                Transform Gauss distributed values to distances
        system -- nemoa system instance (nemoa object root class
            'system') used for model based transformation of data
        mapping -- mapping which is passed to system.mapData; None
            selects system.getMapping(). The first entry is the
            group of the source columns, the last entry the group
            of the target columns.
        **kwargs -- passed to system.mapData

    Returns True if the data has been transformed, False if
    algorithm is no string and the return value of nemoa.log if the
    system is invalid or the algorithm is unknown."""

    if not isinstance(algorithm, str):
        return False
    algorithmName = algorithm.lower()

    # system based data transformation
    if algorithmName == 'system':
        if not nemoa.type.isSystem(system):
            return nemoa.log('error', 'could not transform data '
                "using system: parameter 'system' is invalid!")

        nemoa.log("transform data using system '%s'" % (system.name()))
        nemoa.setLog(indent = '+1')

        if mapping is None:
            mapping = system.getMapping()

        sourceColumns = system.getUnits(group = mapping[0])[0]
        targetColumns = system.getUnits(group = mapping[-1])[0]

        self.setColLabels(sourceColumns)

        for src in self.data:
            data = self.data[src]['array']

            # map source columns (as float matrix) to target columns
            dataArray = data[sourceColumns].view('<f8').reshape(
                data.size, len(sourceColumns))
            transArray = system.mapData(
                dataArray, mapping = mapping, **kwargs)

            # create empty record array; the list is required,
            # since zip returns no list in Python 3
            numRows = data['label'].size
            colNames = ('label',) + tuple(targetColumns)
            colFormats = ('<U12',) + ('<f8',) * len(targetColumns)
            newRecArray = numpy.recarray((numRows,),
                dtype = list(zip(colNames, colFormats)))

            # set values in record array
            newRecArray['label'] = data['label']
            for colID, colName in enumerate(newRecArray.dtype.names[1:]):
                newRecArray[colName] = \
                    (transArray[:, colID]).astype(float)

            # set record array
            self.data[src]['array'] = newRecArray

        self.setColLabels(targetColumns)

        nemoa.setLog(indent = '-1')
        return True

    # column based data transformations
    # NOTE(review): 2 / (1 + exp(-x ** 2)) is 1 for x = 0 and
    # approaches 2 for large |x|, which does not match weights or
    # distances in [0, 1] -- formulas are kept unchanged, confirm
    def gaussToBinary(column):
        return (column > 0.0).astype(float)

    def gaussToWeight(column):
        return (2.0 / (1.0 + numpy.exp(-1.0 * column ** 2))).astype(float)

    def gaussToDistance(column):
        return (1.0 - (2.0 / (1.0 + numpy.exp(
            -1.0 * column ** 2)))).astype(float)

    columnTransformations = {
        'gausstobinary': gaussToBinary, 'binary': gaussToBinary,
        'gausstoweight': gaussToWeight, 'weight': gaussToWeight,
        'gausstodistance': gaussToDistance, 'distance': gaussToDistance}

    if algorithmName in columnTransformations:
        transformation = columnTransformations[algorithmName]
        nemoa.log("transform data using '%s'" % (algorithm))
        for src in self.data:
            array = self.data[src]['array']

            # update source per column (recarray),
            # the first column (labels) is skipped
            for colName in array.dtype.names[1:]:
                array[colName] = transformation(array[colName])
        return True

    # log level and message are separate arguments of nemoa.log
    return nemoa.log('error',
        "could not transform data: unknown algorithm '%s'!" % (algorithm))
def configure(self, network, useCache = False, **kwargs):
    """Configure dataset to a given network object

    The column labels of all dataset sources and the visible node
    labels of the network are converted to a common annotation
    format, the columns shared by all sources are matched against
    the network nodes, column and row filters are derived and the
    selected columns are finally imported from the source files.

    Keyword arguments:
        network -- nemoa network object
        useCache -- shall data be cached
        **kwargs -- accepted for interface compatibility, not used

    Returns:
        True if the dataset has been configured (either from the
        cachefile or from the sources), False if no source provides
        column labels or if a network group has no node in the
        dataset sources."""

    nemoa.log("configure dataset '%s' to network '%s'" % \
        (self.name(), network.name()))
    nemoa.setLog(indent = '+1')

    # load data from cachefile (if caching is used and cachefile exists)
    cacheFile = self.searchCacheFile(network) if useCache else None
    if cacheFile and self.load(cacheFile):
        nemoa.log('load cachefile: \'%s\'' % (cacheFile))

        # preprocess data
        if 'preprocessing' in self.cfg:
            self.preprocessData(**self.cfg['preprocessing'])
        nemoa.setLog(indent = '-1')
        return True

    # create table with one record for every single dataset files
    if 'table' not in self.cfg:
        conf = self.cfg.copy()
        conf['fraction'] = 1.0
        self.cfg['table'] = {self.cfg['name']: conf}

    ################################################################
    # Annotation                                                   #
    ################################################################

    isAuto = network.cfg['type'] == 'auto'

    # get nodes from network and convert to common format
    if isAuto:
        # the nodes of the single visible group 'v' are taken from
        # the dataset columns further below. The group order has to
        # be defined here as well, since it is iterated when the
        # network nodes are searched in the dataset columns
        netGroups = {'v': None}
        netGroupsOrder = [(0, 'v')]
    else:
        # get grouped network node labels and label format
        netGroups = network.getNodeGroups(type = 'visible')
        netGroupsOrder = sorted(
            (network.layer(layer)['id'], layer) for layer in netGroups)

        # convert network node labels to common format
        nemoa.log('search network nodes in dataset sources')
        convNetGroups = {}
        convNetGroupsLost = {}
        convNetNodes = []
        convNetNodesLost = []
        netLblFmt = network.cfg['label_format']
        for _, group in netGroupsOrder:
            convNetGroups[group], convNetGroupsLost[group] = \
                nemoa.annotation.convert(netGroups[group],
                input = netLblFmt)
            convNetNodes += convNetGroups[group]
            convNetNodesLost += convNetGroupsLost[group]

        # notify if any network node labels could not be converted
        if convNetNodesLost:
            nemoa.log('%s of %s network nodes could not be '
                'converted! (see logfile)'
                % (len(convNetNodesLost), len(convNetNodes)))
            ## 2DO get original node labels for log file
            nemoa.log('logfile',
                nemoa.common.strToList(convNetNodesLost))

    # get columns from dataset files and convert to common format
    colLabels = {}
    nemoa.log('configure data sources')
    nemoa.setLog(indent = '+1')
    for src in self.cfg['table']:
        nemoa.log("configure '%s'" % (src))
        srcCnf = self.cfg['table'][src]

        # get column labels from csv-file
        if 'csvtype' in srcCnf['source']:
            csvType = srcCnf['source']['csvtype'].strip().lower()
        else:
            csvType = None
        origColLabels = nemoa.common.csvGetColLabels(
            srcCnf['source']['file'], type = csvType)

        # sources without column labels are not registered
        if not origColLabels: continue

        # set annotation format
        colFormat = srcCnf['source']['columns'] \
            if 'columns' in srcCnf['source'] else 'generic:string'

        # convert column labels
        convColLabels, convColLabelsLost = \
            nemoa.annotation.convert(origColLabels, input = colFormat)

        # notify if any dataset columns could not be converted
        if convColLabelsLost:
            nemoa.log('warning',
                "%i of %i dataset columns could not be converted! (logfile)"
                % (len(convColLabelsLost), len(convColLabels)))
            nemoa.log("logfile", ", ".join(
                [convColLabels[i] for i in convColLabelsLost]))

        if not isAuto:

            # search converted network nodes in converted column labels
            numLost = 0
            numAll = 0
            lostNodes = {}
            for _, group in netGroupsOrder:
                groupLabels = convNetGroups[group]
                lostNodesConv = [val for val in groupLabels
                    if val not in convColLabels]
                numAll += len(groupLabels)
                if not lostNodesConv: continue
                numLost += len(lostNodesConv)

                # get original labels
                lostNodes[group] = [
                    netGroups[group][groupLabels.index(val)]
                    for val in lostNodesConv]

            # notify if any network nodes could not be found
            if numLost:
                nemoa.log('warning',
                    (" %i of %i network nodes could not be found in "
                    "dataset source! (logfile)") % (numLost, numAll))
                for group in lostNodes:
                    nemoa.log("logfile",
                        " missing nodes (group %s): " % (group)
                        + ", ".join(lostNodes[group]))

        # prepare dictionary for column source ids
        colLabels[src] = {
            'conv': convColLabels,
            'usecols': (),
            'notusecols': convColLabelsLost }

    nemoa.setLog(indent = '-1')

    # without any usable source there is nothing to intersect
    if not colLabels:
        nemoa.log('error', "could not configure dataset: "
            "no dataset source provides column labels!")
        nemoa.setLog(indent = '-1')
        return False

    # intersect converted dataset column labels: keep the labels
    # that occur in every source and could be converted everywhere
    interColLabels = colLabels[next(iter(colLabels))]['conv']
    for src in colLabels:
        srcLabels = colLabels[src]['conv']
        blackList = [srcLabels[i] for i in colLabels[src]['notusecols']]
        interColLabels = [val for val in interColLabels
            if val in srcLabels and val not in blackList]

    # if network type is 'auto', set network visible nodes
    # to intersected data from database files (without label column)
    if isAuto:
        netGroups['v'] = [label for label in interColLabels
            if not label == 'label']
        convNetGroups = netGroups

    # search network nodes in dataset columns
    self.cfg['columns'] = ()
    for _, group in netGroupsOrder:
        found = False
        for nodeIdx, col in enumerate(convNetGroups[group]):
            if col not in interColLabels: continue
            found = True

            # add column (use network label and group)
            self.cfg['columns'] += ((group, netGroups[group][nodeIdx]), )
            for src in colLabels:
                colLabels[src]['usecols'] \
                    += (colLabels[src]['conv'].index(col), )

        if not found:
            nemoa.log('error',
                (" no node from network group '%s' could be "
                "found in dataset source! ") % (group))
            nemoa.setLog(indent = '-1')
            return False

    # update source file config
    for src in colLabels:
        self.cfg['table'][src]['source']['usecols'] \
            = colLabels[src]['usecols']

    ################################################################
    # Column & Row Filters                                         #
    ################################################################

    # add column filters and partitions from network node groups
    self.cfg['colFilter'] = {'*': ['*:*']}
    self.cfg['colPartitions'] = {'groups': []}
    for group in netGroups:
        self.cfg['colFilter'][group] = [group + ':*']
        self.cfg['colPartitions']['groups'].append(group)

    # add row filters and partitions from sources
    self.cfg['rowFilter'] = {'*': ['*:*']}
    self.cfg['rowPartitions'] = {'source': []}
    for source in self.cfg['table']:
        self.cfg['rowFilter'][source] = [source + ':*']
        self.cfg['rowPartitions']['source'].append(source)

    ################################################################
    # Import data from CSV-files into numpy arrays                 #
    ################################################################

    # import data from sources
    nemoa.log('import data from sources')
    nemoa.setLog(indent = '+1')
    self.data = {}
    for src in self.cfg['table']:
        self.data[src] = {
            'fraction': self.cfg['table'][src]['fraction'],
            'array': self.csvGetData(src) }
    nemoa.setLog(indent = '-1')

    # save cachefile
    if useCache:
        cacheFile = self.createCacheFile(network)
        nemoa.log('save cachefile: \'%s\'' % (cacheFile))
        self.save(cacheFile)

    # preprocess data
    if 'preprocessing' in self.cfg:
        self.preprocessData(**self.cfg['preprocessing'])

    nemoa.setLog(indent = '-1')
    return True