def set_data(self, data=None, data_type=None, data_kwargs=None):
    """
    Set the data for the given source based on the data type or a user specified type.

    Parameters
        - data: Object to use as the source data. When ``None`` the data is
          loaded with ``toyz.utils.io.load_data(**self.paths['data'])``.
        - data_type (*string*, optional): Name of the data type. The names
          ``'pandas.core.frame.DataFrame'``, ``'numpy.ndarray'`` and ``'list'``
          convert ``data`` to that type; any other name stores ``data`` unchanged.
          When ``None`` the name is built from the type of the data object
          as ``<module>.<class name>``.
        - data_kwargs (*dict*, optional): Extra keyword arguments passed to the
          ``DataFrame`` / ``numpy.array`` constructors. For ``'list'`` data an
          optional ``'columns'`` entry supplies the column names.

    Raises
        - ToyzDataError: if ``data`` is ``None`` and
          ``self.paths['data']['io_module']`` is an empty string.
    """
    # A fresh dict per call avoids sharing one mutable default between calls
    if data_kwargs is None:
        data_kwargs = {}

    if data is None:
        # If the user didn't specify a dataset to initialize the DataSource,
        # use the specified parameters to load the data from a file
        if self.paths['data']['io_module'] == '':
            raise ToyzDataError(
                'You must supply a data object or file info to initialize a DataSource')
        # Imported here because only the file-loading path needs the I/O module
        import toyz.utils.io
        self.data = toyz.utils.io.load_data(**self.paths['data'])
        if data_type is None:
            data_type = type(self.data).__module__ + '.' + type(self.data).__name__
        self.data_type = data_type
    elif data_type is None:
        # No type given: keep the object as it is and name its type the same
        # way the file-loading path does.
        # NOTE(review): the previous detection loop used names that are not
        # defined in this method and never assigned the data; confirm whether
        # registered data sources should be consulted here instead.
        self.data = data
        self.data_type = type(data).__module__ + '.' + type(data).__name__
    else:
        self.data_type = data_type
        if data_type == 'pandas.core.frame.DataFrame':
            from pandas import DataFrame
            self.data = DataFrame(data, **data_kwargs)
        elif data_type == 'numpy.ndarray':
            import numpy
            # numpy.array wraps existing data; numpy.ndarray(data) would
            # interpret ``data`` as a shape and return uninitialized memory
            self.data = numpy.array(data, **data_kwargs)
        elif data_type == 'list':
            self.data = list(data)
            if 'columns' in data_kwargs:
                self.columns = data_kwargs['columns']
            else:
                # One generated name per entry of the list
                self.columns = ['col-' + str(n) for n in range(len(self.data))]
        else:
            # For now we assume that this is an affiliated data type
            # In the future we may want to check for this explicitly
            self.data = data

    # Set the column names based on the data type
    self.name_columns()
def set_data(self, data=None, data_type=None, data_kwargs=None):
    """
    Set the data for the given source based on the data type or a user specified type.

    Parameters
        - data: Object to use as the source data. When ``None`` the data is
          loaded with ``toyz.utils.io.load_data(**self.paths['data'])``.
        - data_type (*string*, optional): Name of the data type. The names
          ``'pandas.core.frame.DataFrame'``, ``'numpy.ndarray'`` and ``'list'``
          convert ``data`` to that type; any other name stores ``data`` unchanged.
          When ``None`` the name is built from the type of the data object
          as ``<module>.<class name>``.
        - data_kwargs (*dict*, optional): Extra keyword arguments passed to the
          ``DataFrame`` / ``numpy.array`` constructors. For ``'list'`` data an
          optional ``'columns'`` entry supplies the column names.

    Raises
        - ToyzDataError: if ``data`` is ``None`` and
          ``self.paths['data']['io_module']`` is an empty string.
    """
    # A fresh dict per call avoids sharing one mutable default between calls
    if data_kwargs is None:
        data_kwargs = {}

    if data is None:
        # If the user didn't specify a dataset to initialize the DataSource,
        # use the specified parameters to load the data from a file
        if self.paths['data']['io_module'] == '':
            raise ToyzDataError(
                'You must supply a data object or file info to initialize a DataSource')
        # Imported here because only the file-loading path needs the I/O module
        import toyz.utils.io
        self.data = toyz.utils.io.load_data(**self.paths['data'])
        if data_type is None:
            data_type = type(self.data).__module__ + '.' + type(self.data).__name__
        self.data_type = data_type
    elif data_type is None:
        # No type given: keep the object as it is and name its type the same
        # way the file-loading path does.
        # NOTE(review): the previous detection loop used names that are not
        # defined in this method and never assigned the data; confirm whether
        # registered data sources should be consulted here instead.
        self.data = data
        self.data_type = type(data).__module__ + '.' + type(data).__name__
    else:
        self.data_type = data_type
        if data_type == 'pandas.core.frame.DataFrame':
            from pandas import DataFrame
            self.data = DataFrame(data, **data_kwargs)
        elif data_type == 'numpy.ndarray':
            import numpy
            # numpy.array wraps existing data; numpy.ndarray(data) would
            # interpret ``data`` as a shape and return uninitialized memory
            self.data = numpy.array(data, **data_kwargs)
        elif data_type == 'list':
            self.data = list(data)
            if 'columns' in data_kwargs:
                self.columns = data_kwargs['columns']
            else:
                # One generated name per entry of the list
                self.columns = ['col-' + str(n) for n in range(len(self.data))]
        else:
            # For now we assume that this is an affiliated data type
            # In the future we may want to check for this explicitly
            self.data = data

    # Set the column names based on the data type
    self.name_columns()
def get_workspace_info(toyz_settings, tid, params):
    """
    Build the ``workspace_info`` response for the current user's workspaces.

    The module information returned by ``core.get_module_info`` is used to
    build the ``'load'`` and ``'save'`` GUI descriptions with
    ``toyz.utils.io.build_gui``; its ``'tiles'`` and ``'import_errors'``
    entries are passed through in the response.

    Returns a dict with the keys ``id``, ``load_src_info``, ``save_src_info``,
    ``tiles`` and ``import_error``.
    """
    import toyz.utils.io as io
    import toyz.utils.sources as sources

    info = core.get_module_info(toyz_settings, tid, params)
    # Entries are evaluated top to bottom: the load GUI is built before the save GUI
    return {
        'id': 'workspace_info',
        'load_src_info': io.build_gui(info, 'load'),
        'save_src_info': io.build_gui(info, 'save'),
        'tiles': info['tiles'],
        'import_error': info['import_errors'],
    }
def load_data_file(toyz_settings, tid, params):
    """
    Create a data source from the parameters sent by the browser and register
    it in ``session_vars.data_sources`` under ``params['src_id']``.

    ``params`` must provide ``src_id``, ``src_name`` and ``paths``, where
    ``paths['data']`` holds ``toyz_module`` and ``file_options['src_type']``;
    those two values select the data source class from the module information.

    Returns a dict with the keys ``id``, ``columns`` (the columns of the new
    source) and ``benchmark`` (containing the elapsed ``load_time`` in seconds).
    """
    import toyz.utils.io as io
    import toyz.utils.sources as sources
    import time

    started = time.time()

    # The registry of data sources is created the first time one is loaded
    if not hasattr(session_vars, 'data_sources'):
        session_vars.data_sources = {}

    src_id = params['src_id']
    # NOTE(review): src_name is looked up but the source below is named after
    # src_id -- confirm whether ``name`` was meant to be src_name.
    src_name = params['src_name']
    data_paths = params['paths']['data']
    src_type = data_paths['file_options']['src_type']
    toyz_module = data_paths['toyz_module']

    # Pick the data source class and load the data into a new instance
    module_info = core.get_module_info(toyz_settings, tid, params)
    source_class = module_info['data_sources'][toyz_module][src_type]
    source = source_class(module_info, user_id=tid['user_id'], paths=params['paths'])
    session_vars.data_sources[src_id] = source
    source.src_id = src_id
    source.name = src_id

    finished = time.time()
    return {
        'id': 'data_file',
        'columns': source.columns,
        'benchmark': {
            'load_time': finished - started
        }
    }
def load_data_file(toyz_settings, tid, params):
    """
    Create a data source from the parameters sent by the browser and register
    it in ``session_vars.data_sources`` under ``params['src_id']``.

    ``params`` must provide ``src_id``, ``src_name`` and ``paths``, where
    ``paths['data']`` holds ``toyz_module`` and ``file_options['src_type']``;
    those two values select the data source class from the module information.

    Returns a dict with the keys ``id``, ``columns`` (the columns of the new
    source) and ``benchmark`` (containing the elapsed ``load_time`` in seconds).
    """
    import toyz.utils.io as io
    import toyz.utils.sources as sources
    import time

    started = time.time()

    # The registry of data sources is created the first time one is loaded
    if not hasattr(session_vars, 'data_sources'):
        session_vars.data_sources = {}

    src_id = params['src_id']
    # NOTE(review): src_name is looked up but the source below is named after
    # src_id -- confirm whether ``name`` was meant to be src_name.
    src_name = params['src_name']
    data_paths = params['paths']['data']
    src_type = data_paths['file_options']['src_type']
    toyz_module = data_paths['toyz_module']

    # Pick the data source class and load the data into a new instance
    module_info = core.get_module_info(toyz_settings, tid, params)
    source_class = module_info['data_sources'][toyz_module][src_type]
    source = source_class(module_info, user_id=tid['user_id'], paths=params['paths'])
    session_vars.data_sources[src_id] = source
    source.src_id = src_id
    source.name = src_id

    finished = time.time()
    return {
        'id': 'data_file',
        'columns': source.columns,
        'benchmark': {
            'load_time': finished - started
        }
    }