def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Pick an Orange variable for a column from its SQL type name.

    Args:
        field_name: Column name; used as the name of the returned variable.
        field_metadata: Sequence whose first item is the column's type name
            (compared against upper-case names such as ``"INT"``).
        inspect_table: Passed to ``self.get_distinct_values`` together with
            ``field_name``. When falsy, no distinct values are fetched and
            integer/char columns are never turned into discrete variables.

    Returns:
        ``ContinuousVariable`` for floating/decimal types and for integer
        columns without usable distinct values, ``TimeVariable`` for
        date/time types, ``DiscreteVariable`` for integer/char columns with
        distinct values, and ``StringVariable`` for everything else.
    """
    type_code = field_metadata[0]

    NUMERIC_TYPES = ("FLOAT", "DOUBLE", "DECIMAL")
    INT_TYPES = ("INT", "TINYINT", "SMALLINT", "MEDIUMINT", "BIGINT")
    # DATETIME and TIMESTAMP hold both a calendar date and a time of day, so
    # they belong to both groups; otherwise one of have_date/have_time would
    # never be set for them. NOTE(review): the type names look like MySQL's --
    # confirm against the backend that fills field_metadata.
    DATE_TYPES = ("DATE", "DATETIME", "TIMESTAMP", "YEAR")
    TIME_TYPES = ("TIME", "DATETIME", "TIMESTAMP")
    CHAR_TYPES = ("CHAR", "ENUM")

    if type_code in NUMERIC_TYPES:
        return ContinuousVariable.make(field_name)

    if type_code in TIME_TYPES + DATE_TYPES:
        tv = TimeVariable.make(field_name)
        # |= keeps flags that are already set on the variable returned by make
        tv.have_date |= type_code in DATE_TYPES
        tv.have_time |= type_code in TIME_TYPES
        return tv

    if type_code in INT_TYPES:
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            if values:
                return DiscreteVariable.make(field_name, values)
        return ContinuousVariable.make(field_name)

    if type_code in CHAR_TYPES:
        if inspect_table:
            values = self.get_distinct_values(field_name, inspect_table)
            # remove trailing spaces
            values = [v.rstrip() for v in values]
            if values:
                return DiscreteVariable.make(field_name, values)

    # Unknown types, and char columns without distinct values, become text.
    return StringVariable.make(field_name)
def transpose_table(table):
    """
    Swap the rows and columns of the table.

    Every row of ``table`` becomes a continuous column named after that row's
    'Gene' value, and every original attribute becomes a row whose metas are
    read from that attribute's ``attributes`` dictionary.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns:
        Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    # TODO: remove this and use Orange.data.Table.transpose
    source_columns = table.domain.attributes

    gene_columns = []
    for row in table:
        gene_columns.append(ContinuousVariable.make(row['Gene'].value))

    # Meta names are taken from the first variable's attribute dictionary;
    # the 'Time' entry is a time variable, every other entry is text.
    meta_names = sorted(table.domain.variables[0].attributes.keys())
    meta_vars = []
    for meta_name in meta_names:
        if meta_name == 'Time':
            meta_vars.append(TimeVariable.make(meta_name))
        else:
            meta_vars.append(StringVariable.make(meta_name))

    transposed_domain = Domain(gene_columns, metas=meta_vars)
    meta_rows = [
        [column.attributes[meta.name] for meta in transposed_domain.metas]
        for column in source_columns
    ]
    return Table(transposed_domain, table.X.transpose(), metas=meta_rows)
def corpusDomain(mails):
    """Build the Orange domain for a collection of mail records.

    Args:
        mails: Re-iterable collection of indexable records. Item 1 of each
            record supplies the values of the ``FIELDFROM`` variable and
            item 2 the values of the ``FIELDTO`` variable.

    Returns:
        ``Domain`` with a time variable and two discrete variables as
        attributes, and four string variables as metas. Variable names come
        from the module-level ``FIELD*`` constants.
    """
    # A set has no defined iteration order, so handing it to DiscreteVariable
    # could give the same category a different index on every run. Sorting
    # makes the value order reproducible; key=str keeps the sort usable even
    # if a column mixes types.
    senders = sorted({mail[1] for mail in mails}, key=str)
    receivers = sorted({mail[2] for mail in mails}, key=str)

    return Domain(
        [
            TimeVariable.make(FIELDDATE),
            DiscreteVariable.make(FIELDFROM, senders),
            DiscreteVariable.make(FIELDTO, receivers),
        ],
        metas=[
            StringVariable.make(FIELDTEXT),
            StringVariable.make(FIELDFILE),
            StringVariable.make(FIELDCOUNSELOR),
            StringVariable.make(FIELDSUBJECT),
        ],
    )
def corpusDomain(self, mails):
    """Build the Orange domain for a collection of mail records.

    Args:
        mails: Re-iterable collection of indexable records. Items 1, 2 and 3
            of each record supply the values of the "from", "to" and
            "duplicate" variables respectively.

    Returns:
        ``Domain`` with a "date" time variable and three discrete variables
        as attributes, and the string variables "file", "subject", "extra"
        and "text" as metas.
    """
    def distinct(index):
        # A set has no defined iteration order, so passing it directly could
        # give the same category a different index on every run. Sorting
        # makes the value order reproducible; key=str keeps the sort usable
        # even if a column mixes types.
        return sorted({mail[index] for mail in mails}, key=str)

    return Domain(
        [
            TimeVariable.make("date"),
            DiscreteVariable.make("from", distinct(1)),
            DiscreteVariable.make("to", distinct(2)),
            DiscreteVariable.make("duplicate", distinct(3)),
        ],
        metas=[
            StringVariable.make("file"),
            StringVariable.make("subject"),
            StringVariable.make("extra"),
            StringVariable.make("text"),
        ],
    )
def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Choose an Orange variable for a column from its numeric type code.

    Args:
        field_name: Column name; used as the name of the returned variable.
        field_metadata: Sequence whose first item is the column's type code.
        inspect_table: Passed to ``self.get_distinct_values`` together with
            ``field_name``; when falsy no distinct values are fetched.

    Returns:
        A continuous, time, discrete or string variable. Codes that match no
        group, and char columns without distinct values, give a
        ``StringVariable``.
    """
    code = field_metadata[0]

    floatish_codes = (700, 701, 1700)  # real, float8, numeric
    integer_codes = (20, 21, 23)  # bigint, int, smallint
    text_codes = (25, 1042, 1043)  # text, char, varchar
    bool_codes = (16,)  # bool
    date_codes = (1082, 1114, 1184)  # date, timestamp, timestamptz
    time_codes = (1083, 1114, 1184, 1266)  # time, timestamp, timestamptz, timetz

    if code in floatish_codes:
        return ContinuousVariable.make(field_name)

    carries_date = code in date_codes
    carries_time = code in time_codes
    if carries_time or carries_date:
        time_var = TimeVariable.make(field_name)
        # |= only ever switches a flag on
        time_var.have_date |= carries_date
        time_var.have_time |= carries_time
        return time_var

    if code in integer_codes:
        if not inspect_table:
            return ContinuousVariable.make(field_name)
        distinct = self.get_distinct_values(field_name, inspect_table)
        if distinct:
            return DiscreteVariable.make(field_name, distinct)
        return ContinuousVariable.make(field_name)

    if code in bool_codes:
        return DiscreteVariable.make(field_name, ['false', 'true'])

    if code in text_codes and inspect_table:
        distinct = self.get_distinct_values(field_name, inspect_table)
        # drop trailing spaces
        stripped = [value.rstrip() for value in distinct]
        if stripped:
            return DiscreteVariable.make(field_name, stripped)

    return StringVariable.make(field_name)
def transpose_table(table):
    """
    Transpose the rows and columns of the table.

    Every row of ``table`` becomes a continuous column named after that row's
    'Gene' value; every original attribute becomes a row whose meta values
    are read from that attribute's ``attributes`` dictionary.

    Args:
        table: Data in :obj:`Orange.data.Table`

    Returns:
        Transposed :obj:`Orange.data.Table`. (Genes as columns)
    """
    attrs = table.domain.attributes
    attr = [ContinuousVariable.make(ex['Gene'].value) for ex in table]

    # Set metas. The name must be compared by value: ``is not 'Time'`` tested
    # object identity, which only matches when the key happens to be the very
    # same string object as the literal.
    new_metas = [
        TimeVariable.make(name) if name == 'Time' else StringVariable.make(name)
        for name in sorted(table.domain.variables[0].attributes.keys())
    ]
    domain = Domain(attr, metas=new_metas)

    # One meta row per original attribute, in the order of the new metas.
    meta_values = [
        [exp.attributes[var.name] for var in domain.metas] for exp in attrs
    ]
    return Table(domain, table.X.transpose(), metas=meta_values)
def _guess_variable(self, field_name, field_metadata, inspect_table):
    """Map a column's numeric type code to an Orange variable.

    Args:
        field_name: Name given to the variable that is returned.
        field_metadata: Sequence; only its first item (the type code) is read.
        inspect_table: Forwarded to ``self.get_distinct_values``; a falsy
            value disables the distinct-value lookup.

    Returns:
        ``ContinuousVariable``, ``TimeVariable``, ``DiscreteVariable`` or
        ``StringVariable`` (the fallback) named ``field_name``.
    """
    type_code = field_metadata[0]

    real_codes = (700, 701, 1700)  # real, float8, numeric
    whole_codes = (20, 21, 23)  # bigint, int, smallint
    string_codes = (25, 1042, 1043)  # text, char, varchar
    boolean_codes = (16,)  # bool
    date_codes = (1082, 1114, 1184)  # date, timestamp, timestamptz
    # time, timestamp, timestamptz, timetz
    time_codes = (1083, 1114, 1184, 1266)

    if type_code in real_codes:
        return ContinuousVariable.make(field_name)
    elif type_code in time_codes + date_codes:
        variable = TimeVariable.make(field_name)
        variable.have_date |= type_code in date_codes
        variable.have_time |= type_code in time_codes
        return variable
    elif type_code in whole_codes:
        # bigint, int, smallint: discrete only when distinct values exist
        found = None
        if inspect_table:
            found = self.get_distinct_values(field_name, inspect_table)
        if found:
            return DiscreteVariable.make(field_name, found)
        return ContinuousVariable.make(field_name)
    elif type_code in boolean_codes:
        return DiscreteVariable.make(field_name, ['false', 'true'])
    elif type_code in string_codes and inspect_table:
        found = self.get_distinct_values(field_name, inspect_table)
        # trailing spaces are removed before the values are used
        trimmed = [item.rstrip() for item in found]
        if trimmed:
            return DiscreteVariable.make(field_name, trimmed)

    return StringVariable.make(field_name)
def inputAnalysis(self, corpus):
    """Turn the incoming corpus into a table with one row per 'extra' value.

    The widget is reset and ``corpus`` is stored on ``self.corpus``. When the
    corpus is None a message is printed and shown on the label and nothing is
    sent. Otherwise, for every message index the date field and the values of
    the extra field are read via ``self.getFieldValue``; each extra value
    gives one ``[id, date, value]`` row, and the resulting table is sent on
    ``self.Outputs.table``.

    Args:
        corpus: The input corpus, or None.
    """
    self.resetWidget()
    self.corpus = corpus

    if self.corpus is None:
        print("Unpack cell: No corpus available")
        self.label.setText("No corpus available")
        return

    self.label.setText("Processing corpus")

    rows = []
    for message_index in range(len(self.corpus)):
        date = self.getFieldValue(corpus, self.FIELDNAMEDATE, message_index)
        extras = self.getFieldValue(corpus, self.FIELDNAMEEXTRA, message_index)
        for extra_value in extras:
            # the running id equals the number of rows collected so far
            rows.append([len(rows), date, extra_value])

    columns = [
        ContinuousVariable.make("id"),
        TimeVariable.make("date"),
        ContinuousVariable.make("extra"),
    ]
    domain = Domain(columns, metas=[])
    self.Outputs.table.send(Table.from_list(domain, rows))
def read(self):
    """Load the selected OPUS data block into an Orange table.

    The block is ``self.sheet`` if set, otherwise the first entry of
    ``self.sheets``; the name is split on spaces and passed to
    ``opusFC.getOpusData``. The layout of spectra and metas depends on the
    exact type of the object returned by opusFC. The 'SRT' and 'SNM'
    parameters, when present, are appended as constant meta columns.

    Returns:
        Orange.data.Table with the spectra as float attributes.

    Raises:
        IOError: ``opusFC.getOpusData`` failed for ``self.filename``.
        ValueError: the returned object is of an unsupported type, or an
            imported parameter is neither a float nor a string.
    """
    import opusFC

    if self.sheet:
        db = self.sheet
    else:
        db = self.sheets[0]
    db = tuple(db.split(" "))

    try:
        data = opusFC.getOpusData(self.filename, db)
    except Exception as err:
        raise IOError("Couldn't load spectrum from " + self.filename) from err

    def flatten_image(cube):
        """Stack cube[i] blocks and their (map_x, map_y) coordinates row-wise."""
        spectra = None
        coords = None
        for i in np.ndindex(cube.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if spectra is None:
                spectra = cube[i]
                coords = coord.astype(object)
            else:
                spectra = np.vstack((spectra, cube[i]))
                coords = np.vstack((coords, coord))
        return spectra, coords

    clses, metas = [], []
    attrs = [ContinuousVariable.make(repr(x)) for x in data.x]

    y_data = None
    meta_data = None
    data_type = type(data)

    if data_type == opusFC.MultiRegionDataReturn:
        y_data = []
        meta_data = []
        metas.extend([
            ContinuousVariable.make('map_x'),
            ContinuousVariable.make('map_y'),
            StringVariable.make('map_region'),
            TimeVariable.make('start_time'),
        ])
        for region in data.regions:
            y_data.append(region.spectra)
            map_region = np.full_like(region.mapX, region.title, dtype=object)
            meta_region = np.column_stack(
                (region.mapX, region.mapY, map_region, region.start_time))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif data_type == opusFC.MultiRegionTRCDataReturn:
        y_data = []
        meta_data = []
        metas.extend([
            ContinuousVariable.make('map_x'),
            ContinuousVariable.make('map_y'),
            StringVariable.make('map_region'),
        ])
        # traces are labelled by data.labels instead of data.x
        attrs = [ContinuousVariable.make(repr(label)) for label in data.labels]
        for region in data.regions:
            y_data.append(region.spectra)
            map_region = np.full_like(region.mapX, region.title, dtype=object)
            meta_region = np.column_stack(
                (region.mapX, region.mapY, map_region))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif data_type == opusFC.ImageDataReturn:
        metas.extend([
            ContinuousVariable.make('map_x'),
            ContinuousVariable.make('map_y'),
        ])
        y_data, meta_data = flatten_image(data.spectra)
    elif data_type == opusFC.ImageTRCDataReturn:
        metas.extend([
            ContinuousVariable.make('map_x'),
            ContinuousVariable.make('map_y'),
        ])
        attrs = [ContinuousVariable.make(repr(label)) for label in data.labels]
        y_data, meta_data = flatten_image(data.traces)
    elif data_type == opusFC.TimeResolvedTRCDataReturn:
        y_data = data.traces
    elif data_type == opusFC.TimeResolvedDataReturn:
        metas.extend([ContinuousVariable.make('z')])
        y_data = data.spectra
        meta_data = data.z
    elif data_type == opusFC.SingleDataReturn:
        y_data = data.y[None, :]
    else:
        # str() is required: concatenating a type object to a str raises
        # TypeError and would hide the intended ValueError.
        raise ValueError(
            "Empty or unsupported opusFC DataReturn object: " + str(data_type))

    for param_key in ('SRT', 'SNM'):
        try:
            param = data.parameters[param_key]
        except KeyError:
            continue  # TODO should notify user?

        try:
            param_name = opusFC.paramDict[param_key]
        except KeyError:
            param_name = param_key

        if param_key == 'SRT':
            var = TimeVariable.make(param_name)
        elif isinstance(param, float):
            var = ContinuousVariable.make(param_name)
        elif isinstance(param, str):
            var = StringVariable.make(param_name)
        else:
            raise ValueError(
                "Unsupported value type {} for OPUS parameter {}".format(
                    type(param).__name__, param_key))
        metas.extend([var])

        # the same parameter value is repeated for every row of y_data
        params = np.full((y_data.shape[0],), param, np.array(param).dtype)
        if meta_data is not None:
            # NB dtype default will be np.array(fill_value).dtype in future
            meta_data = np.column_stack((meta_data, params.astype(object)))
        else:
            meta_data = params

    domain = Orange.data.Domain(attrs, clses, metas)

    if meta_data is not None:
        meta_data = np.asarray(meta_data)
        if meta_data.ndim == 1:
            # A 1-D vector here is always a single meta column (data.z or one
            # parameter). np.atleast_2d would make it one row of n columns,
            # so reshape it to n rows of one column instead.
            meta_data = meta_data.reshape(-1, 1)
    # meta_data stays None when there are no metas; np.atleast_2d(None) would
    # have produced a spurious 1x1 array.

    table = Orange.data.Table.from_numpy(domain,
                                         y_data.astype(float, order='C'),
                                         metas=meta_data)
    return table
def read(self):
    """Load the selected OPUS data block into an Orange table.

    The block is ``self.sheet`` if set, otherwise the first entry of
    ``self.sheets``. Its name is split on spaces: item 0 is compared with
    'TRC' and item 1 is the dimension ('2D' or '3D'). The 'SRT' and 'SNM'
    parameters, when present, are appended as constant meta columns.

    Returns:
        Orange.data.Table with the spectra as float attributes.

    Raises:
        IOError: ``opusFC.getOpusData`` failed for ``self.filename``.
        ValueError: the dimension is neither '2D' nor '3D', or the 'SNM'
            parameter is neither a float nor a string.
    """
    import opusFC

    if self.sheet:
        db = self.sheet
    else:
        db = self.sheets[0]
    db = tuple(db.split(" "))
    dim = db[1]

    try:
        data = opusFC.getOpusData(self.filename, db)
    except Exception as err:
        raise IOError("Couldn't load spectrum from " + self.filename) from err

    def with_constant_column(existing, value):
        """Return ``existing`` metas extended by a column filled with ``value``."""
        if existing is None:
            return np.array([value])[None, :]
        # NB dtype default will be np.array(fill_value).dtype in future
        column = np.full(existing[:, 0].shape, value, np.array(value).dtype)
        return np.column_stack((existing, column.astype(object)))

    clses, metas = [], []
    attrs = [ContinuousVariable.make(repr(x)) for x in data.x]

    y_data = None
    meta_data = None

    if dim == '3D':
        metas.extend([ContinuousVariable.make('map_x'),
                      ContinuousVariable.make('map_y')])

        if db[0] == 'TRC':
            # traces are labelled by data.labels instead of data.x
            attrs = [ContinuousVariable.make(repr(label))
                     for label in data.labels]
            data_3D = data.traces
        else:
            data_3D = data.spectra

        for i in np.ndindex(data_3D.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if y_data is None:
                y_data = data_3D[i]
                meta_data = coord.astype(object)
            else:
                y_data = np.vstack((y_data, data_3D[i]))
                meta_data = np.vstack((meta_data, coord))
    elif dim == '2D':
        y_data = data.y[None, :]
    else:
        # Without this, y_data would stay None and the failure would surface
        # later as an AttributeError on None.
        raise ValueError("Unsupported OPUS data block dimension: " + dim)

    try:
        stime = data.parameters['SRT']
    except KeyError:
        pass  # TODO notify user?
    else:
        metas.extend([TimeVariable.make(opusFC.paramDict['SRT'])])
        meta_data = with_constant_column(meta_data, stime)

    for param_key in ('SNM',):
        try:
            param = data.parameters[param_key]
        except KeyError:
            # only a missing parameter is expected here, as for 'SRT' above
            continue  # TODO should notify user?

        try:
            param_name = opusFC.paramDict[param_key]
        except KeyError:
            param_name = param_key

        if isinstance(param, float):
            var = ContinuousVariable.make(param_name)
        elif isinstance(param, str):
            var = StringVariable.make(param_name)
        else:
            raise ValueError(
                "Unsupported value type {} for OPUS parameter {}".format(
                    type(param).__name__, param_key))
        metas.extend([var])
        meta_data = with_constant_column(meta_data, param)

    domain = Orange.data.Domain(attrs, clses, metas)
    table = Orange.data.Table.from_numpy(domain,
                                         y_data.astype(float, order='C'),
                                         metas=meta_data)
    return table
def read(self):
    """Load the selected OPUS data block into an Orange table.

    The block is ``self.sheet`` if set, otherwise the first entry of
    ``self.sheets``; the name is split on spaces and passed to
    ``opusFC.getOpusData``. The layout of spectra and metas depends on the
    exact type of the object returned by opusFC. The 'SRT' and 'SNM'
    parameters, when present, are appended as constant meta columns.

    Returns:
        Orange.data.Table with the spectra as float attributes.

    Raises:
        RuntimeError: the ``opusFC`` module cannot be imported.
        IOError: ``opusFC.getOpusData`` failed for ``self.filename``.
        ValueError: the returned object is of an unsupported type, or an
            imported parameter is neither a float nor a string.
    """
    try:
        import opusFC
    except ImportError as err:
        raise RuntimeError(self._OPUS_WARNING) from err

    if self.sheet:
        db = self.sheet
    else:
        db = self.sheets[0]
    db = tuple(db.split(" "))

    try:
        data = opusFC.getOpusData(self.filename, db)
    except Exception as err:
        raise IOError("Couldn't load spectrum from " + self.filename) from err

    def map_xy_metas():
        """Return the two coordinate meta variables used by map-like data."""
        return [ContinuousVariable.make('map_x'),
                ContinuousVariable.make('map_y')]

    def flatten_image(cube):
        """Stack cube[i] blocks and their (map_x, map_y) coordinates row-wise."""
        spectra = None
        coords = None
        for i in np.ndindex(cube.shape[:1]):
            map_y = np.full_like(data.mapX, data.mapY[i])
            coord = np.column_stack((data.mapX, map_y))
            if spectra is None:
                spectra = cube[i]
                coords = coord.astype(object)
            else:
                spectra = np.vstack((spectra, cube[i]))
                coords = np.vstack((coords, coord))
        return spectra, coords

    clses, metas = [], []
    attrs = [ContinuousVariable.make(repr(x)) for x in data.x]

    y_data = None
    meta_data = None
    data_type = type(data)

    if data_type == opusFC.MultiRegionDataReturn:
        y_data = []
        meta_data = []
        metas.extend(map_xy_metas())
        metas.extend([StringVariable.make('map_region'),
                      TimeVariable.make('start_time')])
        for region in data.regions:
            y_data.append(region.spectra)
            map_region = np.full_like(region.mapX, region.title, dtype=object)
            meta_region = np.column_stack(
                (region.mapX, region.mapY, map_region, region.start_time))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif data_type == opusFC.MultiRegionTRCDataReturn:
        y_data = []
        meta_data = []
        metas.extend(map_xy_metas())
        metas.extend([StringVariable.make('map_region')])
        # traces are labelled by data.labels instead of data.x
        attrs = [ContinuousVariable.make(repr(label)) for label in data.labels]
        for region in data.regions:
            y_data.append(region.spectra)
            map_region = np.full_like(region.mapX, region.title, dtype=object)
            meta_region = np.column_stack(
                (region.mapX, region.mapY, map_region))
            meta_data.append(meta_region.astype(object))
        y_data = np.vstack(y_data)
        meta_data = np.vstack(meta_data)
    elif data_type == opusFC.ImageDataReturn:
        metas.extend(map_xy_metas())
        y_data, meta_data = flatten_image(data.spectra)
    elif data_type == opusFC.ImageTRCDataReturn:
        metas.extend(map_xy_metas())
        attrs = [ContinuousVariable.make(repr(label)) for label in data.labels]
        y_data, meta_data = flatten_image(data.traces)
    elif data_type == opusFC.TimeResolvedTRCDataReturn:
        y_data = data.traces
    elif data_type == opusFC.TimeResolvedDataReturn:
        metas.extend([ContinuousVariable.make('z')])
        y_data = data.spectra
        meta_data = data.z
    elif data_type == opusFC.SingleDataReturn:
        y_data = data.y[None, :]
    else:
        # str() is required: concatenating a type object to a str raises
        # TypeError and would hide the intended ValueError.
        raise ValueError(
            "Empty or unsupported opusFC DataReturn object: " + str(data_type))

    for param_key in ('SRT', 'SNM'):
        try:
            param = data.parameters[param_key]
        except KeyError:
            continue  # TODO should notify user?

        try:
            param_name = opusFC.paramDict[param_key]
        except KeyError:
            param_name = param_key

        if param_key == 'SRT':
            var = TimeVariable.make(param_name)
        elif isinstance(param, float):
            var = ContinuousVariable.make(param_name)
        elif isinstance(param, str):
            var = StringVariable.make(param_name)
        else:
            raise ValueError(
                "Unsupported value type {} for OPUS parameter {}".format(
                    type(param).__name__, param_key))
        metas.extend([var])

        # the same parameter value is repeated for every row of y_data
        params = np.full((y_data.shape[0],), param, np.array(param).dtype)
        if meta_data is not None:
            # NB dtype default will be np.array(fill_value).dtype in future
            meta_data = np.column_stack((meta_data, params.astype(object)))
        else:
            meta_data = params

    domain = Orange.data.Domain(attrs, clses, metas)

    if meta_data is not None:
        meta_data = np.asarray(meta_data)
        if meta_data.ndim == 1:
            # A 1-D vector here is always a single meta column (data.z or one
            # parameter). np.atleast_2d would make it one row of n columns,
            # so reshape it to n rows of one column instead.
            meta_data = meta_data.reshape(-1, 1)
    # meta_data stays None when there are no metas; np.atleast_2d(None) would
    # have produced a spurious 1x1 array.

    table = Orange.data.Table.from_numpy(domain,
                                         y_data.astype(float, order='C'),
                                         metas=meta_data)
    return table