def store(self, result_dir, s_format="pickle"):
    """ Store this collection in the directory *result_dir*.

    In contrast to *dump* this method stores the collection not in a
    single file but as a whole directory structure with meta
    information etc. The data sets are stored separately for each run,
    split, train/test combination.

    **Parameters**

      :result_dir:
          The directory in which the collection will be stored.

      :name:
          The prefix of the file names in which the individual data sets
          are stored. The actual file names are determined by appending
          suffixes that encode run, split, train/test information.

          (*optional, default: "time_series"*)

      :format:
          The format in which the actual data sets should be stored.
          Possible formats are *pickle*, *text*, *csv* and *MATLAB*
          (.mat) format. In the MATLAB and text format, all time series
          objects are concatenated to a single large table containing
          only integer values. For the csv format comma separated values
          are taken as default or a specified Python format string.
          The MATLAB format is a struct that contains the data, the
          sampling frequency and the channel names.

          .. note:: For the text and MATLAB format, markers could be
                    added by using a Marker_To_Mux node before

          (*optional, default: "pickle"*)

    .. todo:: Put marker to the right time point and also write marker
              channel.
    """
    name = "time_series"
    # s_format may be a [format, numeric_type] pair; split it apart.
    if type(s_format) == list:
        s_type = s_format[1]
        s_format = s_format[0]
    else:
        s_type = "%.18e"
    if s_format in ["text", "matlab"]:
        s_type = "%i"
    if s_format == "csv" and s_type == "real":
        s_type = "%.18e"
    # Update the meta data
    try:
        author = pwd.getpwuid(os.getuid())[4]
    except Exception:
        author = "unknown"
        self._log("Author could not be resolved.", level=logging.WARNING)
    self.update_meta_data({
        "type": "time_series",
        "storage_format": s_format,
        "author": author,
        "data_pattern": "data_run" + os.sep + name + "_sp_tt." + s_format})
    # Iterate through splits and runs in this dataset
    for key, time_series in self.data.iteritems():
        # load data, if necessary
        # (due to the lazy loading, the data might be not loaded already)
        if isinstance(time_series, basestring):
            time_series = self.get_data(key[0], key[1], key[2])
        if self.sort_string is not None:
            # NOTE(review): eval of a configuration string -- only safe
            # when the specification file is trusted.
            time_series.sort(key=eval(self.sort_string))
        # Construct result directory
        result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
        if not os.path.exists(result_path):
            os.mkdir(result_path)
        key_str = "_sp%s_%s" % key[1:]
        # Store data depending on the desired format
        if s_format in ["pickle", "cpickle", "cPickle"]:
            # binary mode: HIGHEST_PROTOCOL pickles are binary data
            result_file = open(os.path.join(result_path,
                                            name + key_str + ".pickle"),
                               "wb")
            cPickle.dump(time_series, result_file,
                         cPickle.HIGHEST_PROTOCOL)
        elif s_format in ["text", "csv"]:
            self.update_meta_data({
                "type": "stream",
                "marker_column": "marker"})
            result_file = open(os.path.join(result_path,
                                            name + key_str + ".csv"), "w")
            csvwriter = csv.writer(result_file)
            channel_names = copy.deepcopy(time_series[0][0].channel_names)
            if s_format == "csv":
                channel_names.append("marker")
            csvwriter.writerow(channel_names)
            # 'label' was previously named 'key', shadowing the outer
            # loop variable
            for (data, label) in time_series:
                if s_format == "text":
                    numpy.savetxt(result_file, data, delimiter=",",
                                  fmt=s_type)
                    if label is not None:
                        result_file.write(str(label))
                        result_file.flush()
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        result_file.write(str(data.marker_name))
                        result_file.flush()
                else:
                    # csv: marker is only written into the first row of
                    # each time series object
                    first_line = True
                    marker = ""
                    if label is not None:
                        marker = str(label)
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        marker = str(data.marker_name)
                    for line in data:
                        l = list(line)
                        l.append(marker)
                        csvwriter.writerow(list(l))
                        if first_line:
                            first_line = False
                            marker = ""
                    result_file.flush()
        elif s_format in ["mat"]:
            # .mat is a binary container -> binary mode
            result_file = open(os.path.join(result_path,
                                            name + key_str + ".mat"),
                               "wb")
            # extract the first time series object to get meta data;
            # do NOT pop it: the list is reused for the meta data update
            # below, and popping would drop a sample there (and crash
            # for single-element lists)
            merged_time_series = time_series[0][0]
            # collect all important information in the collection_object
            collection_object = {
                "sampling_frequency":
                    merged_time_series.sampling_frequency,
                "channel_names": merged_time_series.channel_names}
            # merge all remaining data below the first object
            for (data, _) in time_series[1:]:
                merged_time_series = numpy.vstack((merged_time_series,
                                                   data))
            collection_object["data"] = merged_time_series
            mdict = dict()
            mdict[name + key_str] = collection_object
            import scipy.io
            scipy.io.savemat(result_file, mdict=mdict)
        else:
            # previously the exception object was created but never
            # raised, so unknown formats fell through to a NameError on
            # result_file.close()
            raise NotImplementedError(
                "Using unavailable storage format:%s!" % s_format)
        result_file.close()
        self.update_meta_data({
            "channel_names":
                copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency})
    # Store meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)
def store(self, result_dir, s_format="pickle"):
    """ Store this collection in the directory *result_dir*.

    In contrast to *dump* this method stores the collection not in a
    single file but as a whole directory structure with meta
    information etc. The data sets are stored separately for each run,
    split, train/test combination.

    **Parameters**

      :result_dir:
          The directory in which the collection will be stored.

      :name:
          The prefix of the file names in which the individual data sets
          are stored. The actual file names are determined by appending
          suffixes that encode run, split, train/test information.

          (*optional, default: "time_series"*)

      :format:
          The format in which the actual data sets should be stored.
          Possible formats are *pickle*, *text*, *csv* and *MATLAB*
          (.mat) format. In the MATLAB and text format, all time series
          objects are concatenated to a single large table containing
          only integer values. For the csv format comma separated values
          are taken as default or a specified Python format string.
          The MATLAB format is a struct that contains the data, the
          sampling frequency and the channel names.

          .. note:: For the text and MATLAB format, markers could be
                    added by using a Marker_To_Mux node before

          (*optional, default: "pickle"*)

    .. todo:: Put marker to the right time point and also write marker
              channel.
    """
    name = "time_series"
    # s_format may be a [format, numeric_type] pair; split it apart.
    if type(s_format) == list:
        s_type = s_format[1]
        s_format = s_format[0]
    else:
        s_type = "%.18e"
    if s_format in ["text", "matlab"]:
        s_type = "%i"
    if s_format == "csv" and s_type == "real":
        s_type = "%.18e"
    # Update the meta data
    author = get_author()
    self.update_meta_data({
        "type": "time_series",
        "storage_format": s_format,
        "author": author,
        "data_pattern": "data_run" + os.sep + name + "_sp_tt." + s_format})
    # Iterate through splits and runs in this dataset
    for key, time_series in self.data.iteritems():
        # load data, if necessary
        # (due to the lazy loading, the data might be not loaded already)
        if isinstance(time_series, basestring):
            time_series = self.get_data(key[0], key[1], key[2])
        if self.sort_string is not None:
            # NOTE(review): eval of a configuration string -- only safe
            # when the specification file is trusted.
            time_series.sort(key=eval(self.sort_string))
        # Construct result directory
        result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
        if not os.path.exists(result_path):
            os.mkdir(result_path)
        key_str = "_sp%s_%s" % key[1:]
        # Store data depending on the desired format
        if s_format in ["pickle", "cpickle", "cPickle"]:
            # binary mode: HIGHEST_PROTOCOL pickles are binary data
            result_file = open(
                os.path.join(result_path, name + key_str + ".pickle"),
                "wb")
            cPickle.dump(time_series, result_file,
                         cPickle.HIGHEST_PROTOCOL)
        elif s_format in ["text", "csv"]:
            self.update_meta_data({
                "type": "stream",
                "marker_column": "marker"})
            result_file = open(
                os.path.join(result_path, name + key_str + ".csv"), "w")
            csvwriter = csv.writer(result_file)
            channel_names = copy.deepcopy(time_series[0][0].channel_names)
            if s_format == "csv":
                channel_names.append("marker")
            csvwriter.writerow(channel_names)
            # 'label' was previously named 'key', shadowing the outer
            # loop variable
            for (data, label) in time_series:
                if s_format == "text":
                    numpy.savetxt(result_file, data, delimiter=",",
                                  fmt=s_type)
                    if label is not None:
                        result_file.write(str(label))
                        result_file.flush()
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        result_file.write(str(data.marker_name))
                        result_file.flush()
                else:
                    # csv: marker is only written into the first row of
                    # each time series object
                    first_line = True
                    marker = ""
                    if label is not None:
                        marker = str(label)
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        marker = str(data.marker_name)
                    for line in data:
                        l = list(line)
                        l.append(marker)
                        csvwriter.writerow(list(l))
                        if first_line:
                            first_line = False
                            marker = ""
                    result_file.flush()
        elif s_format in ["mat"]:
            # .mat is a binary container -> binary mode
            result_file = open(
                os.path.join(result_path, name + key_str + ".mat"),
                "wb")
            # extract the first time series object to get meta data;
            # do NOT pop it: the list is reused for the meta data update
            # below, and popping would drop a sample there (and crash
            # for single-element lists)
            merged_time_series = time_series[0][0]
            # collect all important information in the collection_object
            collection_object = {
                "sampling_frequency":
                    merged_time_series.sampling_frequency,
                "channel_names": merged_time_series.channel_names}
            # merge all remaining data below the first object
            for (data, _) in time_series[1:]:
                merged_time_series = numpy.vstack((merged_time_series,
                                                   data))
            collection_object["data"] = merged_time_series
            mdict = dict()
            mdict[name + key_str] = collection_object
            import scipy.io
            scipy.io.savemat(result_file, mdict=mdict)
        elif s_format in ["eeg"]:
            # raw BrainVision sample file holds int16 samples -> binary
            # append mode ("a+" text mode corrupts data on Windows)
            result_file = open(
                os.path.join(result_path, name + key_str + ".eeg"), "ab")
            result_file_mrk = open(
                os.path.join(result_path, name + key_str + ".vmrk"), "w")
            result_file_mrk.write(
                "Brain Vision Data Exchange Marker File, "
                "Version 1.0\n")
            result_file_mrk.write("; Data stored by pySPACE\n")
            result_file_mrk.write("[Common Infos]\n")
            result_file_mrk.write("Codepage=UTF-8\n")
            result_file_mrk.write("DataFile=%s\n" %
                                  str(name + key_str + ".eeg"))
            result_file_mrk.write("\n[Marker Infos]\n")
            markerno = 1
            datapoint = 1
            sf = None
            channel_names = None
            for t in time_series:
                if sf is None:
                    sf = t[0].sampling_frequency
                if channel_names is None:
                    channel_names = t[0].get_channel_names()
                for mrk in t[0].marker_name.keys():
                    for tm in t[0].marker_name[mrk]:
                        result_file_mrk.write(
                            str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                (markerno, mrk,
                                 datapoint + (tm * sf / 1000.0))))
                        markerno += 1
                data_ = t[0].astype(numpy.int16)
                data_.tofile(result_file)
                datapoint += data_.shape[0]
            # fix: the marker file was never closed before
            result_file_mrk.close()
            result_hdr = open(
                os.path.join(result_path, name + key_str + ".vhdr"), "w")
            result_hdr.write("Brain Vision Data Exchange Header "
                             "File Version 1.0\n")
            result_hdr.write("; Data stored by pySPACE\n\n")
            result_hdr.write("[Common Infos]\n")
            result_hdr.write("Codepage=UTF-8\n")
            result_hdr.write("DataFile=%s\n" %
                             str(name + key_str + ".eeg"))
            result_hdr.write("MarkerFile=%s\n" %
                             str(name + key_str + ".vmrk"))
            result_hdr.write("DataFormat=BINARY\n")
            result_hdr.write("DataOrientation=MULTIPLEXED\n")
            result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
            result_hdr.write("SamplingInterval=%d\n\n" % (1000000 / sf))
            result_hdr.write("[Binary Infos]\n")
            result_hdr.write("BinaryFormat=INT_16\n\n")
            result_hdr.write("[Channel Infos]\n")
            # TODO: Add Resolutions to time_series
            # 0 = 0.1 [micro]V,
            # 1 = 0.5 [micro]V,
            # 2 = 10 [micro]V,
            # 3 = 152.6 [micro]V (seems to be unused!)
            resolutions_str = [
                unicode("0.1,%sV" % unicode(u"\u03BC")),
                unicode("0.5,%sV" % unicode(u"\u03BC")),
                unicode("10,%sV" % unicode(u"\u03BC")),
                unicode("152.6,%sV" % unicode(u"\u03BC"))]
            for i in range(len(channel_names)):
                result_hdr.write(
                    unicode("Ch%d=%s,,%s\n" %
                            (i + 1, channel_names[i],
                             unicode(resolutions_str[0]))).encode('utf-8'))
            # fix: the header file was never closed before
            result_hdr.close()
        else:
            # previously the exception object was created but never
            # raised, so unknown formats fell through to a NameError on
            # result_file.close()
            raise NotImplementedError(
                "Using unavailable storage format:%s!" % s_format)
        result_file.close()
        self.update_meta_data({
            "channel_names":
                copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency})
    # Store meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)
def store(self, result_dir, s_format="pickle"):
    """ Store this collection in the directory *result_dir*.

    In contrast to *dump* this method stores the collection not in a
    single file but as a whole directory structure with meta
    information etc. The data sets are stored separately for each run,
    split, train/test combination.

    **Parameters**

      :result_dir:
          The directory in which the collection will be stored.

      :name:
          The prefix of the file names in which the individual data sets
          are stored. The actual file names are determined by appending
          suffixes that encode run, split, train/test information.

          (*optional, default: "time_series"*)

      :s_format:
          The format in which the actual data sets should be stored.
          Possible formats are 'pickle', 'text', 'csv' and 'mat'
          (matlab) format. If s_format is a list, the second element
          further specifies additional options for storing.

          - pickle: Standard Python format
          - text: In the text format, all time series objects are
            concatenated to a single large table containing only
            integer values.
          - csv: For the csv format comma separated values are taken
            as default or a specified Python format string.
          - mat: Scipy's savemat function is used for storing. Thereby
            the data is stored as 3 dimensional array. Also meta data
            information, like sampling frequency and channel names are
            saved. As an additional parameter the orientation of the
            data arrays can be given as 'channelXtime' or
            'timeXchannel'

          .. note:: For the text and MATLAB format, markers could be
                    added by using a Marker_To_Mux node before

          (*optional, default: "pickle"*)

    .. todo:: Put marker to the right time point and also write marker
              channel.

    .. todo:: Shouldn't be 'text' and 'csv' format part of the stream
              data set?!
    """
    name = "time_series"
    # for some storage procedures we need further specifications
    s_type = None
    if type(s_format) == list:
        # file format is first position
        f_format = s_format[0]
        if len(s_format) > 1:
            s_type = s_format[1]
    else:
        f_format = s_format
    if f_format == "text" and s_type is None:
        s_type = "%i"
    elif f_format == "csv" and s_type == "real":
        s_type = "%.18e"
    # Update the meta data
    author = get_author()
    self.update_meta_data({
        "type": "time_series",
        "storage_format": s_format,
        "author": author,
        "data_pattern": "data_run" + os.sep + name + "_sp_tt." + f_format})
    # Iterate through splits and runs in this dataset
    for key, time_series in self.data.iteritems():
        # load data, if necessary
        # (due to the lazy loading, the data might be not loaded already)
        if isinstance(time_series, basestring):
            time_series = self.get_data(key[0], key[1], key[2])
        if self.sort_string is not None:
            # NOTE(review): eval of a configuration string -- only safe
            # when the specification file is trusted.
            time_series.sort(key=eval(self.sort_string))
        # Construct result directory
        result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
        if not os.path.exists(result_path):
            os.mkdir(result_path)
        key_str = "_sp%s_%s" % key[1:]
        # Store data depending on the desired format
        if f_format in ["pickle", "cpickle", "cPickle"]:
            # binary mode: HIGHEST_PROTOCOL pickles are binary data
            result_file = open(
                os.path.join(result_path, name + key_str + ".pickle"),
                "wb")
            cPickle.dump(time_series, result_file,
                         cPickle.HIGHEST_PROTOCOL)
            result_file.close()
        elif f_format in ["text", "csv"]:
            self.update_meta_data({
                "type": "stream",
                "marker_column": "marker"})
            result_file = open(
                os.path.join(result_path, name + key_str + ".csv"), "w")
            csvwriter = csv.writer(result_file)
            channel_names = copy.deepcopy(time_series[0][0].channel_names)
            if f_format == "csv":
                channel_names.append("marker")
            csvwriter.writerow(channel_names)
            # 'label' was previously named 'key', shadowing the outer
            # loop variable
            for (data, label) in time_series:
                if f_format == "text":
                    numpy.savetxt(result_file, data, delimiter=",",
                                  fmt=s_type)
                    if label is not None:
                        result_file.write(str(label))
                        result_file.flush()
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        result_file.write(str(data.marker_name))
                        result_file.flush()
                else:
                    # csv: marker is only written into the first row of
                    # each time series object
                    first_line = True
                    marker = ""
                    if label is not None:
                        marker = str(label)
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        marker = str(data.marker_name)
                    for line in data:
                        l = list(line)
                        l.append(marker)
                        csvwriter.writerow(list(l))
                        if first_line:
                            first_line = False
                            marker = ""
                    result_file.flush()
            result_file.close()
        elif f_format in ["matlab", "mat", "MATLAB"]:
            # todo: handle all the other attributes of ts objects!
            import scipy.io
            result_file_name = os.path.join(result_path,
                                            name + key_str + ".mat")
            # extract a first time series object to get meta data
            ts1 = time_series[0][0]
            # collect all important information in the collection_object
            dataset_dict = {
                "sampling_frequency": ts1.sampling_frequency,
                "channel_names": ts1.channel_names}
            # we have to extract the data and labels separately
            if 'channelXtime' in s_format:
                dataset_dict["data"] = [data.T for data, _ in time_series]
            else:
                dataset_dict["data"] = [data for data, _ in time_series]
            dataset_dict["labels"] = [label for _, label in time_series]
            # construct numpy 3d array (e.g., channelXtimeXtrials)
            dataset_dict["data"] = numpy.rollaxis(
                numpy.array(dataset_dict["data"]), 0, 3)
            scipy.io.savemat(result_file_name, mdict=dataset_dict)
        elif f_format in ["bp_eeg"]:
            # raw BrainVision sample file holds int16 samples -> binary
            # append mode ("a+" text mode corrupts data on Windows)
            result_file = open(
                os.path.join(result_path, name + key_str + ".eeg"), "ab")
            result_file_mrk = open(
                os.path.join(result_path, name + key_str + ".vmrk"), "w")
            result_file_mrk.write(
                "Brain Vision Data Exchange Marker File, "
                "Version 1.0\n")
            result_file_mrk.write("; Data stored by pySPACE\n")
            result_file_mrk.write("[Common Infos]\n")
            result_file_mrk.write("Codepage=UTF-8\n")
            result_file_mrk.write("DataFile=%s\n" %
                                  str(name + key_str + ".eeg"))
            result_file_mrk.write("\n[Marker Infos]\n")
            markerno = 1
            datapoint = 1
            sf = None
            channel_names = None
            for t in time_series:
                if sf is None:
                    sf = t[0].sampling_frequency
                if channel_names is None:
                    channel_names = t[0].get_channel_names()
                for mrk in t[0].marker_name.keys():
                    for tm in t[0].marker_name[mrk]:
                        result_file_mrk.write(
                            str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                (markerno, mrk,
                                 datapoint + (tm * sf / 1000.0))))
                        markerno += 1
                data_ = t[0].astype(numpy.int16)
                data_.tofile(result_file)
                datapoint += data_.shape[0]
            # fix: the marker file was never closed before
            result_file_mrk.close()
            result_hdr = open(
                os.path.join(result_path, name + key_str + ".vhdr"), "w")
            result_hdr.write("Brain Vision Data Exchange Header "
                             "File Version 1.0\n")
            result_hdr.write("; Data stored by pySPACE\n\n")
            result_hdr.write("[Common Infos]\n")
            result_hdr.write("Codepage=UTF-8\n")
            result_hdr.write("DataFile=%s\n" %
                             str(name + key_str + ".eeg"))
            result_hdr.write("MarkerFile=%s\n" %
                             str(name + key_str + ".vmrk"))
            result_hdr.write("DataFormat=BINARY\n")
            result_hdr.write("DataOrientation=MULTIPLEXED\n")
            result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
            result_hdr.write("SamplingInterval=%d\n\n" % (1000000 / sf))
            result_hdr.write("[Binary Infos]\n")
            result_hdr.write("BinaryFormat=INT_16\n\n")
            result_hdr.write("[Channel Infos]\n")
            # TODO: Add Resolutions to time_series
            # 0 = 0.1 [micro]V,
            # 1 = 0.5 [micro]V,
            # 2 = 10 [micro]V,
            # 3 = 152.6 [micro]V (seems to be unused!)
            resolutions_str = [
                unicode("0.1,%sV" % unicode(u"\u03BC")),
                unicode("0.5,%sV" % unicode(u"\u03BC")),
                unicode("10,%sV" % unicode(u"\u03BC")),
                unicode("152.6,%sV" % unicode(u"\u03BC"))]
            for i in range(len(channel_names)):
                result_hdr.write(
                    unicode("Ch%d=%s,,%s\n" %
                            (i + 1, channel_names[i],
                             unicode(resolutions_str[0]))).encode('utf-8'))
            # fix: the header file was never closed before
            result_hdr.close()
            result_file.close()
        else:
            # previously the exception object was created but never
            # raised, so unknown formats were silently ignored
            raise NotImplementedError(
                "Using unavailable storage format:%s!" % f_format)
        self.update_meta_data({
            "channel_names":
                copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency})
    # Store meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)