Пример #1
0
    def store(self, result_dir, s_format="pickle"):
        """ Stores this collection in the directory *result_dir*.
        
        In contrast to *dump* this method stores the collection
        not in a single file but as a whole directory structure with meta
        information etc. The data sets are stored separately for each run, 
        split, train/test combination.
        
        **Parameters**
        
          :result_dir:
              The directory in which the collection will be stored.
              
          :name:
              The prefix of the file names in which the individual data sets are 
              stored. The actual file names are determined by appending suffixes
              that encode run, split, train/test information. 
              
              (*optional, default: "time_series"*)
              
          :format:
              The format in which the actual data sets should be stored.
              
              Possible formats are *pickle*, *text*, *csv* and *MATLAB* (.mat)
              format.

              In the MATLAB and text format, all time series objects are
              concatenated to a single large table containing only integer
              values.
              For the csv format comma separated values are taken as default
              or a specified Python format string.
              
              The MATLAB format is a struct that contains the data, the
              sampling frequency and the channel names.
              
              .. note:: For the text and MATLAB format, markers could be added 
                        by using a Marker_To_Mux node before
              
              (*optional, default: "pickle"*)

        .. todo:: Put marker to the right time point and also write marker channel.
        """
        name = "time_series"
        if type(s_format) == list:
            s_type = s_format[1]
            s_format = s_format[0]
        else:
            s_type = "%.18e"
        if s_format in ["text", "matlab"]:
            s_type = "%i"
        if s_format == "csv" and s_type == "real":
            s_type = "%.18e"
        # Update the meta data
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except Exception:
            author = "unknown"
            self._log("Author could not be resolved.", level=logging.WARNING)
        self.update_meta_data({"type": "time_series",
                               "storage_format": s_format,
                               "author": author,
                               "data_pattern": "data_run" + os.sep 
                                               + name + "_sp_tt." + s_format})

        # Iterate through splits and runs in this dataset
        for key, time_series in self.data.iteritems():
            # load data, if necessary 
            # (due to the  lazy loading, the data might be not loaded already)
            if isinstance(time_series, basestring):
                time_series = self.get_data(key[0], key[1], key[2])
            if self.sort_string is not None:
                time_series.sort(key=eval(self.sort_string))
            # Construct result directory
            result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)
            
            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format in ["pickle", "cpickle", "cPickle"]:
                result_file = open(os.path.join(result_path,
                                                name+key_str+".pickle"), "w")
                cPickle.dump(time_series, result_file, cPickle.HIGHEST_PROTOCOL)
            elif s_format in ["text","csv"]:
                self.update_meta_data({
                    "type": "stream",
                    "marker_column": "marker"})
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".csv"), "w")
                csvwriter = csv.writer(result_file)
                channel_names = copy.deepcopy(time_series[0][0].channel_names)
                if s_format == "csv":
                    channel_names.append("marker")
                csvwriter.writerow(channel_names)
                for (data, key) in time_series:
                    if s_format == "text":
                        numpy.savetxt(result_file, data, delimiter=",", fmt=s_type)
                        if not key is None:
                            result_file.write(str(key))
                            result_file.flush()
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            result_file.write(str(data.marker_name))
                            result_file.flush()
                    else:
                        first_line = True
                        marker = ""
                        if not key is None:
                            marker = str(key)
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            marker = str(data.marker_name)
                        for line in data:
                            l = list(line)
                            l.append(marker)
                            csvwriter.writerow(list(l))
                            if first_line:
                                first_line = False
                                marker = ""
                        result_file.flush()
            elif s_format in ["mat"]:
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".mat"),"w")
                # extract a first time series object to get meta data 
                merged_time_series = time_series.pop(0)[0]
                # collect all important information in the collection_object
                collection_object = {
                    "sampling_frequency": merged_time_series.sampling_frequency,
                    "channel_names": merged_time_series.channel_names}

                # merge all data 
                for (data,key) in time_series:
                    merged_time_series = numpy.vstack((merged_time_series,
                                                       data))
                collection_object["data"] = merged_time_series 
                mdict = dict()
                mdict[name + key_str] = collection_object 
                import scipy.io
                scipy.io.savemat(result_file, mdict=mdict)
            else:
                NotImplementedError("Using unavailable storage format:%s!"
                                    % s_format)
            result_file.close()
        self.update_meta_data({
            "channel_names": copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency
        })
        #Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)
Пример #2
0
    def store(self, result_dir, s_format="pickle"):
        """ Stores this collection in the directory *result_dir*.
        
        In contrast to *dump* this method stores the collection
        not in a single file but as a whole directory structure with meta
        information etc. The data sets are stored separately for each run, 
        split, train/test combination.
        
        **Parameters**
        
          :result_dir:
              The directory in which the collection will be stored.
              
          :name:
              The prefix of the file names in which the individual data sets are 
              stored. The actual file names are determined by appending suffixes
              that encode run, split, train/test information. 
              
              (*optional, default: "time_series"*)
              
          :format:
              The format in which the actual data sets should be stored.
              
              Possible formats are *pickle*, *text*, *csv* and *MATLAB* (.mat)
              format.

              In the MATLAB and text format, all time series objects are
              concatenated to a single large table containing only integer
              values.
              For the csv format comma separated values are taken as default
              or a specified Python format string.
              
              The MATLAB format is a struct that contains the data, the
              sampling frequency and the channel names.
              
              .. note:: For the text and MATLAB format, markers could be added 
                        by using a Marker_To_Mux node before
              
              (*optional, default: "pickle"*)

        .. todo:: Put marker to the right time point and also write marker channel.
        """
        name = "time_series"
        if type(s_format) == list:
            s_type = s_format[1]
            s_format = s_format[0]
        else:
            s_type = "%.18e"
        if s_format in ["text", "matlab"]:
            s_type = "%i"
        if s_format == "csv" and s_type == "real":
            s_type = "%.18e"
        # Update the meta data
        author = get_author()
        self.update_meta_data({
            "type":
            "time_series",
            "storage_format":
            s_format,
            "author":
            author,
            "data_pattern":
            "data_run" + os.sep + name + "_sp_tt." + s_format
        })

        # Iterate through splits and runs in this dataset
        for key, time_series in self.data.iteritems():
            # load data, if necessary
            # (due to the  lazy loading, the data might be not loaded already)
            if isinstance(time_series, basestring):
                time_series = self.get_data(key[0], key[1], key[2])
            if self.sort_string is not None:
                time_series.sort(key=eval(self.sort_string))
            # Construct result directory
            result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)

            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format in ["pickle", "cpickle", "cPickle"]:
                result_file = open(
                    os.path.join(result_path, name + key_str + ".pickle"), "w")
                cPickle.dump(time_series, result_file,
                             cPickle.HIGHEST_PROTOCOL)
            elif s_format in ["text", "csv"]:
                self.update_meta_data({
                    "type": "stream",
                    "marker_column": "marker"
                })
                result_file = open(
                    os.path.join(result_path, name + key_str + ".csv"), "w")
                csvwriter = csv.writer(result_file)
                channel_names = copy.deepcopy(time_series[0][0].channel_names)
                if s_format == "csv":
                    channel_names.append("marker")
                csvwriter.writerow(channel_names)
                for (data, key) in time_series:
                    if s_format == "text":
                        numpy.savetxt(result_file,
                                      data,
                                      delimiter=",",
                                      fmt=s_type)
                        if not key is None:
                            result_file.write(str(key))
                            result_file.flush()
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            result_file.write(str(data.marker_name))
                            result_file.flush()
                    else:
                        first_line = True
                        marker = ""
                        if not key is None:
                            marker = str(key)
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            marker = str(data.marker_name)
                        for line in data:
                            l = list(line)
                            l.append(marker)
                            csvwriter.writerow(list(l))
                            if first_line:
                                first_line = False
                                marker = ""
                        result_file.flush()
            elif s_format in ["mat"]:
                result_file = open(
                    os.path.join(result_path, name + key_str + ".mat"), "w")
                # extract a first time series object to get meta data
                merged_time_series = time_series.pop(0)[0]
                # collect all important information in the collection_object
                collection_object = {
                    "sampling_frequency":
                    merged_time_series.sampling_frequency,
                    "channel_names": merged_time_series.channel_names
                }

                # merge all data
                for (data, key) in time_series:
                    merged_time_series = numpy.vstack(
                        (merged_time_series, data))
                collection_object["data"] = merged_time_series
                mdict = dict()
                mdict[name + key_str] = collection_object
                import scipy.io
                scipy.io.savemat(result_file, mdict=mdict)
            elif s_format in ["eeg"]:

                result_file = open(
                    os.path.join(result_path, name + key_str + ".eeg"), "a+")
                result_file_mrk = open(
                    os.path.join(result_path, name + key_str + ".vmrk"), "w")

                result_file_mrk.write(
                    "Brain Vision Data Exchange Marker File, "
                    "Version 1.0\n")
                result_file_mrk.write("; Data stored by pySPACE\n")
                result_file_mrk.write("[Common Infos]\n")
                result_file_mrk.write("Codepage=UTF-8\n")
                result_file_mrk.write("DataFile=%s\n" %
                                      str(name + key_str + ".eeg"))
                result_file_mrk.write("\n[Marker Infos]\n")

                markerno = 1
                datapoint = 1
                sf = None
                channel_names = None

                for t in time_series:
                    if sf is None:
                        sf = t[0].sampling_frequency
                    if channel_names is None:
                        channel_names = t[0].get_channel_names()
                    for mrk in t[0].marker_name.keys():
                        for tm in t[0].marker_name[mrk]:
                            result_file_mrk.write(
                                str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                    (markerno, mrk, datapoint +
                                     (tm * sf / 1000.0))))
                            markerno += 1
                    data_ = t[0].astype(numpy.int16)
                    data_.tofile(result_file)
                    datapoint += data_.shape[0]

                result_hdr = open(
                    os.path.join(result_path, name + key_str + ".vhdr"), "w")

                result_hdr.write("Brain Vision Data Exchange Header "
                                 "File Version 1.0\n")
                result_hdr.write("; Data stored by pySPACE\n\n")
                result_hdr.write("[Common Infos]\n")
                result_hdr.write("Codepage=UTF-8\n")
                result_hdr.write("DataFile=%s\n" %
                                 str(name + key_str + ".eeg"))
                result_hdr.write("MarkerFile=%s\n" %
                                 str(name + key_str + ".vmrk"))
                result_hdr.write("DataFormat=BINARY\n")
                result_hdr.write("DataOrientation=MULTIPLEXED\n")
                result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
                result_hdr.write("SamplingInterval=%d\n\n" % (1000000 / sf))
                result_hdr.write("[Binary Infos]\n")
                result_hdr.write("BinaryFormat=INT_16\n\n")
                result_hdr.write("[Channel Infos]\n")

                # TODO: Add Resolutions to time_series
                # 0 = 0.1 [micro]V,
                # 1 = 0.5 [micro]V,
                # 2 = 10 [micro]V,
                # 3 = 152.6 [micro]V (seems to be unused!)
                resolutions_str = [
                    unicode("0.1,%sV" % unicode(u"\u03BC")),
                    unicode("0.5,%sV" % unicode(u"\u03BC")),
                    unicode("10,%sV" % unicode(u"\u03BC")),
                    unicode("152.6,%sV" % unicode(u"\u03BC"))
                ]
                for i in range(len(channel_names)):
                    result_hdr.write(
                        unicode("Ch%d=%s,,%s\n" %
                                (i + 1, channel_names[i],
                                 unicode(resolutions_str[0]))).encode('utf-8'))
            else:
                NotImplementedError("Using unavailable storage format:%s!" %
                                    s_format)
            result_file.close()
        self.update_meta_data({
            "channel_names":
            copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency":
            time_series[0][0].sampling_frequency
        })
        #Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)
Пример #3
0
    def store(self, result_dir, s_format="pickle"):
        """ Stores this collection in the directory *result_dir*.
        
        In contrast to *dump* this method stores the collection
        not in a single file but as a whole directory structure with meta
        information etc. The data sets are stored separately for each run, 
        split, train/test combination.
        
        **Parameters**
        
          :result_dir:
              The directory in which the collection will be stored.
              
          :name:
              The prefix of the file names in which the individual data sets are 
              stored. The actual file names are determined by appending suffixes
              that encode run, split, train/test information. 
              
              (*optional, default: "time_series"*)
              
          :format:
              The format in which the actual data sets should be stored.
              
              Possible formats are *pickle*, *text*, *csv* and *MATLAB* (.mat)
              format.

              In the MATLAB and text format, all time series objects are
              concatenated to a single large table containing only integer
              values.
              For the csv format comma separated values are taken as default
              or a specified Python format string.
              
              The MATLAB format is a struct that contains the data, the
              sampling frequency and the channel names.
              
              .. note:: For the text and MATLAB format, markers could be added 
                        by using a Marker_To_Mux node before
              
              (*optional, default: "pickle"*)

        .. todo:: Put marker to the right time point and also write marker channel.
        """
        name = "time_series"
        if type(s_format) == list:
            s_type = s_format[1]
            s_format = s_format[0]
        else:
            s_type = "%.18e"
        if s_format in ["text", "matlab"]:
            s_type = "%i"
        if s_format == "csv" and s_type == "real":
            s_type = "%.18e"
        # Update the meta data
        author = get_author()
        self.update_meta_data({"type": "time_series",
                               "storage_format": s_format,
                               "author": author,
                               "data_pattern": "data_run" + os.sep 
                                               + name + "_sp_tt." + s_format})

        # Iterate through splits and runs in this dataset
        for key, time_series in self.data.iteritems():
            # load data, if necessary 
            # (due to the  lazy loading, the data might be not loaded already)
            if isinstance(time_series, basestring):
                time_series = self.get_data(key[0], key[1], key[2])
            if self.sort_string is not None:
                time_series.sort(key=eval(self.sort_string))
            # Construct result directory
            result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
            if not os.path.exists(result_path):
                os.mkdir(result_path)
            
            key_str = "_sp%s_%s" % key[1:]
            # Store data depending on the desired format
            if s_format in ["pickle", "cpickle", "cPickle"]:
                result_file = open(os.path.join(result_path,
                                                name+key_str+".pickle"), "w")
                cPickle.dump(time_series, result_file, cPickle.HIGHEST_PROTOCOL)
            elif s_format in ["text","csv"]:
                self.update_meta_data({
                    "type": "stream",
                    "marker_column": "marker"})
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".csv"), "w")
                csvwriter = csv.writer(result_file)
                channel_names = copy.deepcopy(time_series[0][0].channel_names)
                if s_format == "csv":
                    channel_names.append("marker")
                csvwriter.writerow(channel_names)
                for (data, key) in time_series:
                    if s_format == "text":
                        numpy.savetxt(result_file, data, delimiter=",", fmt=s_type)
                        if not key is None:
                            result_file.write(str(key))
                            result_file.flush()
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            result_file.write(str(data.marker_name))
                            result_file.flush()
                    else:
                        first_line = True
                        marker = ""
                        if not key is None:
                            marker = str(key)
                        elif data.marker_name is not None \
                                and len(data.marker_name) > 0:
                            marker = str(data.marker_name)
                        for line in data:
                            l = list(line)
                            l.append(marker)
                            csvwriter.writerow(list(l))
                            if first_line:
                                first_line = False
                                marker = ""
                        result_file.flush()
            elif s_format in ["mat"]:
                result_file = open(os.path.join(result_path,
                                                name + key_str + ".mat"),"w")
                # extract a first time series object to get meta data 
                merged_time_series = time_series.pop(0)[0]
                # collect all important information in the collection_object
                collection_object = {
                    "sampling_frequency": merged_time_series.sampling_frequency,
                    "channel_names": merged_time_series.channel_names}

                # merge all data 
                for (data,key) in time_series:
                    merged_time_series = numpy.vstack((merged_time_series,
                                                       data))
                collection_object["data"] = merged_time_series 
                mdict = dict()
                mdict[name + key_str] = collection_object 
                import scipy.io
                scipy.io.savemat(result_file, mdict=mdict)
            elif s_format in ["eeg"]:

                result_file = open(os.path.join(result_path,
                                                name + key_str + ".eeg"),"a+")
                result_file_mrk = open(os.path.join(result_path,
                                                name + key_str + ".vmrk"),"w")

                result_file_mrk.write("Brain Vision Data Exchange Marker File, "
                                      "Version 1.0\n")
                result_file_mrk.write("; Data stored by pySPACE\n")
                result_file_mrk.write("[Common Infos]\n")
                result_file_mrk.write("Codepage=UTF-8\n")
                result_file_mrk.write("DataFile=%s\n" %
                                      str(name + key_str + ".eeg"))
                result_file_mrk.write("\n[Marker Infos]\n")

                markerno = 1
                datapoint = 1
                sf = None
                channel_names = None

                for t in time_series:
                    if sf is None:
                        sf = t[0].sampling_frequency
                    if channel_names is None:
                        channel_names = t[0].get_channel_names()
                    for mrk in t[0].marker_name.keys():
                        for tm in t[0].marker_name[mrk]:
                            result_file_mrk.write(str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                (markerno, mrk, datapoint+(tm*sf/1000.0))))
                            markerno += 1
                    data_ = t[0].astype(numpy.int16)
                    data_.tofile(result_file)
                    datapoint += data_.shape[0]

                result_hdr = open(os.path.join(result_path,
                                                name + key_str + ".vhdr"),"w")

                result_hdr.write("Brain Vision Data Exchange Header "
                                 "File Version 1.0\n")
                result_hdr.write("; Data stored by pySPACE\n\n")
                result_hdr.write("[Common Infos]\n")
                result_hdr.write("Codepage=UTF-8\n")
                result_hdr.write("DataFile=%s\n" %
                                      str(name + key_str + ".eeg"))
                result_hdr.write("MarkerFile=%s\n" %
                                      str(name + key_str + ".vmrk"))
                result_hdr.write("DataFormat=BINARY\n")
                result_hdr.write("DataOrientation=MULTIPLEXED\n")
                result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
                result_hdr.write("SamplingInterval=%d\n\n" % (1000000/sf))
                result_hdr.write("[Binary Infos]\n")
                result_hdr.write("BinaryFormat=INT_16\n\n")
                result_hdr.write("[Channel Infos]\n")

                # TODO: Add Resolutions to time_series
                # 0 = 0.1 [micro]V,
                # 1 = 0.5 [micro]V,
                # 2 = 10 [micro]V,
                # 3 = 152.6 [micro]V (seems to be unused!)
                resolutions_str = [unicode("0.1,%sV" % unicode(u"\u03BC")),
                   unicode("0.5,%sV" % unicode(u"\u03BC")),
                   unicode("10,%sV" % unicode(u"\u03BC")),
                   unicode("152.6,%sV" % unicode(u"\u03BC"))]
                for i in range(len(channel_names)):
                    result_hdr.write(unicode("Ch%d=%s,,%s\n" %
                        (i+1,channel_names[i],
                        unicode(resolutions_str[0]))).encode('utf-8'))
            else:
                NotImplementedError("Using unavailable storage format:%s!"
                                    % s_format)
            result_file.close()
        self.update_meta_data({
            "channel_names": copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency
        })
        #Store meta data
        BaseDataset.store_meta_data(result_dir, self.meta_data)