def test_sort():
    """Check `ops.sort` on a single column and on a whole DataMatrix."""
    table = DataMatrix(length=2)
    table.a = ('b', 'a')
    table.b = (1, 0)
    # Sort column `a` by itself and assign it back; column `b` is expected
    # to keep its original order.
    table.a = ops.sort(table.a)
    check_col(table.a, ['a', 'b'])
    check_col(table.b, [1, 0])
    # Sort the whole DataMatrix by column `b`; both columns are expected to
    # be reordered together.
    sorted_by_b = ops.sort(table, by=table.b)
    check_col(sorted_by_b.a, ['b', 'a'])
    check_col(sorted_by_b.b, [0, 1])
def check_mixedcolumn_sorting():
    """Check the sort order of a shuffled column holding mixed value types.

    The expected result asserted here is: numeric values in ascending order
    from -INF to INF, followed by the non-numeric strings, then the None
    values, and finally the NAN values.
    """
    mixed_values = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', '',
    ]
    expected_order = [
        -INF, -INF, 1, 1,
        1.1, 1.1, 2, 2,
        2.1, 2.1, INF, INF,
        '', 'None', 'alpha', 'beta',
        None, None, None, None,
        NAN, NAN, NAN, NAN,
    ]
    table = DataMatrix(length=24)
    table.c = mixed_values
    # Shuffle first so that the result does not depend on the input order.
    table.c = ops.shuffle(table.c)
    table = ops.sort(table, by=table.c)
    check_col(table.c, expected_order)
def check_nan_sort():
    """Check sorting of FloatColumn data that contains NaN values.

    The assertions expect NaN to end up after the regular numbers, both when
    the DataMatrix is sorted by a column and when single columns are sorted.
    """
    nan = np.nan
    table = DataMatrix(length=3, default_col_type=FloatColumn)
    table.col1 = (2, nan, 1)
    table.col2 = (1, 2, nan)
    # Row-wise sort by col1
    by_col1 = operations.sort(table, by=table.col1)
    check_col(by_col1.col1, [1, 2, nan])
    check_col(by_col1.col2, [nan, 1, 2])
    # Row-wise sort by col2
    by_col2 = operations.sort(by_col1, by=by_col1.col2)
    check_col(by_col2.col1, [2, nan, 1])
    check_col(by_col2.col2, [1, 2, nan])
    # Sort each column independently and assign it back
    by_col2.col1 = operations.sort(by_col2.col1)
    by_col2.col2 = operations.sort(by_col2.col2)
    check_col(by_col2.col1, [1, 2, nan])
    check_col(by_col2.col2, [1, 2, nan])
    check_integrity(by_col2)
def check_nan_sort():
    """Check that NaN values sort last in FloatColumn data.

    Covers sorting the whole DataMatrix by each column in turn, and then
    sorting both columns on their own.
    """
    nan = np.nan
    matrix = DataMatrix(length=3, default_col_type=FloatColumn)
    matrix.col1 = (2, nan, 1)
    matrix.col2 = (1, 2, nan)

    # Whole-matrix sort, keyed on col1
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, nan])
    check_col(matrix.col2, [nan, 1, 2])

    # Whole-matrix sort, keyed on col2
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [2, nan, 1])
    check_col(matrix.col2, [1, 2, nan])

    # Per-column sort, assigned back to the same column
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, nan])
    check_col(matrix.col2, [1, 2, nan])
    check_integrity(matrix)
def check_sort(col_type):
    """Check `operations.sort` for columns of the given type.

    Arguments:
    col_type -- the column class passed as `default_col_type` to DataMatrix.
    """
    table = DataMatrix(length=3, default_col_type=col_type)
    table.col1 = (3, 2, 1)
    table.col2 = (1, 2, 3)
    # Row-wise sort by col1: col2 is expected to follow along
    by_col1 = operations.sort(table, by=table.col1)
    check_col(by_col1.col1, [1, 2, 3])
    check_col(by_col1.col2, [3, 2, 1])
    # Row-wise sort by col2 restores the initial row order
    by_col2 = operations.sort(by_col1, by=by_col1.col2)
    check_col(by_col2.col1, [3, 2, 1])
    check_col(by_col2.col2, [1, 2, 3])
    # Sort a single column, keyed on another column
    by_col2.col2 = operations.sort(by_col2.col2, by=by_col2.col1)
    check_col(by_col2.col2, [3, 2, 1])
    # Sort each column by its own values
    by_col2.col1 = operations.sort(by_col2.col1)
    by_col2.col2 = operations.sort(by_col2.col2)
    check_col(by_col2.col1, [1, 2, 3])
    check_col(by_col2.col2, [1, 2, 3])
    check_integrity(by_col2)
def check_sort(col_type):
    """Exercise `operations.sort` with `col_type` as the default column type.

    Covers sorting the whole DataMatrix by a column, sorting one column keyed
    on another column, and sorting columns by their own values.
    """
    matrix = DataMatrix(length=3, default_col_type=col_type)
    matrix.col1 = (3, 2, 1)
    matrix.col2 = (1, 2, 3)

    # Whole-matrix sort, keyed on col1
    matrix = operations.sort(matrix, by=matrix.col1)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [3, 2, 1])

    # Whole-matrix sort, keyed on col2
    matrix = operations.sort(matrix, by=matrix.col2)
    check_col(matrix.col1, [3, 2, 1])
    check_col(matrix.col2, [1, 2, 3])

    # Single-column sort, keyed on the other column
    matrix.col2 = operations.sort(matrix.col2, by=matrix.col1)
    check_col(matrix.col2, [3, 2, 1])

    # Single-column sorts, keyed on the column itself
    matrix.col1 = operations.sort(matrix.col1)
    matrix.col2 = operations.sort(matrix.col2)
    check_col(matrix.col1, [1, 2, 3])
    check_col(matrix.col2, [1, 2, 3])
    check_integrity(matrix)
def check_intcolumn_sorting():
    """Check sorting of an IntColumn filled with ints, floats and strings.

    The assertion expects every input value to come back as the integer 1 or
    2 (for example, '2.8' is expected as 2), in ascending order.
    """
    raw_values = [1, '1', 2, '2', 1.1, '1.1', 2.1, '2.8']
    expected_order = [1, 1, 1, 1, 2, 2, 2, 2]
    table = DataMatrix(length=8, default_col_type=IntColumn)
    table.c = raw_values
    # Shuffle first so that the result does not depend on the input order.
    table.c = ops.shuffle(table.c)
    table = ops.sort(table, by=table.c)
    check_col(table.c, expected_order)
def word_summary(dm):
    """
    desc:
        Plots the pupil size for each dark and bright word, sorted by pupil
        size, as markers with one-sided error bars. The time window is
        indicated by the PEAKWIN constant. This data is also written to a
        .csv file.

    arguments:
        dm:
            type: DataMatrix
    """
    # Only rows for "light" and "dark" words are summarized.
    dm = (dm.type == "light") | (dm.type == "dark")
    words = dm.word.unique
    # Summary matrix with one row per unique word.
    sm = DataMatrix(length=len(words))
    sm.word = 0
    sm.type = 0
    sm.pupil_win = FloatColumn
    sm.pupil_win_se = FloatColumn
    sm.pupil_full = FloatColumn
    sm.pupil_full_se = FloatColumn
    for i, w in enumerate(words):
        _dm = dm.word == w
        sm.word[i] = w
        # The type is taken from the first row of this word's selection.
        sm.type[i] = _dm.type[0]
        sm.pupil_win[i], sm.pupil_win_se[i] = size_se(
            _dm, PEAKWIN[0], PEAKWIN[1])
        sm.pupil_full[i], sm.pupil_full_se[i] = size_se(_dm)
    sm = operations.sort(sm, by=sm.pupil_win)
    io.writetxt(sm, "%s/word_summary.csv" % OUTPUT_FOLDER)
    plot.new(size=(4, 3))
    for color, type_ in ((orange[1], "light"), (blue[1], "dark")):
        sm_ = sm.type == type_
        n_words = len(sm_)
        x = np.arange(n_words)
        plt.plot(sm_.pupil_win, "o-", color=color)
        # yerr is a (lower, upper) pair: dark words only get an upper error
        # bar and light words only a lower one.
        if type_ == "dark":
            yerr = (np.zeros(n_words), sm_.pupil_win_se)
        else:
            yerr = (sm_.pupil_win_se, np.zeros(n_words))
        plt.errorbar(x, sm_.pupil_win, yerr=yerr, linestyle="", color=color,
                     capsize=0)
    plt.xlim(-1, 33)
    plt.ylabel("Pupil size (normalized)")
    plt.xlabel("Word")
    plt.xticks([])
    plot.save("word_summary")
def check_floatcolumn_sorting():
    """Check sorting of a FloatColumn filled with mixed value types.

    Assigning the mixed values is expected to emit a UserWarning. After
    sorting, the assertion expects the numeric values in ascending order from
    -INF to INF, followed by NAN for every other input value.
    """
    mixed_values = [
        1, '1', 2, '2',
        1.1, '1.1', 2.1, '2.1',
        INF, -INF, 'inf', '-inf',
        NAN, NAN, 'nan', 'nan',
        None, None, None, None,
        'alpha', 'beta', 'None', '',
    ]
    expected_order = [
        -INF, -INF, 1, 1,
        1.1, 1.1, 2, 2,
        2.1, 2.1, INF, INF,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
        NAN, NAN, NAN, NAN,
    ]
    table = DataMatrix(length=24, default_col_type=FloatColumn)
    # Only the assignment itself is expected to warn.
    with pytest.warns(UserWarning):
        table.c = mixed_values
    table.c = ops.shuffle(table.c)
    table = ops.sort(table, by=table.c)
    check_col(table.c, expected_order)
def subject_summary(dm):
    """
    desc:
        Plots the effect (as returned by effect_se()) for each participant,
        sorted by effect size within the PEAKWIN window, with error bars.
        The same data is also written to a .csv file.

    arguments:
        dm:
            type: DataMatrix
    """
    subjects = dm.subject_nr.unique
    n_subjects = len(subjects)
    x = np.arange(n_subjects)
    # Summary matrix with one row per participant.
    sm = DataMatrix(length=n_subjects)
    sm.subject_nr = 0
    sm.effect_win = FloatColumn
    sm.effect_win_se = FloatColumn
    sm.effect_full = FloatColumn
    sm.effect_full_se = FloatColumn
    for row, subject_nr in enumerate(subjects):
        subject_dm = dm.subject_nr == subject_nr
        sm.subject_nr[row] = subject_nr
        # Effect restricted to the PEAKWIN window
        win_effect, win_se = effect_se(subject_dm, PEAKWIN[0], PEAKWIN[1])
        sm.effect_win[row] = win_effect
        sm.effect_win_se[row] = win_se
        # Effect without an explicit window
        full_effect, full_se = effect_se(subject_dm)
        sm.effect_full[row] = full_effect
        sm.effect_full_se[row] = full_se
    sm = operations.sort(sm, by=sm.effect_win)
    line_color = green[-1]
    plot.new(size=(4, 3))
    plt.axhline(0, color="black")
    plt.plot(sm.effect_win, "o-", color=line_color)
    plt.errorbar(
        x,
        sm.effect_win,
        yerr=sm.effect_win_se,
        linestyle="",
        color=line_color,
        capsize=0,
    )
    plt.xlim(-1, 30)
    plt.ylabel("Pupil-size difference (normalized)")
    plt.xlabel("Participant")
    plt.xticks([])
    plot.save("subject_summary")
    io.writetxt(sm, "%s/subject_summary.csv" % OUTPUT_FOLDER)
def _create_live_datamatrix(self):
    """
    desc:
        Builds a live DataMatrix. That is, it takes the original DataMatrix
        (the loop table, or a file from the file pool), repeats and orders
        it, and applies all the operations as specified. An osexception is
        raised if the source file cannot be read, if a column name or the
        repeat value is invalid, or if an operation refers to a column that
        cannot be retrieved.

    returns:
        desc: A live DataMatrix.
        type: DataMatrix
    """
    if self.var.source == u'table':
        src_dm = self.dm
    else:
        from datamatrix import io
        src = self.experiment.pool[self.var.source_file]
        if src.endswith(u'.xlsx'):
            try:
                src_dm = io.readxlsx(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read .xlsx file: %s' % src, exception=e)
        else:
            try:
                src_dm = io.readtxt(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read text file (perhaps it has the wrong '
                    u'format or it is not utf-8 encoded): %s' % src,
                    exception=e)
    for column_name in src_dm.column_names:
        if not self.syntax.valid_var_name(column_name):
            raise osexception(
                u'The loop table contains an invalid column name: '
                u'\'%s\'' % column_name)
    # The number of repeats should be numeric. If not, then give an error.
    # This can also occur when generating a preview of a loop table if
    # repeat is variable.
    if not isinstance(self.var.repeat, (int, float)):
        raise osexception(
            u'Don\'t know how to generate a DataMatrix for "%s" repeats'
            % self.var.repeat)
    length = int(len(src_dm) * self.var.repeat)
    dm = DataMatrix(length=0)
    # Append (shuffled) copies of the source table until the requested
    # length is reached; the last copy may be a partial slice.
    while len(dm) < length:
        i = min(length - len(dm), len(src_dm))
        if self.var.order == u'random':
            dm <<= operations.shuffle(src_dm)[:i]
        else:
            dm <<= src_dm[:i]
    if self.var.order == u'random':
        dm = operations.shuffle(dm)
    if self.ef is not None:
        self.ef.dm = dm
        dm = self.ef.enforce()
    for cmd, arglist in self.operations:
        # The column name is always specified last, or not at all
        if arglist:
            colname = arglist[-1]
            try:
                col = dm[colname]
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not reported as a missing column.
                raise osexception(u'Column %s does not exist' % colname)
        if cmd == u'fullfactorial':
            dm = operations.fullfactorial(dm)
        elif cmd == u'shuffle':
            if not arglist:
                dm = operations.shuffle(dm)
            else:
                dm[colname] = operations.shuffle(col)
        elif cmd == u'shuffle_horiz':
            if not arglist:
                dm = operations.shuffle_horiz(dm)
            else:
                dm = operations.shuffle_horiz(
                    *[dm[_colname] for _colname in arglist])
        elif cmd == u'slice':
            self._require_arglist(cmd, arglist, minlen=2)
            dm = dm[arglist[0]:arglist[1]]
        elif cmd == u'sort':
            self._require_arglist(cmd, arglist)
            dm[colname] = operations.sort(col)
        elif cmd == u'sortby':
            self._require_arglist(cmd, arglist)
            dm = operations.sort(dm, by=col)
        elif cmd == u'reverse':
            if not arglist:
                dm = dm[::-1]
            else:
                dm[colname] = col[::-1]
        elif cmd == u'roll':
            self._require_arglist(cmd, arglist)
            steps = arglist[0]
            if not isinstance(steps, int):
                raise osexception(u'roll steps should be numeric')
            if len(arglist) == 1:
                # Only a step count: roll all rows of the DataMatrix
                dm = dm[-steps:] << dm[:-steps]
            else:
                # Step count plus column name: roll only that column
                dm[colname] = list(col[-steps:]) + list(col[:-steps])
        elif cmd == u'weight':
            self._require_arglist(cmd, arglist)
            dm = operations.weight(col)
    return dm
def _create_live_datamatrix(self):
    """
    desc:
        Builds a live DataMatrix. That is, it takes the original DataMatrix
        (the loop table, or a file from the file pool), repeats and orders
        it, and applies all the operations as specified. An osexception is
        raised if the source file cannot be read, if a column name or the
        repeat value is invalid, or if an operation refers to a column that
        cannot be retrieved.

    returns:
        desc: A live DataMatrix.
        type: DataMatrix
    """
    if self.var.source == u'table':
        src_dm = self.dm
    else:
        from datamatrix import io
        src = self.experiment.pool[self.var.source_file]
        if src.endswith(u'.xlsx'):
            try:
                src_dm = io.readxlsx(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read .xlsx file: %s' % src, exception=e)
        else:
            try:
                src_dm = io.readtxt(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read text file (perhaps it has the wrong '
                    u'format or it is not utf-8 encoded): %s' % src,
                    exception=e)
    for column_name in src_dm.column_names:
        if not self.syntax.valid_var_name(column_name):
            raise osexception(
                u'The loop table contains an invalid column name: '
                u'\'%s\'' % column_name)
    # The number of repeats should be numeric. If not, then give an error.
    # This can also occur when generating a preview of a loop table if
    # repeat is variable.
    if not isinstance(self.var.repeat, (int, float)):
        raise osexception(
            u'Don\'t know how to generate a DataMatrix for "%s" repeats'
            % self.var.repeat)
    length = int(len(src_dm) * self.var.repeat)
    dm = DataMatrix(length=0)
    # Append (shuffled) copies of the source table until the requested
    # length is reached; the last copy may be a partial slice.
    while len(dm) < length:
        i = min(length - len(dm), len(src_dm))
        if self.var.order == u'random':
            dm <<= operations.shuffle(src_dm)[:i]
        else:
            dm <<= src_dm[:i]
    if self.var.order == u'random':
        dm = operations.shuffle(dm)
    if self.ef is not None:
        self.ef.dm = dm
        dm = self.ef.enforce()
    for cmd, arglist in self.operations:
        # The column name is always specified last, or not at all
        if arglist:
            colname = arglist[-1]
            try:
                col = dm[colname]
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not reported as a missing column.
                raise osexception(u'Column %s does not exist' % colname)
        if cmd == u'fullfactorial':
            dm = operations.fullfactorial(dm)
        elif cmd == u'shuffle':
            if not arglist:
                dm = operations.shuffle(dm)
            else:
                dm[colname] = operations.shuffle(col)
        elif cmd == u'shuffle_horiz':
            if not arglist:
                dm = operations.shuffle_horiz(dm)
            else:
                dm = operations.shuffle_horiz(
                    *[dm[_colname] for _colname in arglist])
        elif cmd == u'slice':
            self._require_arglist(cmd, arglist, minlen=2)
            dm = dm[arglist[0]:arglist[1]]
        elif cmd == u'sort':
            self._require_arglist(cmd, arglist)
            dm[colname] = operations.sort(col)
        elif cmd == u'sortby':
            self._require_arglist(cmd, arglist)
            dm = operations.sort(dm, by=col)
        elif cmd == u'reverse':
            if not arglist:
                dm = dm[::-1]
            else:
                dm[colname] = col[::-1]
        elif cmd == u'roll':
            self._require_arglist(cmd, arglist)
            steps = arglist[0]
            if not isinstance(steps, int):
                raise osexception(u'roll steps should be numeric')
            if len(arglist) == 1:
                # Only a step count: roll all rows of the DataMatrix
                dm = dm[-steps:] << dm[:-steps]
            else:
                # Step count plus column name: roll only that column
                dm[colname] = list(col[-steps:]) + list(col[:-steps])
        elif cmd == u'weight':
            self._require_arglist(cmd, arglist)
            dm = operations.weight(col)
    return dm
def _create_live_datamatrix(self):
    """
    desc:
        Builds a live DataMatrix. That is, it takes the original DataMatrix
        (the loop table, or a file from the file pool), repeats and orders
        it, and applies all the operations as specified. An osexception is
        raised if the source file cannot be read, or if an operation refers
        to a column that cannot be retrieved.

    returns:
        desc: A live DataMatrix.
        type: DataMatrix
    """
    if self.var.source == u'table':
        src_dm = self.dm
    else:
        from datamatrix import io
        src = self.experiment.pool[self.var.source_file]
        if src.endswith(u'.xlsx'):
            try:
                src_dm = io.readxlsx(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read .xlsx file: %s' % src, exception=e)
        else:
            try:
                src_dm = io.readtxt(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read text file: %s' % src, exception=e)
    # NOTE(review): self.var.repeat is used without a type check here;
    # presumably it is numeric at this point -- confirm against callers.
    length = int(len(src_dm) * self.var.repeat)
    dm = DataMatrix(length=0)
    # Append (shuffled) copies of the source table until the requested
    # length is reached; the last copy may be a partial slice.
    while len(dm) < length:
        i = min(length - len(dm), len(src_dm))
        if self.var.order == u'random':
            dm <<= operations.shuffle(src_dm)[:i]
        else:
            dm <<= src_dm[:i]
    if self.var.order == u'random':
        dm = operations.shuffle(dm)
    if self.ef is not None:
        self.ef.dm = dm
        dm = self.ef.enforce()
    for cmd, arglist in self.operations:
        # The column name is always specified last, or not at all
        if arglist:
            colname = arglist[-1]
            try:
                col = dm[colname]
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not reported as a missing column.
                raise osexception(u'Column %s does not exist' % colname)
        if cmd == u'fullfactorial':
            dm = operations.fullfactorial(dm)
        elif cmd == u'shuffle':
            if not arglist:
                dm = operations.shuffle(dm)
            else:
                dm[colname] = operations.shuffle(col)
        elif cmd == u'shuffle_horiz':
            if not arglist:
                dm = operations.shuffle_horiz(dm)
            else:
                dm = operations.shuffle_horiz(
                    *[dm[_colname] for _colname in arglist])
        elif cmd == u'slice':
            self._require_arglist(cmd, arglist, minlen=2)
            dm = dm[arglist[0]:arglist[1]]
        elif cmd == u'sort':
            self._require_arglist(cmd, arglist)
            dm[colname] = operations.sort(col)
        elif cmd == u'sortby':
            self._require_arglist(cmd, arglist)
            dm = operations.sort(dm, by=col)
        elif cmd == u'reverse':
            if not arglist:
                dm = dm[::-1]
            else:
                dm[colname] = col[::-1]
        elif cmd == u'roll':
            self._require_arglist(cmd, arglist)
            steps = arglist[0]
            if not isinstance(steps, int):
                raise osexception(u'roll steps should be numeric')
            if len(arglist) == 1:
                # Only a step count: roll all rows of the DataMatrix
                dm = dm[-steps:] << dm[:-steps]
            else:
                # Step count plus column name: roll only that column
                dm[colname] = list(col[-steps:]) + list(col[:-steps])
        elif cmd == u'weight':
            self._require_arglist(cmd, arglist)
            dm = operations.weight(col)
    return dm
def _create_live_datamatrix(self):
    """
    desc:
        Builds a live DataMatrix. That is, it takes the original DataMatrix
        (the loop table, or a file from the file pool), repeats and orders
        it, and applies all the operations as specified. An osexception is
        raised if the source file cannot be read, or if an operation refers
        to a column that cannot be retrieved.

    returns:
        desc: A live DataMatrix.
        type: DataMatrix
    """
    if self.var.source == u'table':
        src_dm = self.dm
    else:
        from datamatrix import io
        src = self.experiment.pool[self.var.source_file]
        if src.endswith(u'.xlsx'):
            try:
                src_dm = io.readxlsx(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read .xlsx file: %s' % src, exception=e)
        else:
            try:
                src_dm = io.readtxt(src)
            except Exception as e:
                raise osexception(
                    u'Failed to read text file: %s' % src, exception=e)
    # NOTE(review): self.var.repeat is used without a type check here;
    # presumably it is numeric at this point -- confirm against callers.
    length = int(len(src_dm) * self.var.repeat)
    dm = DataMatrix(length=0)
    # Append (shuffled) copies of the source table until the requested
    # length is reached; the last copy may be a partial slice.
    while len(dm) < length:
        i = min(length - len(dm), len(src_dm))
        if self.var.order == u'random':
            dm <<= operations.shuffle(src_dm)[:i]
        else:
            dm <<= src_dm[:i]
    if self.var.order == u'random':
        dm = operations.shuffle(dm)
    if self.ef is not None:
        self.ef.dm = dm
        dm = self.ef.enforce()
    for cmd, arglist in self.operations:
        # The column name is always specified last, or not at all
        if arglist:
            colname = arglist[-1]
            try:
                col = dm[colname]
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not reported as a missing column.
                raise osexception(u'Column %s does not exist' % colname)
        if cmd == u'fullfactorial':
            dm = operations.fullfactorial(dm)
        elif cmd == u'shuffle':
            if not arglist:
                dm = operations.shuffle(dm)
            else:
                dm[colname] = operations.shuffle(col)
        elif cmd == u'shuffle_horiz':
            if not arglist:
                dm = operations.shuffle_horiz(dm)
            else:
                dm = operations.shuffle_horiz(
                    *[dm[_colname] for _colname in arglist])
        elif cmd == u'slice':
            self._require_arglist(cmd, arglist, minlen=2)
            dm = dm[arglist[0]:arglist[1]]
        elif cmd == u'sort':
            self._require_arglist(cmd, arglist)
            dm[colname] = operations.sort(col)
        elif cmd == u'sortby':
            self._require_arglist(cmd, arglist)
            dm = operations.sort(dm, by=col)
        elif cmd == u'reverse':
            if not arglist:
                dm = dm[::-1]
            else:
                dm[colname] = col[::-1]
        elif cmd == u'roll':
            self._require_arglist(cmd, arglist)
            steps = arglist[0]
            if not isinstance(steps, int):
                raise osexception(u'roll steps should be numeric')
            if len(arglist) == 1:
                # Only a step count: roll all rows of the DataMatrix
                dm = dm[-steps:] << dm[:-steps]
            else:
                # Step count plus column name: roll only that column
                dm[colname] = list(col[-steps:]) + list(col[:-steps])
        elif cmd == u'weight':
            self._require_arglist(cmd, arglist)
            dm = operations.weight(col)
    return dm
def _create_live_datamatrix(self):
    """
    desc:
        Builds a live DataMatrix. That is, it takes the original DataMatrix
        (the loop table, or the result of _read_file()), repeats and orders
        it, enforces the constraints, and finally applies all the
        operations as specified. An osexception is raised if a column name
        or the repeat value is invalid, if an operation refers to a column
        that cannot be retrieved, or if the weight operation raises a
        TypeError.

    returns:
        desc: A live DataMatrix.
        type: DataMatrix
    """
    src_dm = self.dm if self.var.source == u'table' else self._read_file()
    for column_name in src_dm.column_names:
        if not self.syntax.valid_var_name(column_name):
            raise osexception(
                u'The loop table contains an invalid column name: '
                u'\'%s\'' % column_name)
    # The number of repeats should be numeric. If not, then give an error.
    # This can also occur when generating a preview of a loop table if
    # repeat is variable.
    if not isinstance(self.var.repeat, (int, float)):
        raise osexception(
            u'Don\'t know how to generate a DataMatrix for "%s" repeats'
            % self.var.repeat)
    length = int(len(src_dm) * self.var.repeat)
    dm = DataMatrix(length=0)
    # Append (shuffled) copies of the source table until the requested
    # length is reached; the last copy may be a partial slice.
    while len(dm) < length:
        i = min(length - len(dm), len(src_dm))
        if self.var.order == u'random':
            dm <<= operations.shuffle(src_dm)[:i]
        else:
            dm <<= src_dm[:i]
    if self.var.order == u'random':
        dm = operations.shuffle(dm)
    # Constraints come before loop operations
    if self._constraints:
        self.ef = Enforce(dm)
        for constraint_cls, colname, kwargs in self._constraints:
            self.ef.add_constraint(
                constraint_cls, cols=dm[colname], **kwargs)
        dm = self.ef.enforce()
    # Operations come last
    for cmd, arglist in self._operations:
        # The column name is always specified last, or not at all
        if arglist:
            colname = arglist[-1]
            try:
                col = dm[colname]
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not reported as a missing column.
                raise osexception(u'Column %s does not exist' % colname)
        if cmd == u'fullfactorial':
            dm = operations.fullfactorial(dm)
        elif cmd == u'shuffle':
            if not arglist:
                dm = operations.shuffle(dm)
            else:
                dm[colname] = operations.shuffle(col)
        elif cmd == u'shuffle_horiz':
            if not arglist:
                dm = operations.shuffle_horiz(dm)
            else:
                # There can be multiple column names, so we need to check
                # if all of them exist, rather than only the last one as
                # we did above. Each column is looked up once and collected
                # for the shuffle_horiz() call.
                cols = []
                for _colname in arglist:
                    try:
                        cols.append(dm[_colname])
                    except Exception:
                        raise osexception(
                            u'Column %s does not exist' % _colname)
                dm = operations.shuffle_horiz(*cols)
        elif cmd == u'slice':
            self._require_arglist(cmd, arglist, minlen=2)
            dm = dm[arglist[0]:arglist[1]]
        elif cmd == u'sort':
            self._require_arglist(cmd, arglist)
            dm[colname] = operations.sort(col)
        elif cmd == u'sortby':
            self._require_arglist(cmd, arglist)
            dm = operations.sort(dm, by=col)
        elif cmd == u'reverse':
            if not arglist:
                dm = dm[::-1]
            else:
                dm[colname] = col[::-1]
        elif cmd == u'roll':
            self._require_arglist(cmd, arglist)
            steps = arglist[0]
            if not isinstance(steps, int):
                raise osexception(u'roll steps should be numeric')
            if len(arglist) == 1:
                # Only a step count: roll all rows of the DataMatrix
                dm = dm[-steps:] << dm[:-steps]
            else:
                # Step count plus column name: roll only that column
                dm[colname] = list(col[-steps:]) + list(col[:-steps])
        elif cmd == u'weight':
            self._require_arglist(cmd, arglist)
            try:
                dm = operations.weight(col)
            except TypeError:
                raise osexception(
                    u'weight values should be non-negative numeric values')
    return dm