def validateLabels(self):
    """
    Validate the combined ground truth windows stored in the label file.

    This is run at the end of the label combining process (see
    scripts/combine_labels.py). The JSON file at self.path is reloaded and
    self.windows is rebuilt as a dictionary mapping each relative path to
    its windows after conversion with deepmap(strp, ...).

    For every data file, the first element of each window minus the second
    element of the preceding window must be a non-negative duration.
    Windows that merely touch (difference of zero) are accepted. Only
    neighbouring entries are compared, so the check presumes the windows
    are already listed in chronological order.

    Raises:
        ValueError: if two consecutive windows of a data file overlap.
    """
    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        fileWindows = deepmap(strp, windows[relativePath])
        self.windows[relativePath] = fileWindows

        if len(fileWindows) == 0:
            continue

        # Pair every window with its successor; a single window yields no
        # pairs, so it passes trivially. The generator lets all() stop at
        # the first overlap instead of computing every gap up front.
        neighbours = zip(fileWindows, fileWindows[1:])
        if not all((following[0] - current[1]).total_seconds() >= 0
                   for current, following in neighbours):
            raise ValueError("In the label file %s, windows overlap." % self.path)
def getWindows(self):
    """
    Load the label windows and check them against the corpus data.

    Reads the JSON label file at self.path and stores, in self.windows, a
    dictionary mapping each relative path to its list of windows after
    conversion with deepmap(strp, ...). For every data file that has at
    least one window, every timestamp of every window must occur exactly
    once in the "timestamp" column of self.corpus.dataFiles[path].data.

    Raises:
        ValueError: if any window timestamp is not found in its data file.
    """
    def found(t, data):
        # A timestamp counts as present only when exactly one row matches.
        matches = data["timestamp"][data["timestamp"] == pandas.Timestamp(t)]
        exists = (len(matches) == 1)
        if not exists:
            # Single-argument form prints the same text on Python 2 and 3.
            print("%s doesn't exist" % t)
        return exists

    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        self.windows[relativePath] = deepmap(strp, windows[relativePath])

        if len(self.windows[relativePath]) == 0:
            continue

        data = self.corpus.dataFiles[relativePath].data

        # Flatten all windows of this file into one list of timestamps, so
        # that every window is validated rather than only the first one.
        timestamps = list(itertools.chain.from_iterable(windows[relativePath]))

        # Evaluate eagerly so each missing timestamp is reported by found()
        # before the error is raised.
        missing = [t for t in timestamps if not found(t, data)]
        if missing:
            raise ValueError("timestamp listed in labels doesn't exist in file")
def getWindows(self):
    """
    Read the JSON label file and validate its timestamps.

    Stores, in self.windows, a dictionary with key:value pairs of a
    relative path and its corresponding list of windows, converted with
    deepmap(strp, ...). For every data file with at least one entry, each
    label timestamp must occur exactly once in the "timestamp" column of
    self.corpus.dataFiles[path].data.

    When self.path contains the substring "raw", the entries of a data file
    are used directly as timestamps; otherwise each entry is treated as a
    window and its timestamps are flattened into one list.

    Raises:
        ValueError: if a label timestamp is not found in its data file.
    """
    def found(t, data):
        # A timestamp counts as present only when exactly one row matches.
        matches = data["timestamp"][data["timestamp"] == pandas.Timestamp(t)]
        return len(matches) == 1

    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        self.windows[relativePath] = deepmap(strp, windows[relativePath])

        if len(self.windows[relativePath]) == 0:
            continue

        data = self.corpus.dataFiles[relativePath].data

        if "raw" in self.path:
            timestamps = windows[relativePath]
        else:
            timestamps = list(
                itertools.chain.from_iterable(windows[relativePath]))

        # The timestamps were previously collected but never checked; verify
        # that each one is present in the data file.
        if not all(found(t, data) for t in timestamps):
            raise ValueError(
                "In the label file %s, one of the timestamps used for "
                "the datafile %s doesn't match; it does not exist in "
                "the file. Timestamps in json label files have to "
                "exactly match timestamps in corresponding datafiles."
                % (self.path, relativePath))
def getWindows(self):
    """
    Load corpus_windows.json from the label directory.

    The decoded JSON is kept as-is in self.rawWindows. self.windows becomes
    a dictionary with the same relative-path keys, each mapped to its list
    of windows after conversion with deepmap(strp, ...).
    """
    labelPath = os.path.join(self.labelDir, "corpus_windows.json")
    with open(labelPath) as labelFile:
        loaded = json.load(labelFile)

    self.rawWindows = loaded
    self.windows = {}

    # Fill the dictionary one data file at a time.
    for relativePath, rawList in loaded.items():
        self.windows[relativePath] = deepmap(strp, rawList)
def getWindows(self):
    """
    Read the JSON label file and validate its timestamps.

    Stores, in self.windows, a dictionary with key:value pairs of a
    relative path and its corresponding list of windows, converted with
    deepmap(strp, ...). For every data file with at least one entry, each
    label timestamp must occur exactly once in the "timestamp" column of
    self.corpus.dataFiles[path].data.

    When self.path contains the substring "raw", the entries of a data file
    are used directly as timestamps; otherwise each entry is treated as a
    window and its timestamps are flattened into one list.

    Raises:
        ValueError: if a label timestamp is not found in its data file.
    """
    def found(t, data):
        # A timestamp counts as present only when exactly one row matches.
        matches = data["timestamp"][data["timestamp"] == pandas.Timestamp(t)]
        return len(matches) == 1

    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        self.windows[relativePath] = deepmap(strp, windows[relativePath])

        if len(self.windows[relativePath]) == 0:
            continue

        data = self.corpus.dataFiles[relativePath].data

        if "raw" in self.path:
            timestamps = windows[relativePath]
        else:
            timestamps = list(
                itertools.chain.from_iterable(windows[relativePath]))

        # Check that timestamps are present in dataset; the generator lets
        # all() stop at the first missing timestamp.
        if not all(found(t, data) for t in timestamps):
            raise ValueError(
                "In the label file %s, one of the timestamps used for "
                "the datafile %s doesn't match; it does not exist in "
                "the file. Timestamps in json label files have to "
                "exactly match timestamps in corresponding datafiles."
                % (self.path, relativePath))
def getWindows(self):
    """
    Read the JSON label file and validate its windows.

    Stores, in self.windows, a dictionary with key value pairs of a
    relative path and its corresponding list of windows, converted with
    deepmap(strp, ...). For every data file that has at least one window,
    every timestamp of every window must occur exactly once in the
    "timestamp" column of self.corpus.dataFiles[path].data.

    Raises:
        ValueError: if any window timestamp is not found in its data file.
    """
    def found(t, data):
        # A timestamp counts as present only when exactly one row matches.
        matches = data["timestamp"][data["timestamp"] == pandas.Timestamp(t)]
        exists = (len(matches) == 1)
        if not exists:
            # Single-argument form prints the same text on Python 2 and 3.
            print("%s doesn't exist" % t)
        return exists

    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        self.windows[relativePath] = deepmap(strp, windows[relativePath])

        if len(self.windows[relativePath]) == 0:
            continue

        data = self.corpus.dataFiles[relativePath].data

        # Flatten all windows of this file into one list of timestamps, so
        # that every window is validated rather than only the first one.
        timestamps = list(itertools.chain.from_iterable(windows[relativePath]))

        # Evaluate eagerly so each missing timestamp is reported by found()
        # before the error is raised.
        missing = [t for t in timestamps if not found(t, data)]
        if missing:
            raise ValueError(
                "In the label file %s, one of the timestamps used for "
                "the datafile %s doesn't exist in the file itself. "
                "Timestamps in json label files have to exactly match "
                "timestamps in corresponding datafiles."
                % (self.path, relativePath))
def getWindows(self):
    """
    Load the JSON label file at self.path into self.windows.

    self.windows ends up as a dictionary whose keys are the relative paths
    found in the label file and whose values are the corresponding entries
    after conversion with deepmap(strp, ...).
    """
    def found(stamp, frame):
        # True only when exactly one row of the "timestamp" column equals
        # the given timestamp.
        # NOTE(review): this helper is not called anywhere in this method.
        hits = frame["timestamp"][
            frame["timestamp"] == pandas.tslib.Timestamp(stamp)]
        return len(hits) == 1

    with open(os.path.join(self.path)) as labelFile:
        rawLabels = json.load(labelFile)

    self.windows = {}

    for relativePath, rawEntries in rawLabels.items():
        self.windows[relativePath] = deepmap(strp, rawEntries)

        # Nothing further is done for a data file without entries.
        if len(self.windows[relativePath]) == 0:
            continue
def getWindows(self):
    """
    Read the JSON label file and validate its timestamps.

    Stores, in self.windows, a dictionary with key:value pairs of a
    relative path and its corresponding list of windows, converted with
    deepmap(strp, ...). For every data file with at least one entry, each
    label timestamp must occur exactly once in the "timestamp" column of
    self.corpus.dataFiles[path].data.

    When self.path contains the substring "raw", the entries of a data file
    are used directly as timestamps; otherwise each entry is treated as a
    window and its timestamps are flattened into one list.

    Raises:
        ValueError: if a label timestamp is not found in its data file.
    """
    def found(t, data):
        # A timestamp counts as present only when exactly one row matches.
        # pandas.Timestamp is the public name of the former
        # pandas.tslib.Timestamp.
        column = data["timestamp"]
        return len(column[column == pandas.Timestamp(t)]) == 1

    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        self.windows[relativePath] = deepmap(strp, windows[relativePath])

        if len(self.windows[relativePath]) == 0:
            continue

        data = self.corpus.dataFiles[relativePath].data

        if "raw" in self.path:
            timestamps = windows[relativePath]
        else:
            timestamps = list(
                itertools.chain.from_iterable(windows[relativePath]))

        # Check that timestamps are present in dataset; the generator lets
        # all() stop at the first missing timestamp.
        if not all(found(t, data) for t in timestamps):
            raise ValueError(
                "In the label file %s, one of the timestamps used for "
                "the datafile %s doesn't match; it does not exist in "
                "the file. Timestamps in json label files have to "
                "exactly match timestamps in corresponding datafiles."
                % (self.path, relativePath))
def validateLabels(self):
    """
    Validate the combined ground truth windows stored in the label file.

    This is run at the end of the label combining process (see
    scripts/combine_labels.py). The JSON file at self.path is reloaded and
    self.windows is rebuilt as a dictionary mapping each relative path to
    its windows after conversion with deepmap(strp, ...).

    For every data file, the first element of each window minus the second
    element of the preceding window must be a non-negative duration.
    Windows that merely touch (difference of zero) are accepted. Only
    neighbouring entries are compared, so the check presumes the windows
    are already listed in chronological order.

    Raises:
        ValueError: if two consecutive windows of a data file overlap.
    """
    with open(os.path.join(self.path)) as windowFile:
        windows = json.load(windowFile)

    self.windows = {}

    for relativePath in windows.keys():
        fileWindows = deepmap(strp, windows[relativePath])
        self.windows[relativePath] = fileWindows

        # range() exists on both Python 2 and 3 (xrange does not); with
        # zero or one window the loop body never runs.
        for i in range(len(fileWindows) - 1):
            gap = fileWindows[i + 1][0] - fileWindows[i][1]
            # Written as "not >= 0" to keep the original comparison exactly.
            if not gap.total_seconds() >= 0:
                raise ValueError(
                    "In the label file %s, windows overlap." % self.path)