def collectUserFeatures(data, user, featureFunction):
    """Apply ``featureFunction`` to every recording stored for one user.

    Every entry listed under ``data/user`` is opened with
    ``util.openStream``, passed through ``util.filterKeystrokes`` and then
    handed to ``featureFunction``.

    Args:
        data: Directory that contains the ``user`` sub-directory.
        user: Name of the sub-directory of ``data`` to scan.
        featureFunction: Callable invoked once per filtered stream.  Its
            results are combined with ``chain``, so each result is
            presumably expected to be iterable.

    Returns:
        ``chain(*results)`` over the per-entry results, in the order in
        which ``os.listdir`` reported the entries.
    """
    userDir = data + '/' + user
    # All entries are processed eagerly; only the final chaining is lazy.
    perSite = [
        featureFunction(
            util.filterKeystrokes(util.openStream(userDir + '/' + entry)))
        for entry in os.listdir(userDir)
    ]
    return chain(*perSite)
def readNewick(self, filename, readData=None):
    """Populate this tree from a newick description.

    When ``pyparsing`` is unavailable (the module-level name is ``None``)
    the work is delegated to ``self.readBigNewick``.  Otherwise the text up
    to the first ";" is parsed with ``newickParser`` and the parse result is
    walked recursively to create ``TreeNode`` objects.  If a ``RuntimeError``
    is raised along the way, ``self.readBigNewick`` is used as a fallback.

    Args:
        filename: Passed to ``util.openStream`` to obtain the input.
        readData: Optional callable ``readData(node, data)`` invoked for
            every parse result that carries a "data" entry.  Defaults to
            ``self.readData``.

    Returns:
        Whatever ``self.readBigNewick(filename)`` returns when a fallback
        path is taken; otherwise ``None``.
    """
    # use simple parsing if pyparsing is not available
    if pyparsing is None:
        return self.readBigNewick(filename)

    try:
        # default data reader
        if readData is None:
            readData = self.readData

        # get parse tree; the ";" used as the read delimiter is appended
        # again before parsing
        text = util.readUntil(util.openStream(filename), ";")[0] + ";"
        expr = newickParser(text)[0]

        # walk the parse tree and build the tree
        self.clear()

        def walk(expr):
            """Create the node for one parse result and attach its children."""
            # Only ParseResults objects become nodes; anything else
            # contributes nothing to the tree.
            if not isinstance(expr, pyparsing.ParseResults):
                return None

            # parse name, generating a fresh one when none was given
            if "name" in expr:
                node = TreeNode(expr["name"])
            else:
                node = TreeNode(self.newName())

            if "data" in expr:
                readData(node, expr["data"])

            # recurse
            for child in expr:
                ret = walk(child)
                if ret:
                    self.addChild(node, ret)
            return node

        self.root = walk(expr)

        # test for boot strap presence: a single node with "boot" data is
        # enough to register a default value of 0
        for node in self.nodes.itervalues():
            if "boot" in node.data:
                self.defaultData["boot"] = 0
                break
        self.setDefaultData()

    except RuntimeError:
        # try simpler parser (presumably triggered when the recursive walk
        # or the parser exceeds the recursion limit -- confirm)
        return self.readBigNewick(filename)
def readTreeColorMap(filename):
    """Reads a tree colormap from a file

    Every line of the input must consist of exactly four tab-separated
    fields: an expression followed by red, green and blue components that
    are converted with ``float``.  The collected ``[expr, [r, g, b]]`` pairs
    are passed to ``makeExprMapping``; the resulting callable is applied to
    ``node.name`` to colour a node.

    Args:
        filename: Passed to ``util.openStream`` to obtain the input lines.

    Returns:
        The result of ``treeColorMap`` applied to the per-node lookup.

    Raises:
        ValueError: If a line does not split into exactly four fields or a
            colour component cannot be converted to ``float``.
    """
    infile = util.openStream(filename)
    maps = []

    for line in infile:
        expr, red, green, blue = line.rstrip().split("\t")
        # Store a concrete list: ``map(float, ...)`` is a list on Python 2
        # but a one-shot iterator on Python 3, which could only be read once.
        maps.append([expr, [float(red), float(green), float(blue)]])

    name2color = makeExprMapping(maps)

    def leafmap(node):
        """Look up the colour for ``node`` by its name."""
        return name2color(node.name)

    return treeColorMap(leafmap)
def extractFeatureVectors(data, statisticFn):
    """Yield one labelled feature vector per segmented session.

    ``data`` is scanned as ``data/<user>/<site>``.  Each site entry is opened
    with ``util.openStream``, filtered with ``util.filterKeystrokes`` and
    split by ``util.segmentStream(stream, 600)``.

    Args:
        data: Directory containing one sub-directory per user.
        statisticFn: Callable applied to the features collected for each
            index in ``FEATURE_INDICES``; its result is appended to the
            vector with ``list.extend``.

    Yields:
        A list whose first element is the position of the user in
        ``os.listdir(data)`` and whose remaining elements are the
        concatenated ``statisticFn`` results.
    """
    for label, user in enumerate(os.listdir(data)):
        # collect all segmented data
        userDir = data + '/' + user
        for site in os.listdir(userDir):
            stream = util.filterKeystrokes(
                util.openStream(userDir + '/' + site))
            for session in util.segmentStream(stream, 600):
                # NOTE: a filter that skipped sessions in which any feature
                # had fewer than two samples is disabled, so every session
                # produces a vector.
                vector = [label]
                for ind in FEATURE_INDICES:
                    collected = viz.collectFeatures(session, viz.FEATURES[ind])
                    vector.extend(statisticFn(collected))
                yield vector
def writeNewick(self, out=sys.stdout, writeData=None, oneline=False):
    """Write the tree in newick notation

    Args:
        out: Destination handed to ``util.openStream`` with mode "w".  The
            default is the ``sys.stdout`` object that was current when this
            function was defined.
        writeData: Forwarded unchanged to ``self.writeNewickNode``.
        oneline: Forwarded unchanged to ``self.writeNewickNode``.
    """
    root = self.root
    stream = util.openStream(out, "w")
    self.writeNewickNode(root, stream,
                         writeData=writeData,
                         oneline=oneline)
def writeNewick(self, out=sys.stdout, writeData=None, oneline=False):
    """Write the tree in newick notation

    The output target is obtained from ``util.openStream(out, "w")`` and the
    actual serialisation, starting at ``self.root``, is delegated to
    ``self.writeNewickNode``.

    Args:
        out: Value passed to ``util.openStream``; defaults to the
            ``sys.stdout`` object bound at definition time.
        writeData: Passed through to ``self.writeNewickNode``.
        oneline: Passed through to ``self.writeNewickNode``.
    """
    startNode = self.root
    target = util.openStream(out, "w")
    self.writeNewickNode(startNode, target, writeData=writeData,
                         oneline=oneline)
def main(args):
    """Save keystroke-timing histograms for every ``<user>/<site>`` recording.

    ``args.dir`` is scanned as ``<user>/<site>``.  For each site the stream
    is opened with ``util.openStream``, filtered with
    ``util.filterKeystrokes`` and segmented with ``util.segmentStream``.
    Histograms are written as PNG files into ``args.odir/<user>/``; a plot is
    only produced when there is data for it.

    Args:
        args: Object providing ``dir`` (input directory) and ``odir``
            (output directory) attributes.
    """
    def saveHistogram(values, bins, fileName):
        """Plot ``values`` with ``bins`` bins on a cleared figure and save it."""
        plt.clf()
        plt.hist(values, bins)
        plt.savefig(fileName)

    createDirectoryIfNotExist(args.odir)
    for user in os.listdir(args.dir):
        userOut = args.odir + '/' + user
        createDirectoryIfNotExist(userOut)

        for site in os.listdir(args.dir + '/' + user):
            relName = user + '/' + site
            # A per-site directory is created as well, although every plot
            # below is written directly into the user's directory.
            createDirectoryIfNotExist(args.odir + '/' + relName)
            stream = util.filterKeystrokes(
                util.openStream(args.dir + '/' + relName))
            sessions = util.segmentStream(stream)

            # We want histograms of keystroke usage per user
            allLengths = list(util.getAllKeystrokeLengths(stream))
            if allLengths:
                saveHistogram(allLengths, 200,
                              userOut + '/all-kl-' + site + '.png')

            # One histogram per key combination, labelled by the entry of
            # ``names`` at the same index.
            for i, combo in enumerate(keyCombos):
                lengths = list(util.getKeystrokeLengths(stream, combo))
                if lengths:
                    saveHistogram(
                        lengths, 200,
                        userOut + '/' + names[i] + '-kl-' + site + '.png')

            # Also want histograms of word data, per user
            # NOTE(review): len() on the zip() result relies on Python 2,
            # where zip returns a list.
            wordData = zip(*chain(*[util.getWordData(s) for s in sessions]))
            if len(wordData) > 0 and len(wordData[0]) > 0:
                # Only the first column is filtered (|d| < 10); it is
                # plotted even if the filter leaves nothing.
                saveHistogram([d for d in wordData[0] if abs(d) < 10], 200,
                              userOut + '/word-dur-' + site + '.png')
                saveHistogram(list(wordData[1]), 200,
                              userOut + '/word-len-' + site + '.png')

            # Key overlaps
            keyOverlaps = [x for x in util.getKeyOverlaps(stream)
                           if abs(x) < 5]
            if keyOverlaps:
                saveHistogram(keyOverlaps, 100,
                              userOut + '/overlap-' + site + '.png')

            # Word pauses
            wordPauses = [x for x in util.getWordPauses(stream)
                          if abs(x) < 60]
            if wordPauses:
                saveHistogram(wordPauses, 100,
                              userOut + '/word-pause-' + site + '.png')

            # Time between shift key and modified key
            shiftTime = [x for x in util.getModifierDelays(stream, util._SHIFT)
                         if abs(x) < 5]
            if shiftTime:
                saveHistogram(shiftTime, 100,
                              userOut + '/shift-delay-' + site + '.png')

            # Time between shift-to-shift
            # NOTE(review): this uses the same getModifierDelays call as the
            # shift-delay plot and differs only in the cutoff -- confirm
            # that this is intended.
            shiftShift = [x for x in util.getModifierDelays(stream, util._SHIFT)
                          if abs(x) < 1200]
            if shiftShift:
                saveHistogram(shiftShift, 100,
                              userOut + '/shift-shift-' + site + '.png')