def extractDialogWithSudachi(rootDir):
    outputDir = utils.getOutputPath(rootDir, "stats")

    tokenizer_obj = dictionary.Dictionary().create()
    mode = tokenizer.Tokenizer.SplitMode.C

    unigrams = []
    bigrams = []
    trigrams = []
    fourgrams = []

    # Content parts of speech to keep: noun, verb, adverb, adjective,
    # adnominal, adjectival noun
    POS_LIST = ["名詞", "動詞", "副詞", "形容詞", "連体詞", "形状詞"]

    for fn, fd in utils.loadFiles(rootDir):
        for line in fd:
            line = line.strip()
            wordList = []
            for word in tokenizer_obj.tokenize(line, mode):
                # Skip tokens whose part of speech is not whitelisted
                if word.part_of_speech()[0] not in POS_LIST:
                    continue
                wordList.append(
                    (word.dictionary_form(), word.part_of_speech()[0]))
                # Debug output for each kept token
                print([
                    word.surface(),
                    word.dictionary_form(),
                    word.part_of_speech()[0]
                ])

            unigrams.extend(getChunks(wordList, 1))
            bigrams.extend(getChunks(wordList, 2))
            trigrams.extend(getChunks(wordList, 3))
            fourgrams.extend(getChunks(wordList, 4))

    _output(outputDir, unigrams, bigrams, trigrams, fourgrams)
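# The getChunks helper used by both extractors is not shown here. A minimal
# sketch of one possible implementation, assuming it returns every contiguous
# n-length slice of the token list as a tuple (the name and behaviour are
# taken from the call sites above; the body is an assumption):
def getChunks(wordList, n):
    # Overlapping n-grams, e.g. n=2 -> (w0, w1), (w1, w2), ...
    return [tuple(wordList[i:i + n]) for i in range(len(wordList) - n + 1)]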
def extractDialog(rootDir):
    outputDir = utils.getOutputPath(rootDir, "dialog")

    for fn, fd in utils.loadFiles(rootDir):
        sentenceList = []
        for line in fd:
            # Only keep lines containing quoted dialog ('speaker:「...」')
            if ":「" not in line:
                continue
            line = line.split(":「")[1].rstrip().rstrip("」")

            # Strip non-final punctuation, then split on sentence-final punctuation
            for punctuation in NON_FINAL_PUNCTUATION:
                line = line.replace(punctuation, "")

            tmpSentenceList = [line]
            for punctuation in SENTENCE_FINAL_PUNCTUATION:
                subTmpSentenceList = []
                for sentence in tmpSentenceList:
                    subTmpSentenceList.extend(sentence.split(punctuation))
                tmpSentenceList = [
                    line.strip() for line in subTmpSentenceList if line != ""
                ]

            sentenceList.extend(tmpSentenceList)

        # Write one output file per input file (outFd renamed to avoid
        # shadowing the input handle)
        with io.open(os.path.join(outputDir, fn), "w", encoding="utf-8") as outFd:
            for line in sentenceList:
                outFd.write(line + "\n")
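# NON_FINAL_PUNCTUATION and SENTENCE_FINAL_PUNCTUATION are defined elsewhere
# in the module. Purely illustrative placeholder values (the actual contents
# are an assumption, not taken from the source):
NON_FINAL_PUNCTUATION = ["、", "…", "『", "』", "（", "）"]
SENTENCE_FINAL_PUNCTUATION = ["。", "！", "？"]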
def analyze(data_):
    rp.loginfo('Cloud received')

    # Extract data
    points, normals, npts = utils.extractLabeledCloud(data_.cloud)
    rp.loginfo('Retrieved %d pts', npts)

    # Compute DBSCAN
    rp.loginfo('...synthesizing grasping points')
    graspPts = []
    minDataSize = 0.1 * npts
    for key in points:
        # filter classes with too few points
        classSize = len(points[key])
        if (classSize < minDataSize) or (classSize < 10):
            continue

        # Get the actual data from the cloud
        positionData = np.array(points[key])
        db = DBSCAN(eps=epsilon, min_samples=minPoints).fit(positionData)
        nclusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
        rp.loginfo('...label %d (%d pts), found %d clusters', key,
                   len(positionData), nclusters)

        if nclusters > 0:
            # Synthesize the grasping points
            normalsData = np.array(normals[key])
            graspPts = graspPts + synthesizePoints(data_.cloud.header.frame_id,
                                                   positionData, normalsData,
                                                   db.labels_, key)

            # Generate debug data if requested
            if debug:
                utils.plotClusters(utils.getOutputPath(), positionData,
                                   db.labels_, key, nclusters, palette)

    # Publish the synthesized grasping points
    rp.loginfo('...publishing %d grasping points', len(graspPts))
    if debug:
        for gp in graspPts:
            rp.logdebug(
                '.....p=(%.3f, %.3f, %.3f) - n=(%.3f, %.3f, %.3f) - l=%d',
                gp.position.x, gp.position.y, gp.position.z,
                gp.normal.x, gp.normal.y, gp.normal.z, gp.label)

    msg = GraspingData()
    msg.graspingPoints = graspPts
    msg.boundingBoxMin = data_.boundingBoxMin
    msg.boundingBoxMax = data_.boundingBoxMax
    publisher.publish(msg)
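# For reference: scikit-learn's DBSCAN marks noise points with the label -1,
# which is why the cluster count above subtracts one when -1 is present. A
# small self-contained example (made-up data, illustration only):
#
#   import numpy as np
#   from sklearn.cluster import DBSCAN
#
#   # Two tight groups plus one outlier (noise)
#   data = np.array([[0.0, 0.0], [0.01, 0.0], [0.0, 0.01],
#                    [1.0, 1.0], [1.01, 1.0], [1.0, 1.01],
#                    [5.0, 5.0]])
#   db = DBSCAN(eps=0.05, min_samples=2).fit(data)
#   # db.labels_ == [0, 0, 0, 1, 1, 1, -1]; -1 marks the noise point
#   nclusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
#   print(nclusters)  # -> 2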
def extractDialogWithMecab(rootDir):
    outputDir = utils.getOutputPath(rootDir, "stats")

    # '-Owakati' makes MeCab emit space-separated surface tokens
    parser = MeCab.Tagger("-Owakati")

    unigrams = []
    bigrams = []
    trigrams = []
    fourgrams = []
    for fn, fd in utils.loadFiles(rootDir):
        for line in fd:
            wordList = parser.parse(line).split()
            unigrams.extend(getChunks(wordList, 1))
            bigrams.extend(getChunks(wordList, 2))
            trigrams.extend(getChunks(wordList, 3))
            fourgrams.extend(getChunks(wordList, 4))

    _output(outputDir, unigrams, bigrams, trigrams, fourgrams)
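# The _output helper shared by both extractors is not shown. One plausible
# implementation, assuming it writes one frequency-sorted file per n-gram
# order (the file names and format here are assumptions):
from collections import Counter

def _output(outputDir, unigrams, bigrams, trigrams, fourgrams):
    for name, ngrams in [("unigrams", unigrams), ("bigrams", bigrams),
                         ("trigrams", trigrams), ("fourgrams", fourgrams)]:
        counts = Counter(ngrams)
        with io.open(os.path.join(outputDir, name + ".txt"), "w",
                     encoding="utf-8") as fd:
            for ngram, count in counts.most_common():
                fd.write("{}\t{}\n".format(count, ngram))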
def synthesizePoints(frameId_, points_, normals_, clusteringLabels_, index_):
    points = []

    classes = set(clusteringLabels_)
    for cls in classes:
        # label -1 is DBSCAN noise
        if cls == -1:
            continue

        totalPts = len(points_)
        minPts = 0.1 * totalPts
        pts = points_[clusteringLabels_ == cls]
        if len(pts) < minPts:
            rp.loginfo('.......cluster discarded % 4d / %4d pts', len(pts), totalPts)
            continue

        rp.loginfo('.......cluster size: %4d pts', len(pts))
        position = np.average(pts, axis=0)
        # normal = np.average(normals_[clusteringLabels_ == cls], axis=0)
        filteredNormals = filterByStd(normals_[clusteringLabels_ == cls])
        normal = np.average(filteredNormals, axis=0)

        point = GraspingPoint()
        point.header.frame_id = frameId_
        point.label = index_
        point.position.x = position[0]
        point.position.y = position[1]
        point.position.z = position[2]
        point.normal.x = normal[0]
        point.normal.y = normal[1]
        point.normal.z = normal[2]
        points.append(point)

        if debug:
            labels = np.ones(len(points_)) * -1
            labels[clusteringLabels_ == cls] = 1
            utils.plotSelectedCluster(utils.getOutputPath(), points_, labels,
                                      index_, cls, 'set1')

    return points
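# filterByStd is referenced above but not shown. A minimal sketch under the
# assumption that it discards rows lying more than one standard deviation
# from the per-axis mean before the normals are averaged (body is an
# assumption, only the name comes from the source):
def filterByStd(data, nstd=1.0):
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # Keep rows whose every component is within nstd standard deviations
    mask = np.all(np.abs(data - mean) <= nstd * std, axis=1)
    return data[mask]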
def run(self):
    try:
        self.login()
        self.openTradebook()
        for period in self.report:
            month, year = (period.get("month"), period.get("year"))
            self.setTradebookDate(month, year)
            outputFolder = os.path.join(
                os.getcwd(), 'output', '{}_{}'.format(year, month))
            if not isDirectoryExists(outputFolder):
                createDirectory(outputFolder)
            for k, v in SEGMENTS.items():
                self.setTradebookSegment(v)
                # Give the page time to refresh before taking the snapshot
                sleep(5)
                isSaved = self.getSnapshot(
                    getOutputPath(outputFolder, '{}.png'.format(k)))
                print("took snapshot for {} and saved={}".format(k, isSaved))
            # Combine the per-segment snapshots for this period into one PDF
            createPdf(outputFolder, "tradebook.pdf")
    finally:
        if self.loggedIn:
            self.logout()
        self.closeDriver()
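# The filesystem helpers called in run() are defined elsewhere. Plausible
# definitions, assuming they are thin wrappers around os / os.path (bodies
# are assumptions; only the names come from the call sites above):
def isDirectoryExists(path):
    return os.path.isdir(path)

def createDirectory(path):
    os.makedirs(path)

def getOutputPath(folder, filename):
    return os.path.join(folder, filename)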