Example #1
from sudachipy import dictionary, tokenizer

def extractDialogWithSudachi(rootDir):
    outputDir = utils.getOutputPath(rootDir, "stats")
    # Tokenize with Sudachi in split mode C (the coarsest units)
    tokenizer_obj = dictionary.Dictionary().create()
    mode = tokenizer.Tokenizer.SplitMode.C

    unigrams = []
    bigrams = []
    trigrams = []
    fourgrams = []
    # Content-word POS tags: noun, verb, adverb, adjective, adnominal, adjectival noun
    POS_LIST = ["名詞", "動詞", "副詞", "形容詞", "連体詞", "形状詞"]
    for fn, fd in utils.loadFiles(rootDir):
        for line in fd:
            line = line.strip()
            wordList = []
            for word in tokenizer_obj.tokenize(line, mode):
                if word.part_of_speech()[0] not in POS_LIST:
                    continue
                wordList.append(
                    (word.dictionary_form(), word.part_of_speech()[0]))
                # Debug trace of each kept token
                print([
                    word.surface(),
                    word.dictionary_form(),
                    word.part_of_speech()[0]
                ])

            unigrams.extend(getChunks(wordList, 1))
            bigrams.extend(getChunks(wordList, 2))
            trigrams.extend(getChunks(wordList, 3))
            fourgrams.extend(getChunks(wordList, 4))

    _output(outputDir, unigrams, bigrams, trigrams, fourgrams)
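getChunks is a project helper that this listing does not show; a minimal sketch, assuming it returns the sliding n-grams of a token list (the body below is a hypothetical reimplementation):

def getChunks(wordList, n):
    # Sliding window of size n over the token list
    return [tuple(wordList[i:i + n]) for i in range(len(wordList) - n + 1)]

getChunks(["a", "b", "c", "d"], 2)  # [('a', 'b'), ('b', 'c'), ('c', 'd')]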
import io
import os

def extractDialog(rootDir):
    outputDir = utils.getOutputPath(rootDir, "dialog")

    for fn, fd in utils.loadFiles(rootDir):
        sentenceList = []
        for line in fd:
            if ":「" not in line:
                continue
            line = line.split(":「")[1].rstrip().rstrip("」")

            # Remove non-final punctuation before sentence splitting
            for punctuation in NON_FINAL_PUNCTUATION:
                line = line.replace(punctuation, "")

            # Successively split on each sentence-final mark, dropping empties
            tmpSentenceList = [line]
            for punctuation in SENTENCE_FINAL_PUNCTUATION:
                subTmpSentenceList = []
                for sentence in tmpSentenceList:
                    subTmpSentenceList.extend(sentence.split(punctuation))
                tmpSentenceList = [
                    s.strip() for s in subTmpSentenceList if s != ""
                ]
            sentenceList.extend(tmpSentenceList)

        # Write one sentence per line; use a distinct handle name so the
        # input file object "fd" is not shadowed
        with io.open(os.path.join(outputDir, fn), "w", encoding="utf-8") as outFd:
            for line in sentenceList:
                outFd.write(line + "\n")
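A worked example of the splitting above, assuming 、 is among NON_FINAL_PUNCTUATION and 。 / ! are among SENTENCE_FINAL_PUNCTUATION (the real constants are defined elsewhere in the module):

line = '太郎:「おはよう。今日は、いい天気だ!」'
line = line.split(":「")[1].rstrip().rstrip("」")  # おはよう。今日は、いい天気だ!
line = line.replace("、", "")                      # おはよう。今日はいい天気だ!
# Splitting on 。 and then on !, stripping and dropping empties, yields:
# ['おはよう', '今日はいい天気だ']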
import numpy as np
from sklearn.cluster import DBSCAN

def analyze(data_):
    # "rp" is assumed to be ROS's rospy module (import rospy as rp)
    rp.loginfo('Cloud received')

    # Extract data
    points, normals, npts = utils.extractLabeledCloud(data_.cloud)
    rp.loginfo('Retrieved %d pts', npts)

    # Compute DBSCAN
    rp.loginfo('...synthesizing grasping points')
    graspPts = []
    minDataSize = 0.1 * npts
    for key in points:
        # Discard classes below 10% of the cloud or with fewer than 10 points
        classSize = len(points[key])
        if (classSize < minDataSize) or (classSize < 10):
            continue

        # Get the actual data from the cloud
        positionData = np.array(points[key])

        # epsilon and minPoints are module-level DBSCAN parameters
        db = DBSCAN(eps=epsilon, min_samples=minPoints).fit(positionData)
        # DBSCAN labels noise as -1, so exclude it from the cluster count
        nclusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
        rp.loginfo('...label %d (%d pts), found %d clusters', key,
                   len(positionData), nclusters)

        if nclusters > 0:
            # Synthesize the grasping points
            normalsData = np.array(normals[key])
            graspPts = graspPts + synthesizePoints(data_.cloud.header.frame_id,
                                                   positionData, normalsData,
                                                   db.labels_, key)

            # Generate debug data if requested
            if debug:
                utils.plotClusters(utils.getOutputPath(), positionData,
                                   db.labels_, key, nclusters, palette)

    # Publish the synthesized grasping points
    rp.loginfo('...publishing %d grasping points', len(graspPts))

    if debug:
        for gp in graspPts:
            rp.logdebug(
                '.....p=(%.3f, %.3f, %.3f) - n=(%.3f, %.3f, %.3f) - l=%d',
                gp.position.x, gp.position.y, gp.position.z, gp.normal.x,
                gp.normal.y, gp.normal.z, gp.label)

    msg = GraspingData()
    msg.graspingPoints = graspPts
    msg.boundingBoxMin = data_.boundingBoxMin
    msg.boundingBoxMax = data_.boundingBoxMax
    publisher.publish(msg)
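For reference, the cluster count above relies on scikit-learn's convention that DBSCAN labels noise points as -1; a minimal standalone sketch:

import numpy as np
from sklearn.cluster import DBSCAN

# Two tight clusters plus one far-away outlier
data = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1],
                 [5.0, 5.0], [5.1, 5.0], [5.0, 5.1],
                 [100.0, 100.0]])
db = DBSCAN(eps=0.5, min_samples=2).fit(data)
print(db.labels_)  # [ 0  0  0  1  1  1 -1]
print(len(set(db.labels_)) - (1 if -1 in db.labels_ else 0))  # 2 clusters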
Example #5
import MeCab

def extractDialogWithMecab(rootDir):
    outputDir = utils.getOutputPath(rootDir, "stats")
    # "-Owakati" makes MeCab emit space-separated surface forms
    parser = MeCab.Tagger("-Owakati")
    unigrams = []
    bigrams = []
    trigrams = []
    fourgrams = []
    for fn, fd in utils.loadFiles(rootDir):
        for line in fd:
            wordList = parser.parse(line).split()
            unigrams.extend(getChunks(wordList, 1))
            bigrams.extend(getChunks(wordList, 2))
            trigrams.extend(getChunks(wordList, 3))
            fourgrams.extend(getChunks(wordList, 4))

    _output(outputDir, unigrams, bigrams, trigrams, fourgrams)
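For context, "-Owakati" output is plain whitespace-delimited text, so split() yields the token list directly; a minimal check (the exact tokens depend on the installed MeCab dictionary):

import MeCab

parser = MeCab.Tagger("-Owakati")
print(parser.parse("私は猫が好きです").split())
# e.g. ['私', 'は', '猫', 'が', '好き', 'です']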
def synthesizePoints(frameId_, points_, normals_, clusteringLabels_, index_):
    points = []

    classes = set(clusteringLabels_)
    for cls in classes:
        if cls == -1:
            continue

        # Discard clusters holding less than 10% of the class's points
        totalPts = len(points_)
        minPts = 0.1 * totalPts
        pts = points_[clusteringLabels_ == cls]
        if len(pts) < minPts:
            rp.loginfo('.......cluster discarded %4d / %4d pts', len(pts),
                       totalPts)
            continue

        rp.loginfo('.......cluster size: %4d pts', len(pts))

        position = np.average(pts, axis=0)
        # Average the cluster's normals after discarding outliers;
        # the unfiltered average is kept for reference:
        # normal = np.average(normals_[clusteringLabels_ == cls], axis=0)
        filteredNormals = filterByStd(normals_[clusteringLabels_ == cls])
        normal = np.average(filteredNormals, axis=0)

        point = GraspingPoint()
        point.header.frame_id = frameId_
        point.label = index_
        point.position.x = position[0]
        point.position.y = position[1]
        point.position.z = position[2]
        point.normal.x = normal[0]
        point.normal.y = normal[1]
        point.normal.z = normal[2]

        points.append(point)

        if debug:
            labels = np.ones(len(points_)) * -1
            labels[clusteringLabels_ == cls] = 1
            utils.plotSelectedCluster(utils.getOutputPath(), points_, labels,
                                      index_, cls, 'set1')

    return points
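filterByStd is another helper that is not part of this listing; a plausible sketch, assuming it drops outlier normals before they are averaged (a hypothetical reimplementation):

import numpy as np

def filterByStd(vectors_, nstd_=1.0):
    # Keep rows whose components all lie within nstd_ standard deviations
    # of the per-axis mean; fall back to the full set if the mask is empty
    mean = np.average(vectors_, axis=0)
    std = np.std(vectors_, axis=0)
    mask = np.all(np.abs(vectors_ - mean) <= nstd_ * std, axis=1)
    return vectors_[mask] if np.any(mask) else vectors_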
    def run(self):
        try:
            self.login()
            self.openTradebook()
            for period in self.report:
                month, year = (period.get("month"), period.get("year"))
                self.setTradebookDate(month, year)
                outputFolder = os.path.join(
                    os.getcwd(), 'output', '{}_{}'.format(year, month))
                if not isDirectoryExists(outputFolder):
                    createDirectory(outputFolder)
                for k, v in SEGMENTS.items():
                    self.setTradebookSegment(v)
                    sleep(5)  # pause so the segment view can load (time.sleep)

                    isSaved = self.getSnapshot(getOutputPath(
                        outputFolder, '{}.png'.format(k)))
                    print("took snapshot for {} and saved={}".format(k, isSaved))
                createPdf(outputFolder, "tradebook.pdf")

        finally:
            if self.loggedIn:
                self.logout()
            self.closeDriver()
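The filesystem helpers used above are not shown in this listing; minimal sketches, assuming isDirectoryExists and createDirectory are thin wrappers over os.path and os (here getOutputPath appears to simply join a folder and a file name):

import os

def isDirectoryExists(path):
    # True when path exists and is a directory (hypothetical wrapper)
    return os.path.isdir(path)

def createDirectory(path):
    # Create the directory tree if it is missing (hypothetical wrapper)
    os.makedirs(path, exist_ok=True)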