def graph_preprocess(self, dataset):
    """
    Preprocesses the whole graph database,
    especially the cooccurrences used for further graph generation
    """
    self.logger.debug("starting graph_preprocess")
    storage = self.get_storage(dataset, create=True, drop_tables=False)
    if storage == self.STATUS_ERROR:
        yield self.STATUS_ERROR
        return
    yield self.STATUS_RUNNING
    ngramgraphconfig = self.config['datamining']['NGramGraph']
    datasetObj = storage.loadCorpora(dataset)
    ### cooccurrences calculated for each period
    for corpusid in datasetObj['edges']['Corpus'].keys():
        period = storage.loadCorpus(corpusid)
        if period is None:
            continue
        ngram_index = set(period.edges['NGram'].keys())
        doc_index = set(period.edges['Document'].keys())
        if len(ngram_index) == 0:
            self.logger.warning("period %s has NO NGram indexed, skipping graph_preprocess" % period.id)
            continue
        if len(doc_index) == 0:
            self.logger.warning("period %s has NO Document indexed, skipping graph_preprocess" % period.id)
            continue
        yield self.STATUS_RUNNING
        # temporary graph writer used only during preprocessing
        cooc_writer = self._new_graph_writer(
            dataset,
            [period['id']],
            "preprocess tmp graph",
            storage,
            generate=False,
            preprocess=True
        )
        yield self.STATUS_RUNNING
        ngram_matrix_reducer = matrix.MatrixReducer(ngram_index)
        ngram_graph_preprocess = _dynamic_get_class("tinasoft.pytextminer.graph.subgraph", "NgramGraphPreprocess")
        ngramsubgraph_gen = process_ngram_subgraph(
            self.config,
            dataset,
            [period],
            ngram_index,
            doc_index,
            ngram_matrix_reducer,
            ngramgraphconfig,
            cooc_writer,
            storage,
            ngram_graph_preprocess
        )
        # drive the subgraph generator to exhaustion, yielding a status
        # after each step so callers can poll progress
        try:
            while 1:
                yield self.STATUS_RUNNING
                ngramsubgraph_gen.next()
        except StopIteration:
            pass
    # once every period is preprocessed, export the dataset's master whitelist
    self.logger.debug("exporting master whitelist")
    whitelistlabel = "%s_master" % datasetObj['id']
    outpath = self._get_whitelist_filepath(whitelistlabel)
    # this whitelist == dataset
    newwl = whitelist.Whitelist(whitelistlabel, whitelistlabel)
    # dataset's storage == master whitelist's storage
    del newwl.storage
    newwl.storage = storage
    yield self.STATUS_RUNNING
    # exports the dataset's whitelist
    whitelist_exporter = Writer("whitelist://" + outpath)
    yield abspath(whitelist_exporter.write_whitelist(newwl, datasetObj['id'], status="w"))
    return
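# Usage sketch: a minimal example of how a caller might drive the
# status-yielding generator above. `app` stands for an instance of the
# controller class exposing graph_preprocess and the STATUS_* constants;
# the helper name and dataset id are illustrative assumptions.
def run_graph_preprocess(app, dataset_id):
    generator = app.graph_preprocess(dataset_id)
    result = None
    try:
        while 1:
            step = generator.next()
            if step == app.STATUS_ERROR:
                raise RuntimeError("graph_preprocess failed on %s" % dataset_id)
            if step != app.STATUS_RUNNING:
                # the last meaningful yield is the master whitelist's path
                result = step
    except StopIteration:
        return result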
        )
        extractorGenerator = extract.index()
        # consume the extractor generator, reporting progress at each step
        while 1:
            extractorGenerator.next()
            yield self.STATUS_RUNNING
    except IOError, ioe:
        self.logger.error("%s" % ioe)
        yield self.STATUS_ERROR
        return
    except StopIteration:
        # extraction finished: export the new whitelist, seeding the status
        # column from the user whitelist when one is configured
        whitelist_exporter = Writer("whitelist://" + outpath)
        try:
            master_user_whitelist = self._import_whitelist(self.config['general']['userwhitelist'])
            self.logger.debug(
                "user whitelist found, contains %d valid NGrams" % len(master_user_whitelist['edges']['form_label'].keys())
            )
            whitelist_exporter.write_whitelist(newwl, corporaObj.id, minoccs=minoccs, status=master_user_whitelist)
        except Exception:
            self.logger.warning("user whitelist not found at %s" % self.config['general']['userwhitelist'])
            whitelist_exporter.write_whitelist(newwl, corporaObj.id, minoccs=minoccs, status="")
        yield abspath(outpath)
        return

def index_file(self, path, dataset, whitelistpath, format='tinacsv'):
    """
    pytextminer's indexation controller:
    whitelist + source file => session database
    """
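# Usage sketch: the Writer factory above is addressed with a
# "<format>://<path>" string. Assuming Writer and the Whitelist class are
# importable as below (import paths inferred from the module references
# above; the file path and dataset id are illustrative), exporting any
# whitelist object to disk reduces to:
from tinasoft.data import Writer
from tinasoft.pytextminer import whitelist

demo_wl = whitelist.Whitelist("demo_whitelist", "demo_whitelist")
exporter = Writer("whitelist:///tmp/demo_whitelist.csv")
exporter.write_whitelist(demo_wl, "demo_dataset", minoccs=1, status="w")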
            filters,
            stemmer.Nltk(),
            tokenizer.NGramTokenizer
        )
        extractorGenerator = extract.index()
        while 1:
            extractorGenerator.next()
            yield self.STATUS_RUNNING
    except IOError, ioe:
        self.logger.error("%s" % ioe)
        yield self.STATUS_ERROR
        return
    except StopIteration:
        # extraction finished: export the new whitelist
        whitelist_exporter = Writer("whitelist://" + outpath)
        whitelist_exporter.write_whitelist(newwl, corporaObj.id, minoccs=minoccs)
        yield abspath(outpath)
        return

def index_file(self, path, dataset, whitelistpath, format='tinacsv', overwrite=False):
    """
    pytextminer's indexation controller:
    whitelist + source file => session database
    """
    self._load_config()
    try:
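# Usage sketch for index_file: index a source file against a previously
# exported whitelist and poll the generator for progress. The application
# object `app`, the file paths and the dataset id are illustrative
# assumptions; the keyword names match the signature above.
indexer = app.index_file(
    path="pubmed_sample.csv",
    dataset="demo_dataset",
    whitelistpath="demo_dataset_master.csv",
    format='tinacsv',
    overwrite=False
)
try:
    while 1:
        step = indexer.next()
        if step == app.STATUS_ERROR:
            break
except StopIteration:
    pass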
def save(self, whitelistobj):
    """
    Writes a whitelist object to the path configured
    at config['output']['whitelist']['path']
    """
    wlexporter = Writer("whitelist://" + self.config['output']['whitelist']['path'])
    wlexporter.write_whitelist(whitelistobj, None, status="w")
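# Usage sketch: save() only reads config['output']['whitelist']['path'],
# so a minimal configuration for the surrounding class could look like
# this (structure inferred from the method above; the path is illustrative):
config = {
    'output': {
        'whitelist': {
            'path': '/tmp/session_whitelist.csv'
        }
    }
}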