def __init__(self, synonyms_collection_filepath, frames_collection_filepath,
             init_ner=True, init_stemmer=True, init_frames=True,
             use_ner_cache_only=False,
             ner_name=supported.ONTONOTES_BERT_MULT_NAME):
    assert(isinstance(init_ner, bool))
    assert(isinstance(init_frames, bool))
    assert(isinstance(init_stemmer, bool))
    assert(isinstance(ner_name, str))

    # Default (empty) state.
    self.__auth_objects = None
    self.__use_ner_cache_only = use_ner_cache_only
    self.__synonyms = None
    self.__stemmer = None
    self.__frame_variants = None
    self.__frames = None
    self.__pos_tagger = None
    self.__syntax = None
    self.__use_auth_list = False
    self.__frames_cache = None

    # NER.
    self.__ner_cache = None
    self.__ner_class_type = Default.get_class_by_ner_name(ner_name)
    self.__ner = None

    if init_stemmer:
        self.__stemmer = Default.create_default_stemmer()

    # The POS tagger reuses the Mystem instance of the stemmer.
    if self.__stemmer is not None:
        self.__pos_tagger = POSMystemWrapper(self.__stemmer.MystemInstance)

    if init_frames:
        self.__frames = Default.create_default_frames_collection(
            frames_collection_filepath)

    if self.__stemmer is not None and self.__frames is not None:
        self.__frame_variants = Default.create_default_frame_variants_collection(
            frames=self.__frames, stemmer=self.__stemmer)

    if self.__frame_variants is not None:
        self.__frames_helper = FramesHelper(self.__frame_variants)

    # Instantiate a NER model only when the cache alone is not enough.
    if init_ner and not use_ner_cache_only:
        self.__ner = self.__ner_class_type()

    self.__synonyms = Default.create_default_synonyms_collection(
        filepath=synonyms_collection_filepath,
        stemmer=None if self.DISABLE_LEMMA_FOR_SYNONYMS else self.__stemmer)

    self.__auth_objects = AuthorizedObjectsCollection(OrderedDict())
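# Usage sketch (assumption-heavy): judging by the assertions in the next
# constructor, the class above is most likely the `Settings` consumed there.
# The import path and file paths below are hypothetical placeholders.
from settings import Settings  # hypothetical module path

settings = Settings(
    synonyms_collection_filepath="data/synonyms.txt",      # hypothetical path
    frames_collection_filepath="data/rusentiframes.json",  # hypothetical path
    init_ner=False,           # do not load a NER model ...
    use_ner_cache_only=True)  # ... rely on a prebuilt NER cache instead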
def __init__(self, settings, contexts_printer, opinion_statistic_printer,
             object_statistic_printer, parse_frames_in_news_sentences):
    assert(isinstance(settings, Settings))
    assert(isinstance(contexts_printer, ContextsPrinter))
    assert(isinstance(parse_frames_in_news_sentences, bool))
    assert(isinstance(opinion_statistic_printer, OpinionStatisticBasePrinter))
    assert(isinstance(object_statistic_printer, StatisticObjectsPrinter) or
           object_statistic_printer is None)

    self.__settings = settings
    self.__context_printer = contexts_printer
    self.__opinion_statistic_printer = opinion_statistic_printer
    self.__object_statistic_printer = object_statistic_printer
    self.__parse_frames_in_news_sentences = parse_frames_in_news_sentences
    self.__check_obj_preposition_in_title = True
    self.__text_object_authorizer = TextObjectAuthorizer(ner_type=settings.NERClassType)
    self.__ner_extractor = Default.create_ner_extractor(
        ner=settings.NER,
        ner_cache=settings.NerCache,
        default_auth_check=lambda text_obj: self.__text_object_authorizer.is_auth(text_obj))

    self.__debug_opinions_created = 0
    self.__debug_opinions_with_missed_synonyms = 0
    self.__debug_opinions_looped = 0
    self.__debug_opinions_total_extracted_from_titles = 0
    self.__debug_opinions_rejected_by_preps = 0
    self.__debug_opinions_title_synonymous_existed = 0
def __init__(self, ner_cache, stemmer, default_auth_check):
    assert(isinstance(stemmer, Stemmer))
    self.__stemmer = stemmer
    self.__ner_extractor = Default.create_ner_extractor(
        ner=None,
        ner_cache=ner_cache,
        default_auth_check=default_auth_check)
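# A minimal wiring sketch for the extractor above; its signature matches the
# `TextObjectValuesExtractor` call in the export snippet further below, so the
# same read-only SQLite NER cache is assumed here. The cache path and the
# permissive auth check are illustrative assumptions.
ner_cache = SQLiteNERCacheData.init_as_read_only("data/ner-cache.db")  # hypothetical path
extractor = TextObjectValuesExtractor(
    ner_cache=ner_cache,
    stemmer=Default.create_default_stemmer(),
    default_auth_check=lambda text_obj: True)  # authorize every object (assumption)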
def expand_with_neutral(self, from_zip_filepath, cache_dir, log_filepath,
                        used_locations_filepath, neut_opin_stat_filepath):
    assert(isinstance(from_zip_filepath, str))
    assert(isinstance(cache_dir, str))
    assert(isinstance(log_filepath, str))
    assert(isinstance(used_locations_filepath, str))
    assert(isinstance(neut_opin_stat_filepath, str))

    # Clearing the folder before filling it with fresh data.
    os.system('rm -rf {cache_dir}'.format(cache_dir=cache_dir))

    # Extracting everything into cache_dir.
    with zipfile.ZipFile(from_zip_filepath, 'r') as zip_input:
        zip_input.extractall(cache_dir)

    # Reading synonyms collection.
    print("Reading synonyms collection ...")
    stemmer = Default.create_default_stemmer()
    synonyms_filepath = join(cache_dir, "synonyms.txt")
    synonyms = SynonymsCollection.from_file(synonyms_filepath, stemmer=stemmer)

    # Initializing everything that depends on the synonyms collection.
    self.__init_from_synonyms(synonyms)

    # Starting the neutral opinion annotation process.
    print("Run processing ...")
    source_filepath = join(cache_dir, "collection.txt")
    target_filepath = join(cache_dir, "collection-neut.txt")
    with open(source_filepath, 'r') as f_src:
        with open(target_filepath, 'w') as f_to:
            neut_logger = self.__process(f_src=f_src, f_to=f_to)

    # Replacing the old file in the cache dir with the new one.
    os.system('mv {} {}'.format(target_filepath, source_filepath))

    # Saving everything into a new archive file.
    target_zip_filepath = join(dirname(from_zip_filepath),
                               get_target_filename(from_zip_filepath))

    if neut_logger is not None:
        with open(log_filepath, 'w') as f:
            for line in neut_logger.iter_data():
                f.write(line)

    with open(used_locations_filepath, 'w') as f:
        for key, value in sorted(self.__used_locations.items(),
                                 key=lambda pair: pair[1]):
            f.write("'{entry}': {count}\n".format(entry=key, count=value))

    self.__opin_stat_printer.print_statistic(neut_opin_stat_filepath)

    archive_all_files_in_zip(to_zip_filepath=target_zip_filepath,
                             source_dir=cache_dir)
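# A call sketch for `expand_with_neutral`, assuming an already initialized
# instance named `expander` (hypothetical); all paths below are placeholders.
# The source archive is expected to contain `synonyms.txt` and
# `collection.txt`, since the method reads both from the unpacked cache dir.
# Note that `cache_dir` is wiped with `rm -rf` before extraction.
expander.expand_with_neutral(
    from_zip_filepath="data/collection.zip",
    cache_dir="_cache",
    log_filepath="out/neut.log",
    used_locations_filepath="out/used-locations.txt",
    neut_opin_stat_filepath="out/neut-opin-stat.txt")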
NewsSourceDirArg.add_argument(parser)
SourceNewsReaderArg.add_argument(parser)
NewsStartFromIndexArg.add_argument(parser)
ParseFramesInSentencesArgs.add_argument(parser)
RuSentiFramesCacheArgs.add_argument(parser)

# Parsing arguments.
args = parser.parse_args()

# Reading arguments.
src_dir = NewsSourceDirArg.read_argument(args)
reader = SourceNewsReaderArg.read_argument(args)
frames_filepath = RuSentiFramesCacheArgs.read_argument(args)
parse_frames_in_sents = ParseFramesInSentencesArgs.read_argument(args)
start_from_index = NewsStartFromIndexArg.read_argument(args)

stemmer = Default.create_default_stemmer()
frames = Default.create_default_frames_collection(frames_filepath)
f_var = Default.create_default_frame_variants_collection(frames=frames,
                                                         stemmer=stemmer)

run_frames_cache(reader=reader,
                 src_dir=src_dir,
                 version=basename(frames_filepath),
                 frames=frames,
                 frames_helper=FramesHelper(f_var),
                 stemmer=stemmer,
                 parse_frames_in_sentences=parse_frames_in_sents,
                 start_from_index=start_from_index,
                 miniter_count=2000000)
ner_type = NerTypeArg.read_argument(args)
ner_cache_filepath = NerCacheFilepathArg.read_argument(args)
output_dir = OutputDirArg.read_argument(args)
source_dir = NewsSourceDirArg.read_argument(args)
reader = SourceNewsReaderArg.read_argument(args)

# Initializing the NER cache.
ner_cache = SQLiteNERCacheData.init_as_read_only(ner_cache_filepath)

# Exporting results.
news_processed = 0
added_words = set()
f_name = "{}.txt".format(ner_type)

# Initializing the object values extractor.
ner_class_type = Default.get_class_by_ner_name(ner_type)
text_object_authorizer = TextObjectAuthorizer(ner_type=ner_class_type)
obj_values_extractor = TextObjectValuesExtractor(
    ner_cache=ner_cache,
    stemmer=Default.create_default_stemmer(),
    default_auth_check=lambda text_obj: text_object_authorizer.is_auth(text_obj))

create_dir(output_dir)
print("Output dir: {}".format(output_dir))

with ner_cache:
    with open(join(output_dir, f_name), "w") as f:
        for _, news_info in reader.get_news_iter(source_dir):
            assert(isinstance(news_info, NewsInfo))
                    nargs='?',
                    help='Source directory')

# Added parameters.
SynonymsCollectionFilepathArg.add_argument(parser)

# Parsing arguments.
args = parser.parse_args()

# Reading parameters.
opinion_filepath = args.opinion_filepath
source_filepath = args.source_filepath
synonyms_filepath = SynonymsCollectionFilepathArg.read_argument(args)
opinion_filename = basename(opinion_filepath)

stemmer = Default.create_default_stemmer()
synonyms = Default.create_default_synonyms_collection(
    filepath=synonyms_filepath,
    stemmer=stemmer)

with open(opinion_filepath, 'r') as f:
    opinions = read_opinions(
        filepath=opinion_filepath,
        synonyms=synonyms,
        custom_opin_ends_iter=lambda use_sentiment:
            OpinionStatisticBasePrinter.iter_opinion_end_values(
                f=f, read_sentiment=use_sentiment),
        read_sentiment=False)

file_ids_it = iter_relevant_file_ids(source_filepath=source_filepath,
                                     opinions=opinions)
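# Note on the lambda above: `custom_opin_ends_iter` closes over the open file
# handle `f`, so `read_opinions` must consume the resulting iterator before
# the enclosing `with` block exits; afterwards the handle is closed.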
parser.add_argument('--obj-values-dir',
                    dest='obj_values_dir',
                    type=str,
                    nargs=1,
                    help='Source dir')

# Parse arguments.
OptionalOutputDirArg.add_argument(parser)

# Reading arguments.
args = parser.parse_args()
source_dir = args.obj_values_dir[0]
output_dir = OptionalOutputDirArg.read_argument(args)

# Initializing the necessary instances for words grouping.
stemmer = Default.create_default_stemmer()
ruthes_nouns = RussianThesaurusSynsets.from_xml_file(filepath=args.ruthes_filepath[0])

log_found_in_ruthes = 0
log_lemmas_kept = 0
syn_groups = {}

# Processing all the files in the subdir.
f_names_it = get_all_subfiles(data_folder=source_dir,
                              f_name_check_rule=lambda _: True)

for filename in f_names_it:
    print(filename)
    for obj_value, obj_type in iter_words_with_types_from_filepath(filename):
        if obj_value in ruthes_nouns:
            log_found_in_ruthes += 1