def __init__(self, loc=MOHX_LOCATION):
    """Load the MOH-X corpus from the comma-separated file at *loc*.

    Each data row (the header row is skipped) yields one ``Corpus.Sentence``;
    the sentence text lives in column 3, the target word index in the
    second-to-last column, and its metaphoricity label in the last column.
    Exactly one word per sentence is tagged ``"tag-<label>"``; all others
    get ``"N"``. Dependency parses are attached afterwards from MOHX_DEPS.

    Fixes vs. original: the input file is now closed via ``with`` (it was
    leaked), and the loop-invariant target index / tag string are computed
    once per row instead of once per word.
    """
    self.instances, self.words = [], []
    with open(loc) as f:
        rows = f.readlines()[1:]  # skip the header row
    for c, line in enumerate(rows):
        sentence = Corpus.Sentence()
        # NOTE(review): naive split on "," — assumes no quoted commas in the
        # data columns we use; matches the original behavior.
        data = line.split(",")
        sentence.id = str(c)
        target_index = int(data[-2])
        met_tag = "tag-" + data[-1].strip()
        for i, text in enumerate(data[3].split()):
            met = met_tag if i == target_index else "N"
            w = Corpus.Word(text=text, met=met, sentence=sentence, index=i)
            sentence.words.append(w)
            self.words.append(w)
        self.instances.append(sentence)
    Corpus.add_dependencies(self.instances, MOHX_DEPS, lex_field=1)
def __init__(self):
    """Load the TroFi corpus from TROFI_LOCATION.

    The file interleaves header lines with data lines:

    - ``***verb***`` lines set the current target verb;
    - other ``*`` (cluster) lines and blank lines are skipped;
    - data lines are tab-separated: id, label field, sentence text.

    A word whose WordNet verb lemma equals the current target verb is
    tagged ``"tag-<met>"`` where ``met`` is derived from the label field
    (``N`` -> "met", ``L`` -> "N", ``U`` -> "?", checked in that order so a
    later match overrides an earlier one); all other words get ``"N"``.

    Fixes vs. original: the input file is closed via ``with`` (it was
    leaked); ``data[2].split()`` is computed once per sentence instead of
    twice per word (accidental O(n^2)); the dead ``cluster`` local (assigned
    but never read) is removed.
    """
    super().__init__()
    self.instances, self.words = [], []
    lemmatizer = WordNetLemmatizer()
    cur_verb = ""
    with open(TROFI_LOCATION) as f:
        lines = f.readlines()
    for line in lines:
        if re.match(r"\*\*\*[a-z]", line):
            # ``***verb***`` -> middle piece is the verb itself.
            cur_verb = line.split("***")[1]
            continue
        elif "*" in line or not line.strip():
            # Cluster headers ("literal"/"nonliteral") and blank lines
            # carry no word data; skip them.
            continue
        sentence = Corpus.Sentence()
        data = line.strip().split("\t")
        sentence.id = data[0]
        met = ""
        if "N" in data[1]:
            met = "met"
        if "L" in data[1]:
            met = "N"
        if "U" in data[1]:
            met = "?"
        tokens = data[2].split()
        for i, word in enumerate(tokens):
            v_lem = lemmatizer.lemmatize(word, "v")
            cur_met = "tag-" + met if v_lem == cur_verb else "N"
            w = Corpus.Word(text=word, met=cur_met, sentence=sentence, index=i)
            sentence.words.append(w)
            self.words.append(w)
        self.instances.append(sentence)
    Corpus.add_dependencies(self.instances, TROFI_DEPS, lex_field=1)
def __init__(self, lcc_instance_node):
    """Build one LCC metaphor instance from its XML ``<Instance>`` node.

    Reads target concept, averaged metaphoricity score, optional source-CM
    annotations (only those with score >= 0), the surrounding text sections,
    and the source/target LM surface strings. Then tokenizes the ``Current``
    text, tagging each token group as "source", "target", or "N" depending
    on whether it equals the source/target LM string.

    NOTE(review): ``self.source_cm`` is an empty *list* when there are no
    CMSource annotations but a *set* of (concept, score) tuples otherwise —
    preserved from the original; callers presumably only iterate it.

    Fixes vs. original: removed the unused ``all_words`` local; replaced
    ``sum([...])`` / ``set([...])`` with comprehension forms; hoisted the
    repeated ``word_group.strip()``.
    """
    super().__init__()
    self.target_cm = [lcc_instance_node.get('targetConcept')]
    annotations_element = lcc_instance_node.find(".//Annotations")
    met_anns = annotations_element.find(".//MetaphoricityAnnotations")
    # Average the score over the element's child annotation nodes.
    # (Assumes at least one child; a node with none would raise
    # ZeroDivisionError — TODO confirm the corpus guarantees this.)
    self.met_score = sum(float(m.get('score')) for m in met_anns) / len(met_anns)
    cm_source_anns = annotations_element.find(".//CMSourceAnnotations")
    self.source_cm = []
    if cm_source_anns is not None:
        self.source_cm = {
            (cm.get("sourceConcept"), float(cm.get("score")))
            for cm in cm_source_anns
            if float(cm.get('score')) >= 0
        }
    self.chain = lcc_instance_node.get('chain')
    self.id = lcc_instance_node.get('id')
    all_text = lcc_instance_node.find(".//TextContent")
    self.current_text = all_text.find(".//Current")
    self.prev_text = all_text.find(".//Prev")
    self.next_text = all_text.find(".//Next")
    self.source_lm = self.current_text.find(".//LmSource").text.strip()
    self.target_lm = self.current_text.find(".//LmTarget").text.strip()
    i = 0
    for word_group in self.current_text.itertext():
        stripped = word_group.strip()
        if stripped == self.source_lm:
            met = ["source", self.source_cm, self.met_score]
        elif stripped == self.target_lm:
            met = ["target", self.target_cm, self.met_score]
        else:
            met = ["N", "", ""]
        # Tokenize into word-ish chunks and standalone punctuation;
        # "=" tokens are markup noise and are dropped.
        for w in [t for t in re.findall(r"[\w']+|[.,?!;:\"']", word_group)
                  if t != "="]:
            # The same ``met`` list object is shared by every word of the
            # group (original behavior — mutating one affects all).
            self.words.append(
                Corpus.Word(text=w, met=met, index=i, sentence=self))
            i += 1
def __init__(self, corpus_location):
    """Load a verb-metaphor CSV corpus from *corpus_location*.

    Each data row (header skipped) yields one ``Corpus.Sentence``: column 1
    is the sentence id, column 3 the whitespace-separated sentence text, and
    the second-to-last column the index of the target word, which is tagged
    ``"met"``; every other word is tagged ``"N"``.

    NOTE(review): the last column (parsed as ``int`` in the original but
    never used) is ignored — the target word is always marked "met"
    regardless of its value. Confirm against the corpus spec whether it
    should gate the tag.

    Fixes vs. original: the file handle is now closed via ``with`` (it was
    leaked into ``csv.reader``); ``newline=""`` is passed per the csv module
    docs; the unused ``tag`` local is removed.
    """
    self.instances, self.words = [], []
    with open(corpus_location, newline="") as f:
        data = csv.reader(f)
        next(data)  # skip the header row
        for line in data:
            sentence = Corpus.Sentence()
            sentence.id = line[1]
            target_index = int(line[-2])
            for i, word in enumerate(line[3].split()):
                met = "met" if i == target_index else "N"
                w = Corpus.Word(text=word, sentence=sentence, met=met, index=i)
                sentence.words.append(w)
                self.words.append(w)
            self.instances.append(sentence)
def load_vuamc_csv(filename=VUAMC_CSV):
    """Load the VUAMC corpus from its CSV export.

    Each data row is: source-file id, sentence id, and a whitespace-separated
    token column where each token is ``pos;;lemma;;text`` and a ``M_`` prefix
    on the text marks a metaphorical word. Multi-word tokens are joined with
    "_" and split back into individual words; a fragment containing no
    letters/digits/punctuation at all becomes a "none" placeholder word.
    The sentence's domain is derived from which file-id set it belongs to.
    Dependency, VerbNet, and AllenNLP parses are attached before returning.

    Returns:
        (sentences, all_words) — the list of ``Corpus.Sentence`` objects and
        the flat list of real (non-placeholder) ``Corpus.Word`` objects.

    Fixes vs. original:
    - BUG: ``for sent_index in range(1, len(data[1:]))`` iterated indices
      1 .. len(data)-2, silently dropping the final CSV row; we now iterate
      every row after the header.
    - ``words.split()`` was recomputed twice per token (accidental O(n^2));
      it is now split once per sentence.
    - The letters/digits/punctuation character set is built once, not per
      word fragment.
    """
    with codecs.open(filename, encoding="latin-1", errors='replace') as f:
        data = [line for line in csv.reader(f)]
    sentences = []
    all_words = []
    # Characters that make a fragment a "real" word.
    valid_chars = set(string.punctuation + string.ascii_letters + string.digits)
    for line_data in data[1:]:  # skip header; include the final row
        if not line_data:
            continue
        sentence = Corpus.Sentence()
        sentence.source_file = line_data[0]
        if sentence.source_file in ACADEMIC:
            sentence.domain = "academic"
        elif sentence.source_file in CONVERSATION:
            sentence.domain = "conversation"
        elif sentence.source_file in FICTION:
            sentence.domain = "fiction"
        elif sentence.source_file in NEWS:
            sentence.domain = "news"
        sentence.id = line_data[1]
        j = 0
        for token in line_data[2].split():
            w_data = token.split(";;")
            if "M_" in w_data[-1]:
                met = "met"
                word_text = w_data[-1][2:]  # strip the "M_" marker
            else:
                met = "N"
                word_text = w_data[-1]
            pos = w_data[0]
            lemma = w_data[1]
            for extra_words in word_text.split("_"):
                if not set(extra_words) & valid_chars:
                    # Content-free fragment: keep a placeholder so word
                    # indices stay aligned with the parses.
                    sentence.words.append(
                        Corpus.Word(text="none", met="none", pos="none",
                                    lemma="none", sentence=sentence, index=j))
                    j += 1
                    continue
                word = Corpus.Word(text=extra_words, met=met, pos=pos,
                                   lemma=lemma, sentence=sentence, index=j)
                sentence.words.append(word)
                all_words.append(word)
                j += 1
        sentences.append(sentence)
    Corpus.add_dependencies(sentences, VUAMC_DEPS)
    Corpus.add_vn_parse(sentences, VUAMC_VN)
    Corpus.add_allen_parse(sentences, VUAMC_ALLEN)
    #Corpus.populate_vn_from_heads(sentences)
    return sentences, all_words