Example No. 1
import spacy
from spacy.lang.fr import French


def get_french_distances(dataset_fn):
    # Blank French pipeline with a sentencizer for sentence splitting
    pipeline = French()
    sentencizer = pipeline.create_pipe('sentencizer')
    pipeline.add_pipe(sentencizer)

    questions_list, sentences_list, spans_list = compute_question_sentence(
        dataset_fn, pipeline)

    nlp_fr = spacy.load('fr_core_news_sm')

    all_distances = []
    error = 0
    error_anchor = 0
    no_pronouns = 0
    all_lexical_variation = []
    for i, question in enumerate(questions_list):
        try:
            print(questions_list[i], sentences_list[i], spans_list[i])
            distance, lexical_variation = get_anchor(questions_list[i],
                                                     sentences_list[i], nlp_fr,
                                                     spans_list[i])
            if distance is not None:
                if distance == -1:
                    error_anchor += 1
                elif distance == -2:
                    no_pronouns += 1
                else:
                    all_distances.append(distance)
                    all_lexical_variation.append(lexical_variation)
        except Exception:
            error += 1
            continue
    print(error, error_anchor, no_pronouns)
    return all_distances, all_lexical_variation
Example No. 2
    def __init__(self, language='en'):
        self.exclude = EXCLUDE
        self.language = language
        if language == 'fr':
            nlp = French()
        else:
            nlp = English()
        # nlp.add_pipe(nlp.create_pipe('sentencizer'))
        sbd = SentenceSegmenter(nlp.vocab, strategy=split_sents)
        nlp.add_pipe(sbd)
        self.nlp = nlp
Example No. 3
from spacy.lang.fr import French
from spacy.util import compile_infix_regex


def clean_text(txt):
    nlp = French()
    listcode = [x + 45 for x in range(99)]
    # Expand department codes to postal codes; the token text is a string, so
    # it must be converted to int before the membership test
    postalcod = lambda dd, liscode: (str(int(dd) * 1000)
                                     if dd.isdigit() and int(dd) in liscode
                                     else dd)
    customize_remove_PUNCT = ['%']
    for w in customize_remove_PUNCT:
        nlp.vocab[w].is_punct = False
    customize_add_PUNCT = [
        '>', '=', '$', '™', 'eee', 'ee', 'e', "EE", "EEE", "E", ":"
    ]
    for w in customize_add_PUNCT:
        nlp.vocab[w].is_punct = True
    reg = '(?<=[0-9])[+\\-\\*^](?=[0-9-])'
    list_infixes_defaults = list(nlp.Defaults.infixes)
    if reg in list_infixes_defaults:
        list_infixes_defaults.remove(reg)
    # Modify the infix patterns so hyphenated numbers (dd-dd-dd) are not split
    infixes = (list_infixes_defaults + [r"(?<=[0-9])[\+\*^](?=[0-9-])"])
    infix_re = compile_infix_regex(infixes)
    nlp.tokenizer.infix_finditer = infix_re.finditer
    doc = nlp(txt)
    tokens = [
        postalcod(w.text.lower(), listcode) for w in doc
        if w.text != 'n' and not w.is_punct and not w.is_space
        and not (w.like_num and len(w.text) > 5) and not len(w.text) > 11
        and not w.is_quote
    ]
    listToStr = ' '.join(map(str, tokens))

    return listToStr
Example No. 4
from spacy.lang.en import English
from spacy.lang.fr import French


def get_nlp(lang: str):
    if lang == "fr":
        return French()
    elif lang == "en":
        return English()
    else:
        raise ValueError("unknown lang: {}".format(lang))
Example No. 5
def read_mtl_file(domain, filename):
    X = []
    Y = []
    if domain == 'en':
        # tokenizer = WordPunctTokenizer()
        tokenizer = English().Defaults.create_tokenizer()
    elif domain == 'fr':
        # tokenizer = nltk.data.load('tokenizers/punkt/french.pickle')
        tokenizer = French().Defaults.create_tokenizer()
    elif domain == 'de':
        # tokenizer = nltk.data.load('tokenizers/punkt/german.pickle')
        tokenizer = German().Defaults.create_tokenizer()
    else:
        raise ValueError('unknown domain: {}'.format(domain))
    with open(filename, 'r', encoding='utf-8') as inf:
        for line in inf.readlines():
            parts = line.split('\t')
            if len(parts) == 3:  # labeled
                Y.append(int(float(parts[1])))
            elif len(parts) == 2:  # unlabeled
                Y.append(0)
            else:
                raise Exception('Unknown format')
            clean = clean_sentence(parts[-1])
            # if domain is 'en':
            #     words = word_tokenize(clean, language='english')
            # elif domain is 'fr':
            #     words = word_tokenize(clean, language='french')
            # elif domain is 'de':
            #     words = word_tokenize(clean, language='german')
            words = [str(e) for e in tokenizer(clean)]
            tmp = {}
            tmp['tokens'] = words
            tmp['sent'] = clean
            X.append(tmp)
    #Y = torch.LongTensor(Y).to(opt.device)
    return (X, Y)
Example No. 6
    def init_resources(self):
        self.punctuation_pattern = re.compile("|".join(PUNCTUATION))
        self.stemmer = None
        stopwords_path = os.path.join(
            os.path.dirname(assistant_dialog_skill_analysis.__file__),
            "resources",
            self.language_code,
            "stopwords",
        )
        if self.language_code == "en":
            from spacy.lang.en import English

            self.tokenizer = Tokenizer(English().vocab)
            self.stemmer = SnowballStemmer(language="english")
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "fr":
            from spacy.lang.fr import French

            self.tokenizer = Tokenizer(French().vocab)
            self.stemmer = SnowballStemmer(language="french")
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "de":
            from spacy.lang.de import German

            self.tokenizer = Tokenizer(German().vocab)
            self.stemmer = SnowballStemmer(language="german")
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "it":
            from spacy.lang.it import Italian

            self.tokenizer = Tokenizer(Italian().vocab)
            self.stemmer = SnowballStemmer(language="italian")
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "cs":
            from spacy.lang.cs import Czech

            self.tokenizer = Tokenizer(Czech().vocab)
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "pt":
            from spacy.lang.pt import Portuguese

            self.tokenizer = Tokenizer(Portuguese().vocab)
            self.stemmer = SnowballStemmer(language="portuguese")
            self.stop_words = self.load_stop_words(stopwords_path)

        elif self.language_code == "es":
            from spacy.lang.es import Spanish

            self.tokenizer = Tokenizer(Spanish().vocab)
            self.stemmer = SnowballStemmer(language="spanish")
            self.stop_words = self.load_stop_words(stopwords_path)
        else:
            raise Exception("language code %s is not supported" %
                            self.language_code)
Example No. 7
def RecupererTextTokenSansPonctuation(fichier):
    # Load the French language class
    Langue = French()
    f = fichier
    tokenizer = RegexpTokenizer(r'\w+')
    doc = Langue(f.read())
    filtered_sent = []
    for word in doc:
        if word.text:
            filtered_sent.append(word)
    # Drop everything else and keep just the text
    return str(tokenizer.tokenize(str(filtered_sent)))
Example No. 8
def define_spacy_tokenizer(language):
    # Construction 1
    from spacy.tokenizer import Tokenizer
    if language == 'french':
        from spacy.lang.fr import French
        nlp = French()
    elif language == 'english':
        from spacy.lang.en import English
        nlp = English()
    else:
        raise ValueError('unknown language: {}'.format(language))
    # Create a blank Tokenizer with just the language vocab
    tokenizer = Tokenizer(nlp.vocab)

    return tokenizer
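
A quick usage sketch of the returned blank tokenizer (the sample sentence is arbitrary):

tokenizer = define_spacy_tokenizer('french')
# The Tokenizer is called directly on a string and yields Token objects
print([t.text for t in tokenizer("Bonjour tout le monde !")])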
Example No. 9
def initGlobal():
    global parser
    global fr_stop

    print("INITIALIZATION")
    print("Check downloads for nltk libs...")
    nltk.download('wordnet')
    nltk.download('stopwords')

    print("Parse into French")
    parser = French()
    fr_stop = set(nltk.corpus.stopwords.words('french'))

    print("DONE")
Example No. 10
from spacy.lang.en import English
from spacy.lang.fr import French
from spacy.lang.de import German
from spacy.lang.es import Spanish


def tokenize(document, language, punctuation):
    if language == 'fr':
        nlp = French()
    elif language == 'de':
        nlp = German()
    elif language == 'en':
        nlp = English()
    elif language == 'es':
        nlp = Spanish()
    else:
        raise ValueError('unknown language: {}'.format(language))
    sentencizer = nlp.create_pipe("sentencizer")
    nlp.add_pipe(sentencizer)
    doc = nlp(document)
    if punctuation:
        sentences = [[str(word) for word in sent if str(word) != '\n']
                     for sent in doc.sents]
    else:
        sentences = [[
            str(word) for word in sent
            if ((str(word) != '\n') and (str(word).isalpha()))
        ] for sent in doc.sents]
    return sentences
Example No. 11
    def get_tokenizers(self, lang):
        os.environ['TOKENIZERS_PARALLELISM'] = "True"
        if lang == 'de':
            nlp = German()
            bert = "deepset/gbert-base"
        elif lang == 'fr':
            nlp = French()
            bert = "camembert/camembert-base-ccnet"
        elif lang == 'it':
            nlp = Italian()
            bert = "dbmdz/bert-base-italian-cased"
        else:
            raise ValueError(
                f"Please choose one of the following languages: {self.languages}"
            )
        return nlp.tokenizer, AutoTokenizer.from_pretrained(bert)
Example No. 12
def lang_change(language):
    if language == 'en':
        from spacy.lang.en import English
        from spacy.lang.en.stop_words import STOP_WORDS
        parser = English()
        file = "\config_files\config_spacy_en.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'de':
        from spacy.lang.de import German
        from spacy.lang.de.stop_words import STOP_WORDS
        parser = German()
        file = "\config_files\config_spacy_de.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'es':
        from spacy.lang.es import Spanish
        from spacy.lang.es.stop_words import STOP_WORDS
        parser = Spanish()
        file = "\config_files\config_spacy_es.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'pt':
        from spacy.lang.pt import Portuguese
        from spacy.lang.pt.stop_words import STOP_WORDS
        parser = Portuguese()
        file = "\config_files\config_spacy_pt.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'fr':
        from spacy.lang.fr import French
        from spacy.lang.fr.stop_words import STOP_WORDS
        parser = French()
        file = "\config_files\config_spacy_fr.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'it':
        from spacy.lang.it import Italian
        from spacy.lang.it.stop_words import STOP_WORDS
        parser = Italian()
        file = "\config_files\config_spacy_it.yaml"
        configfile_path = os.getcwd() + file
    elif language == 'nl':
        from spacy.lang.nl import Dutch
        from spacy.lang.nl.stop_words import STOP_WORDS
        parser = Dutch()
        file = "\config_files\config_spacy_nl.yaml"
        configfile_path = os.getcwd() + file
    else:
        raise ValueError('unknown language: {}'.format(language))

    return parser, STOP_WORDS, configfile_path
Example No. 13
    def get_nlp(self, language):

        """"
        this method returns the corresponding spacy language model when 
        provided with a language. To do so it also does the required 
        import. This is certainly not the standard approach. 
        But as this endpoint will be deployed to Heroku (space limitation)
        and only be invoked rarely it is the fastest approach.
        """

        if language == "en":

            from spacy.lang.en import English
            return English()

        elif language == "fr":

            from spacy.lang.fr import French
            return French()

        elif language == "de":

            from spacy.lang.de import German
            return German()

        elif language == "es":

            from spacy.lang.es import Spanish
            return Spanish()

        elif language == "pt":

            from spacy.lang.pt import Portuguese
            return Portuguese()

        else:

            return {"error": "invalid or not supported language entered"}
Example No. 14
    def tokenize(self, dataset, language):
        """
        Articles will be processed in parallel
        """
        articles_iter = chunk(dataset, size=self.chunks)
        length = int(len(dataset) / self.chunks)
        if language == 'english':
            nlp_iter = repeat(English())
        else:
            nlp_iter = repeat(French())

        tokenized_questions = []
        with ProcessPoolExecutor() as executor:
            chunksize = int(max(length / (self.processes * self.parallelism), 1))
            i = 0
            for result in executor.map(_tokenize_questions, articles_iter,
                                        nlp_iter, chunksize=chunksize):
                for article in result:
                    tokenized_questions.append(article)
                    i += 1
                    if i % 10000 == 0:
                        print('Processed {} articles'.format(i))
        return tokenized_questions
Example No. 15
def preprocess_file(file_path):
    json_data = []
    with open(file_path, encoding="utf8") as json_file:
        json_data = json.load(json_file)

    # Filters the question to only take into account the ones that have answers
    response_data = []
    for contrib in json_data:
        for response in contrib["responses"]:
            # If the response is non-empty
            if response["value"] and response["formattedValue"]:
                # Flattens the responses and add it to the response data
                response_obj = dict(contrib)
                del response_obj["responses"]
                response_obj.update(response)
                response_data.append(response_obj)
    df_response_data = pd.DataFrame.from_records(response_data)

    df_response_data.to_json(
        os.path.join(data_dir, "response_" + os.path.basename(file_path)))

    # Loads the french model of spacy and adds some new stop words (could be extended)
    nlp = fr_core_news_md.load()
    tokenizer = French().Defaults.create_tokenizer(nlp)
    additional_stopwords = ["de", "le", "que", "ce", "l"]
    for stopword in additional_stopwords:
        nlp.Defaults.stop_words.add(stopword)

    # Creates a new column in the dataframe that contains each token lemma.
    # Punctuations, spaces and stopwords are removed
    df_response_data["lemmatizedValue"] = df_response_data["formattedValue"].\
        apply(lambda t: [token.lemma_ for token in tokenizer(t.lower()) if not token.is_stop and not token.is_punct and
                         not token.is_space])

    df_response_data.to_json(
        os.path.join(data_dir,
                     "response_lemmatized_" + os.path.basename(file_path)))
Example No. 16
    def __init__(self):
        self.nlp = French()
Example No. 17
def fr_nlp():
    return French()
Example No. 18
import json
from spacy.lang.fr import French
from spacy.tokens import Span
from spacy.matcher import PhraseMatcher

with open("exercises/fr/countries.json", encoding="utf8") as f:
    COUNTRIES = json.loads(f.read())

with open("exercises/fr/capitals.json", encoding="utf8") as f:
    CAPITALS = json.loads(f.read())

nlp = French()
matcher = PhraseMatcher(nlp.vocab)
matcher.add("COUNTRY", None, *list(nlp.pipe(COUNTRIES)))


def countries_component(doc):
    # Create a Span entity with the label "GPE" for all the matches
    matches = matcher(doc)
    doc.ents = [
        Span(doc, start, end, label="GPE") for match_id, start, end in matches
    ]
    return doc


# Add the component to the pipeline
nlp.add_pipe(countries_component)
print(nlp.pipe_names)

# Getter that looks up the span text in the dictionary
# of country capitals
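
The snippet is cut off at this point. A minimal sketch of how such a getter could be registered, assuming CAPITALS maps country names to capital names and using a hypothetical extension name "capital":

# Getter and custom Span extension (extension name is an assumption)
get_capital = lambda span: CAPITALS.get(span.text)
Span.set_extension("capital", getter=get_capital)

doc = nlp("La Tchéquie pourrait aider la Slovaquie à protéger son espace aérien")
print([(ent.text, ent.label_, ent._.capital) for ent in doc.ents])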
Example No. 19
import json
from spacy.matcher import Matcher
from spacy.lang.fr import French

with open("exercises/fr/iphone.json", encoding="utf8") as f:
    TEXTS = json.loads(f.read())

nlp = French()
matcher = Matcher(nlp.vocab)

# Two tokens whose lowercase forms match "iphone" and "x"
pattern1 = [{"LOWER": "iphone"}, {"LOWER": "x"}]

# A token whose lowercase form matches "iphone", followed by a number token
pattern2 = [{"LOWER": "iphone"}, {"IS_DIGIT": True}]

# Add the patterns to the matcher and check the result
matcher.add("GADGET", None, pattern1, pattern2)
for doc in nlp.pipe(TEXTS):
    print([doc[start:end] for match_id, start, end in matcher(doc)])
Example No. 20
                    get_stop_words("en") + STOP_LIST +
                    stopwords.words('english'))
        elif lang == "nl":
            return set(
                get_stop_words("nl") + stopwords.words('dutch') + STOP_LIST_NL)
    except Exception:
        print("warning: no stopwords were downloaded. check nltk corpora")
        print(format_exc())
        return set()


# load resources
_stop_words = load_stoplist()
print("Loading spacy model...")
_spacy = English()
_spacy_fr = French()
_spacy_nl = Dutch()
_spacy_it = Italian()


def get_stoplist():
    return _stop_words


def lemmatize(text, lowercase=True, lang="en"):
    """ Return lemmatized text """

    if lang == "en":
        tokens = _spacy(text)
    elif lang == "fr":
        tokens = _spacy_fr(text)
Example No. 21
"""
import re
from dataclasses import dataclass, field
from typing import Iterable, List, Set, Tuple, Dict

from spacy.lang.fr import French

SPLITTER_CHAR = {"(", ")", ",", ";", "[", "]", "-", "{", "}"}

# Food additives (EXXX) may be mistaken for one another because of their edit distance proximity
ADDITIVES_REGEX = re.compile(r"(?:E ?\d{3,5}[a-z]*)", re.IGNORECASE)

OffsetType = Tuple[int, int]

FR_NLP = French()


class TokenLengthMismatchException(Exception):
    pass


def normalize_ingredients(ingredients: str) -> str:
    normalized = ingredients.lower()
    normalized = normalized.replace("œu", "oeu")
    normalized = normalized.replace("’", "'")
    return normalized


def normalize_item_ingredients(item: Dict) -> Dict:
    item = item.copy()
Example No. 22
# Spacy
from spacy.lang.en import English
from spacy.lang.es import Spanish
from spacy.lang.fr import French
from spacy.lang.zh import Chinese
from spacy.lang.ru import Russian
from spacy.lang.ar import Arabic
from spacy.lang.de import German
from spacy.lang.uk import Ukrainian
from spacy.lang.ro import Romanian

lang_id_to_spacy = {
    'en': English(),
    'es': Spanish(),
    'fr': French(),
    'zh-cn': Chinese(),
    'ru': Russian(),
    'ar': Arabic(),
    'de': German(),
    'uk': Ukrainian(),
    'ro': Romanian()
}

#####################
### Globals
#####################

reddit = Reddit(client_id='OFsSWAsbFrzLpg',
                client_secret='tRReu7VAAyxgEXbGqaE19_OUrR4',
                password='******',
Example No. 23
import torch.nn.functional as F
import torch.optim as optim

import spacy
from spacy.lang.fr import French

# %%
# python -m spacy download fr_core_news_sm
spacy_fr = spacy.load("fr_core_news_sm")

# %% [markdown]
# ## Tokenizing the corpus

# %%
# Create a tokenizer for the french language
tokenizer = French().Defaults.create_tokenizer()

with open("data/20_000_lieues_sous_les_mers.txt", "r", encoding="utf-8") as f:
    document = tokenizer(f.read())

# Define a filtered set of tokens by iterating on `document`
tokens = ...

# Make a list of unique tokens and dictionary that maps tokens to
# their index in that list.
idx2tok = []
tok2idx = {}
...
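
# %%
# A minimal sketch of one way to complete the two steps above; the filtering
# criteria (keeping lowercased alphabetic tokens) are an assumption, not part
# of the original exercise:
#
#     tokens = [tok.text.lower() for tok in document if tok.is_alpha]
#     idx2tok = sorted(set(tokens))
#     tok2idx = {tok: i for i, tok in enumerate(idx2tok)}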

# %% [markdown]
# ## The continuous bag of words model
Example No. 24
from spacy.lang.en import English
from spacy.lang.es import Spanish
from spacy.lang.fr import French
from spacy.lang.it import Italian
from spacy.lang.de import German
from spacy.lang.ru import Russian
from spacy.lang.zh import Chinese
from spacy.lang.ja import Japanese
from spacy.lang.ca import Catalan
from spacy.lang.eu import Basque

from DataHandler import load_df_twitter_sent, load_df_lorelei
from util import clean_str as test_clean_str
from nltk.corpus import stopwords
from util import identity_fn, lang2id

language_dict = {
    'english': English(),
    'spanish': Spanish(),
    'french': French(),
    'italian': Italian(),
    'german': German(),
    'russian': Russian(),
    'chinese': Chinese(),
    'japanese': Japanese(),
    'catalan': Catalan(),
    'basque': Basque(),
}


class Tokenizer:
    def __init__(self,
                 language,
                 tokenizer_method='spacy',
                 remove_stopwords=True,
Example No. 25
    def __init__(self, lang='en'):
        if lang == "fr":
            self.nlp = French()
        else:
            self.nlp = English()
        self.nlp.add_pipe(self.nlp.create_pipe('sentencizer'))
Example No. 26
def split_into_lemmas_spacy(desc):
    nlp = French()
    doc = nlp(desc)
    return [w.lemma_ for w in doc]
Example No. 27
from spacy.lang.fr import French

nlp = French()

# Import the Doc class
from ____ import ____

# Target text: "spaCy est cool."
words = ["spaCy", "est", "cool", "."]
spaces = [True, True, False, False]

# Create a Doc from the words and spaces
doc = ____(____, words=words, spaces=spaces)
print(doc.text)
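
For reference, one way the blanks above can be filled in, using the standard Doc constructor with the shared vocab, words and spaces:

from spacy.tokens import Doc

# Build the Doc manually from tokens and trailing-space flags
doc = Doc(nlp.vocab, words=words, spaces=spaces)
print(doc.text)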
Example No. 28
import json
from spacy.lang.fr import French

with open("exercises/fr/countries.json", encoding="utf8") as f:
    COUNTRIES = json.loads(f.read())

nlp = French()
doc = nlp("La Tchéquie pourrait aider la Slovaquie à protéger son espace aérien")

# Import the PhraseMatcher and initialize it
from spacy.____ import ____

matcher = ____(____)

# Create Doc object patterns and add them to the matcher
# This is the fast version of: [nlp(country) for country in COUNTRIES]
patterns = list(nlp.pipe(COUNTRIES))
matcher.add("COUNTRY", None, *patterns)

# Call the matcher on the test document and print the result
matches = ____(____)
print([doc[start:end] for match_id, start, end in matches])
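
For reference, the blanks above resolve to the standard PhraseMatcher API: import it from spacy.matcher, construct it with the shared vocab, then call it on the doc:

from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
matches = matcher(doc)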
Example No. 29
import numpy as np
from scipy import spatial
import sys
import unidecode
import spacy
import pickle
#from sklearn.decomposition import PCA
#QUERY  Neighbours Ids_and_Score_bool
directory = '../'
argv = sys.argv
nlp = spacy.load("fr_core_news_lg")
pca = pickle.load(open(directory + 'models/pca_30.pkl', 'rb'))
pca_space = np.load(directory + 'models/vectors_pca_30.npy', allow_pickle=True)
id_table = list(np.load(directory + '../data/id_table.npy', allow_pickle=True))
tree = spatial.KDTree(pca_space)
from spacy.lang.fr.stop_words import STOP_WORDS
from spacy.lang.fr import French
parser = French()
stopwords = list(STOP_WORDS)


def process_query(search_query):
    query = str(search_query).lower()
    clean_query = unidecode.unidecode(query)
    tokens = parser(clean_query)
    tokens = [word.lower_ for word in tokens]
    tokens = [word for word in tokens if word not in stopwords]
    tokens = " ".join([i for i in tokens])
    return (tokens)


def query2vec(search_query):
    x = nlp(search_query).vector  #spacy 300d
Example No. 30
import json
from spacy.lang.fr import French
from spacy.tokens import Doc

with open("exercises/fr/bookquotes.json", encoding="utf8") as f:
    DATA = json.loads(f.read())

nlp = French()

# Déclare l'extension de Doc "author" (défaut None)
Doc.set_extension("author", default=None)

# Déclare l'extension de Doc "book" (default None)
Doc.set_extension("book", default=None)

for doc, context in nlp.pipe(DATA, as_tuples=True):
    # Set the doc._.book and doc._.author attributes from the context
    doc._.book = context["book"]
    doc._.author = context["author"]

    # Print the text and the custom attribute data
    print(f"{doc.text}\n — '{doc._.book}' par {doc._.author}\n")