Exemplo n.º 1
0
 def __init__(self, stemming_language=None, remove_stopwords=False,
              remove_html_markup=False, remove_latex_markup=False):
     """Initialise the tokenizer by delegating to the default tokenizer.

     The four options are forwarded positionally, unchanged and in the
     same order, to ``BibIndexDefaultTokenizer.__init__``.  No additional
     state is set up here.
     """
     BibIndexDefaultTokenizer.__init__(
         self,
         stemming_language,
         remove_stopwords,
         remove_html_markup,
         remove_latex_markup,
     )
Exemplo n.º 2
0
 def __init__(self, stemming_language=None, remove_stopwords=False,
              remove_html_markup=False, remove_latex_markup=False):
     """Initialise the tokenizer and its name-splitting helpers.

     The four options are forwarded positionally, unchanged, to
     ``BibIndexDefaultTokenizer.__init__``.  Afterwards three attributes
     are set on the instance:

     * ``single_initial_re`` -- matches a string that is exactly one word
       character followed by a period (e.g. ``"J."``).
     * ``split_on_re`` -- matches a single period, whitespace character
       or hyphen.
     * ``lastname_stopwords`` -- the set of connective words listed below.
     """
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords,
                                       remove_html_markup,
                                       remove_latex_markup)
     # Raw strings: '\w', '\.' and '\s' are not valid Python string escapes,
     # so the non-raw form triggers Deprecation/SyntaxWarning on modern
     # interpreters.  The compiled patterns are unchanged.
     self.single_initial_re = re.compile(r'^\w\.$')
     self.split_on_re = re.compile(r'[\.\s-]')
     # lastname_stopwords describes terms which should not be used for indexing,
     # in multiple-word last names.  These are purely conjunctions, serving the
     # same function as the American hyphen, but using linguistic constructs.
     self.lastname_stopwords = set(['y', 'of', 'and', 'de'])
Exemplo n.º 3
0
 def __init__(self,
              stemming_language=None,
              remove_stopwords=False,
              remove_html_markup=False,
              remove_latex_markup=False):
     """Initialise the tokenizer and its name-splitting helpers.

     All four options are passed positionally, unchanged, to
     ``BibIndexDefaultTokenizer.__init__``.  The instance then gets:

     * ``single_initial_re`` -- pattern matching exactly one word
       character followed by a period (e.g. ``"J."``).
     * ``split_on_re`` -- pattern matching one period, whitespace
       character or hyphen.
     * ``lastname_stopwords`` -- the set of connective words listed below.
     """
     BibIndexDefaultTokenizer.__init__(self, stemming_language,
                                       remove_stopwords, remove_html_markup,
                                       remove_latex_markup)
     # The patterns are written as raw strings because '\w', '\.' and '\s'
     # are invalid escape sequences in ordinary string literals and produce
     # warnings on current Python versions.  The regexes are unchanged.
     self.single_initial_re = re.compile(r'^\w\.$')
     self.split_on_re = re.compile(r'[\.\s-]')
     # lastname_stopwords describes terms which should not be used for indexing,
     # in multiple-word last names.  These are purely conjunctions, serving the
     # same function as the American hyphen, but using linguistic constructs.
     self.lastname_stopwords = set(['y', 'of', 'and', 'de'])
Exemplo n.º 4
0
 def __init__(self,
              stemming_language=None,
              remove_stopwords=False,
              remove_html_markup=False,
              remove_latex_markup=False):
     """Initialise the tokenizer with a fixed verbosity of 3.

     ``self.verbose`` is assigned before the base initialiser runs; the
     four options are then forwarded positionally, unchanged, to
     ``BibIndexDefaultTokenizer.__init__``.
     """
     # Set before delegating, as in the original ordering.
     self.verbose = 3
     BibIndexDefaultTokenizer.__init__(
         self,
         stemming_language,
         remove_stopwords,
         remove_html_markup,
         remove_latex_markup,
     )
Exemplo n.º 5
0
 def __init__(self,
              stemming_language=None,
              remove_stopwords=False,
              remove_html_markup=False,
              remove_latex_markup=False):
     """Initialise the tokenizer.

     Simply hands the four options, positionally and unchanged, to
     ``BibIndexDefaultTokenizer.__init__``; nothing else is configured
     in this method.
     """
     BibIndexDefaultTokenizer.__init__(
         self,
         stemming_language,
         remove_stopwords,
         remove_html_markup,
         remove_latex_markup,
     )