def __init__(self, only_single_letter_firsts=False, names_to_utf8=True,
             only_one_initial=False, strip_first_names=False, *roles):
    """
    Arguments:

      - only_single_letter_firsts(bool): Make proper initials (e.g. C. H. Bennett)
        only if the entry itself only has initials. This is useful if your
        entries don't contain the proper punctuation (e.g. C H Bennett).
        (default: False)

      - names_to_utf8(bool): Convert LaTeX escapes to UTF-8 characters in names
        in bib file. (default: True)

      - only_one_initial(bool): Keep only the first initial, removing any middle
        names. For instance, "P. A. M. Dirac" -> "P. Dirac". (default: False)

      - strip_first_names(bool): Only keep last names and strip first/middle
        names entirely.
    """
    super(NameInitialsFilter, self).__init__()

    # Any extra positional arguments are stored as the roles to act on; when
    # none are given, fall back to the single role 'author'.
    self.roles = roles if roles else ['author']

    # Option values may not be real booleans yet, so each one is normalized
    # through getbool() before being stored.
    self._names_to_utf8 = getbool(names_to_utf8)
    self._only_single_letter_firsts = getbool(only_single_letter_firsts)
    self._only_one_initial = getbool(only_one_initial)
    self._strip_first_names = getbool(strip_first_names)

    logger.debug('NameInitialsFilter constructor')
def __init__(self, dupfile=None, warn=False, custom_bibalias=False,
             keep_only_used_in_jobname=None, jobname_search_dirs=None, *args):
    r"""DuplicatesFilter constructor.

    *dupfile: the name of a file to write latex code for defining duplicates
              to. This file will be overwritten!!
    *warn(bool): if this flag is set, dupfile is not mandatory, and a warning
              is issued for every duplicate entry found in the database.
    *custom_bibalias(bool): if set to TRUE, then no latex definitions will be
              generated in the file given in `dupfile', and will rely on a
              user-defined implementation of `\bibalias`.
    *keep_only_used_in_jobname: only keep entries which are referenced in
              LaTeX job Jobname. The corresponding AUX file is searched for
              and analyzed, see only_used filter. Note that this has no effect
              if the `dupfile' is not set.
    *jobname_search_dirs(CommaStrList): (use with keep_only_used_in_jobname)
              search for the AUX file in the given directories, as for the
              only_used filter.
    """
    BibFilter.__init__(self)

    self.dupfile = dupfile
    self.warn = butils.getbool(warn)
    self.custom_bibalias = butils.getbool(custom_bibalias)

    # The dupfile name may alternatively be given as the single positional
    # argument; giving it both ways, or giving more than one positional
    # argument, is an error.
    if len(args) == 1:
        if self.dupfile is not None:
            raise BibFilterError("duplicates",
                                 "Repeated values given for dupfile: one as an option (`%s'), "
                                 "the other as a positional argument (`%s')"%(self.dupfile, args[0]))
        self.dupfile = args[0]
    elif len(args) != 0:
        raise BibFilterError("duplicates",
                             "Received unexpected positional arguments (at most one expected, "
                             "the dupfile name): [%s]"%(",".join(["%s"%(x) for x in args])))

    # Any falsy job name (e.g. an empty string) is stored as None.
    if not keep_only_used_in_jobname:
        keep_only_used_in_jobname = None
    self.keep_only_used_in_jobname = keep_only_used_in_jobname

    if jobname_search_dirs is not None:
        jobname_search_dirs = CommaStrList(jobname_search_dirs)
    self.jobname_search_dirs = jobname_search_dirs

    self.cache_entries_validator = None

    if not self.dupfile and not self.warn:
        logger.warning("bibolamazi duplicates filter: no action will be taken as neither -sDupfile or"+
                       " -dWarn are given!")

    # Log the resolved settings rather than the raw arguments: the dupfile may
    # have come from the positional argument, and `warn` has been normalized.
    logger.debug('duplicates: dupfile=%r, warn=%r', self.dupfile, self.warn)
def __init__(self, Strip=False, StripAllIfDoiOrArxiv=False, StripDoiUrl=True,
             StripArxivUrl=True, UrlFromDoi=False, UrlFromArxiv=False,
             KeepFirstUrlOnly=False, StripForTypes=None, AddAsHowPublished=False,
             HowPublishedText='available at {urlstr}'):
    r"""
    UrlNormalizeFilter constructor.

    Arguments:

      - Strip(bool): Removes all URLs from the entry. Maybe add URLs according
        to the other options. [default: False]

      - StripAllIfDoiOrArxiv(bool): Removes all URLs from the entry, but only
        if a DOI identifier or an ArXiv ID is present. [default: False]

      - StripDoiUrl(bool): Remove any URL that is in fact a DOI lookup, e.g. of
        the form `https://dx.doi.org/<DOI>` [default: True]

      - StripArxivUrl(bool): Remove any URL that looks like an arxiv lookup,
        i.e. of the form `http(s)://arxiv.org/abs/<ID>` [default: True]

      - UrlFromDoi(bool): If the entry has a DOI identifier, then add an
        explicit URL that is a DOI lookup, i.e. `https://dx.doi.org/<DOI>`
        [default: False]

      - UrlFromArxiv(bool): If the entry has an ArXiv identifier, then add an
        explicit URL that links to the arXiv page, i.e.
        `https://arxiv.org/abs/<ARXIV-ID>` [default: False]

      - KeepFirstUrlOnly(bool): If the entry has several URLs, then after
        applying all the other stripping rules, keep only the first remaining
        URL, if any. [default: False]

      - StripForTypes: strip all URLs specified for entries among the given
        list of types. Common types to strip would be e.g. 'book' or
        'phdthesis'.

      - AddAsHowPublished(bool): Add a howpublished={available at \url{...}}
        entry to the bibtex.

      - HowPublishedText: replace the 'available at ' text for
        -dAddAsHowPublished. Use Python string formatting. Available keys are
        '{urlstr}' to insert list of URLs concatenated with a comma, '{url}' to
        insert the first url and the key 'urls' is passed the raw Python list
        as argument.
    """
    super(UrlNormalizeFilter, self).__init__()

    # Boolean switches, each normalized through getbool().
    self.strip = getbool(Strip)
    self.stripallifdoiorarxiv = getbool(StripAllIfDoiOrArxiv)
    self.stripdoiurl = getbool(StripDoiUrl)
    self.striparxivurl = getbool(StripArxivUrl)
    self.urlfromdoi = getbool(UrlFromDoi)
    self.urlfromarxiv = getbool(UrlFromArxiv)
    self.keepfirsturlonly = getbool(KeepFirstUrlOnly)

    # StripForTypes is a comma-separated string of entry types; None means
    # that no type list was given.
    if StripForTypes is None:
        self.stripfortypes = None
    else:
        self.stripfortypes = [typename.strip() for typename in StripForTypes.split(',')]

    self.addashowpublished = getbool(AddAsHowPublished)
    self.howpublishedtext = HowPublishedText

    logger.debug('url filter constructor')
def __init__(self, jobname, search_dirs=[], prefix="", journal_ref_in_note=False):
    r"""CiteArxivFilter constructor.

    Arguments:

      - jobname: the base name of the latex file. Will search for jobname.aux
        and look for `\citation{..}' commands as they are generated by latex.

      - search_dirs(CommaStrList): the .aux file will be searched for in this
        list of directories; separate directories with commas e.g.
        'path/to/dir1,path/to/dir2' (escape commas and backslashes with a
        backslash)

      - journal_ref_in_note(bool): keep the journal reference given by the
        arXiv in the note={} bibtex field. (default: No)

      - prefix: if set, citations should be in the the form `\cite{prefix:id}'
        (default: no prefix)
    """
    # The docstring is a raw string because it contains LaTeX commands such as
    # `\citation`; in a normal string `\c` is an invalid escape sequence. The
    # resulting text is unchanged.

    BibFilter.__init__(self)

    self.jobname = jobname
    # The shared `[]` default is never mutated: it is only read here to build
    # a fresh CommaStrList.
    self.search_dirs = CommaStrList(search_dirs)
    self.journal_ref_in_note = getbool(journal_ref_in_note)
    self.prefix = prefix

    if not self.search_dirs:
        # Default search locations when none are given.
        self.search_dirs = ['.', '_cleanlatexfiles']  # also for my cleanlatex utility :)

    logger.debug('citearxiv: jobname=%r', jobname)
def __init__(self, format="%(author)s%(year)s%(journal_abb)s_%(title_word)s",
             if_published=None, if_type=None):
    """
    CiteKeyFilter Constructor.

    Arguments:

      - format: Format of the citation key. Should contain placeholders
        %(author)s etc. (See complete filter reference for details).

      - if_published(bool): If this option is specified, then only apply this
        filter to published or unpublished items, depending on the value
        given.

      - if_type(CommaStrList): You may specify a list of entry types to
        restrict the application of this filter to. By default, or if the list
        is empty, the filter applies to all entries.
    """
    super(CiteKeyFilter, self).__init__()

    self.fmt = format

    # Tri-state option: None (or an empty string) means "not specified";
    # otherwise the value is normalized to a real boolean.
    if if_published is None or if_published == '':
        self.if_published = None
    else:
        self.if_published = butils.getbool(if_published)

    if if_type is None or if_type == '':
        self.if_type = None
    else:
        if hasattr(if_type, 'split'):
            # A plain comma-separated string: split it first, because
            # iterating over the string itself would yield single characters
            # instead of entry types. (Backslash-escaped commas are not
            # interpreted here.)
            if_type = if_type.split(',')
        self.if_type = [x.strip() for x in if_type]

    logger.debug('citekey: fmt=%r', self.fmt)
def __init__(self, mode="random", use_uppercase_text=False):
    r"""
    Constructor method for TestFilter.

    Note that this part of the constructor docstring itself isn't that useful,
    but the argument list below is parsed and used by the default automatic
    option parser for filter arguments. So document your arguments!

    If your filter accepts `**kwargs`, you may add more arguments below than
    you explicitly declare in your constructor prototype.

    If this function accepts `*args`, then additional positional arguments on
    the filter line will be passed to those args. (And not to the declared
    arguments.)

    Arguments:

      - mode(Mode): the operating mode to adopt

      - use_uppercase_text(bool): if set to True, then transform our added
        text to uppercase characters.
    """
    BibFilter.__init__(self)

    # Convert the raw option values into their typed forms before storing.
    chosen_mode = Mode(mode)
    uppercase = getbool(use_uppercase_text)
    self.mode = chosen_mode
    self.use_uppercase_text = uppercase

    # debug log messages are seen by the user in verbose output mode
    logger.debug('my filter constructor: mode=%s, uppercase=%s',
                 self.mode, self.use_uppercase_text)
def __init__(self, message=None, *args, **kwargs):
    """Echo a custom message into the bibolamazi logger.

    Arguments:

      - message: the message to echo

      - level(LogLevel): the logger level required to display the message (one
        of 'LONGDEBUG', 'DEBUG', 'WARNING', 'INFO', 'ERROR' or 'CRITICAL')

      - format(EchoFormat): how to display the message (one of 'default',
        'simple' or 'warn')

      - warn(bool): short for '-sFormat=warn -sLevel=WARNING'
    """
    BibFilter.__init__(self)

    self.message = message

    # Positional arguments are joined into the message, but only when no
    # explicit message was given; otherwise they are rejected.
    if args:
        if self.message is not None:
            quoted = ('"' + s + '"' for s in args)
            raise BibFilterError(
                self.name(),
                "Got unexpected additional arguments: %s" % (", ".join(quoted))
            )
        self.message = " ".join(args)

    # `warn` is a shortcut: it only fills in `level` and `format` when those
    # were not given explicitly.
    warn_flag = kwargs.get("warn", None)
    if warn_flag is not None and getbool(warn_flag):
        kwargs.setdefault("level", "WARNING")
        kwargs.setdefault("format", "warn")

    self.loglevel = LogLevel(kwargs.get("level", logging.INFO))

    echo_format = EchoFormat(kwargs.get("format", FMT_DEFAULT))
    self.fmt = msgformats[echo_format.msgformat]
def __init__(self, only_single_letter_firsts=False, names_to_utf8=True, *roles, **kwargs):
    """
    Arguments:

      - only_single_letter_firsts(bool): Make proper initials (e.g. C. H. Bennett)
        only if the entry itself only has initials. This is useful if your
        entries don't contain the proper punctuation (e.g. C H Bennett).
        (default: False)

      - names_to_utf8(bool): Convert LaTeX escapes to UTF-8 characters in names
        in bib file. (default: True)
    """
    BibFilter.__init__(self)

    # Extra positional arguments are stored as the roles to act on, defaulting
    # to 'author' alone. Extra keyword arguments are accepted but not used by
    # this constructor.
    self.roles = roles if roles else ['author']

    self._names_to_utf8 = getbool(names_to_utf8)
    self._only_single_letter_firsts = getbool(only_single_letter_firsts)

    logger.debug('NameInitialsFilter constructor')
def update_allow_remote_filterpackages(self):
    """Refresh the remote-filter-package widgets from the stored settings.

    Reads 'AllowRemote' from the 'RemoteFilterPackages' settings group, sets
    the `chkRemoteAllow` check box accordingly (with its signals blocked) and
    enables or disables the `grpGithubAuth` group to match.
    """
    qsettings = QSettings()
    qsettings.beginGroup('RemoteFilterPackages')
    stored_value = qsettings.value('AllowRemote', False)
    qsettings.endGroup()

    # on Windows, Python 3.4 and Qt 5.7, the stored value comes back as a
    # str... so ensure it is a bool
    is_allowed = butils.getbool(stored_value)

    with BlockedSignals(self.ui.chkRemoteAllow):
        self.ui.chkRemoteAllow.setChecked(is_allowed)

    self.ui.grpGithubAuth.setEnabled(is_allowed)
def __init__(self, order=None, reverse=False):
    """
    Arguments:

      - order(OrderMode): The strategy according to which to order all the
        entries. Possible values: see below.

      - reverse(bool): Reverse the sorting order. Has no effect with 'raw'
        order mode.
    """
    super(OrderEntriesFilter, self).__init__()

    # Convert the raw option values to their typed forms.
    order_mode = OrderMode(order)
    reverse_flag = butils.getbool(reverse)

    self.order = order_mode
    self.reverse = reverse_flag

    logger.debug('orderentries: self.order=%r' % self.order)
def __init__(self, xmlfile="publications_%Y-%m-%dT%H-%M-%S.xml",
             export_annote=True, no_arxiv_urls=False, fixes_for_ethz=False,
             print_diff_to_last=False):
    """
    Bib2EnXmlFilter constructor.

    Arguments:

      - xmlfile: The name of the XML file to output to. This string will be
        parsed with `strftime()`, see
        [https://docs.python.org/2/library/time.html#time.strftime]. If the
        file exists, it will not be overwritten and an error will be reported.
        The default value is 'publications_%Y-%m-%dT%H-%M-%S.xml'.

      - export_annote(bool): If set to `False`, then annote={} fields in the
        bibtex will not be exported into <notes>, as when this is set to
        `True` (`True` is the default).

      - no_arxiv_urls(bool): If set to `True`, then arxiv URLs will
        automatically be added to the entry. Note that this is the only way to
        link to the online arXiv version, but you may disable this option if
        the URL is already present in the entry.

      - fixes_for_ethz(bool): If set to `True`, includes some fixes & changes
        to prepare for proper upload on ETHZ Silva's CMS publication database.

      - print_diff_to_last(bool): If `True`, then print out the difference
        between the new outputted XML file and the latest file generated with
        the same pattern.
    """
    BibFilter.__init__(self)

    # Keep the unexpanded pattern, and expand the strftime() placeholders once,
    # at construction time, to get the concrete output file name.
    self.xmlfilepattern = xmlfile
    construction_time = datetime.now()
    self.xmlfile = construction_time.strftime(xmlfile)

    # Boolean switches, each normalized through getbool().
    self.export_annote = getbool(export_annote)
    self.no_arxiv_urls = getbool(no_arxiv_urls)
    self.fixes_for_ethz = getbool(fixes_for_ethz)
    self.print_diff_to_last = getbool(print_diff_to_last)

    logger.debug('bib2enxml: xmlfile=%r', self.xmlfile)
def __init__(self, scheme=JAbbrevModule('defaults'), dot_at_abbrev=True, dot_at_abbrev_cmd=r'\@'):
    r"""
    Arguments:

      * scheme(JAbbrevModule): Use the given abbreviations scheme.

      * dot_at_abbrev(bool): If true (the default), then abbreviations are
        written e.g. as "Phys.\@ Rev.\@ Lett.\@" which gets the spacing right
        in LaTeX (not end of sentence). Set to false to keep the simple "Phys.
        Rev. Lett."

      * dot_at_abbrev_cmd: The command to use after a dot when -dDotAtAbbrev is
        set. You can set this, e.g., to "\@" or any other custom command.
    """
    # NOTE(review): no base-class initializer is called in this constructor,
    # unlike the other filter constructors -- confirm this is intended.

    self.scheme = scheme
    self.dot_at_abbrev = butils.getbool(dot_at_abbrev)
    self.dot_at_abbrev_cmd = dot_at_abbrev_cmd

    # List of (pattern, replacement) pairs built from the scheme module.
    self.repl = []

    # import the corresponding module: a dotted scheme name is taken as a full
    # module path, a bare name is looked up inside bibolamazi_qi_filters.jab.
    scheme_name = str(scheme)
    if '.' in scheme_name:
        module_path = scheme_name
    else:
        module_path = 'bibolamazi_qi_filters.jab.' + scheme_name
    scheme_module = importlib.import_module(module_path)

    pairs = scheme_module.__dict__['replacement_pairs']

    # The original comment notes that mkrxs does nothing if the key is already
    # a re object.
    self.repl.extend(
        (jab.mkrxs(key),
         jab.mkvalrepl(value,
                       dot_at_abbrev=self.dot_at_abbrev,
                       dot_at_abbrev_cmd=self.dot_at_abbrev_cmd))
        for key, value in pairs
    )

    logger.debug("JNameAbbrevFilter: repl=%r", self.repl)
def make_filter_option(farg):
    """Register the command-line option(s) for filter argument `farg`.

    The option is added to `group_filter` under the name of the argument with
    underscores turned into dashes. Arguments documented with type 'bool' get
    two options, `--<name>` and `--no-<name>`; all others get a single
    `--<name>` option. Returns the documentation record found (or created)
    for the argument.
    """
    fopt = farg.replace('_', '-')
    argdoc = argdocs.get(farg, _ArgDoc(farg, None, None))

    # Help text: '%' is doubled before the text is handed to the parser, and
    # the text is re-wrapped to 80 columns.
    if argdoc.doc is None:
        argdocdoc = None
    else:
        wrapper = textwrap.TextWrapper(width=80, replace_whitespace=True,
                                       drop_whitespace=True)
        argdocdoc = wrapper.fill(argdoc.doc.replace('%', '%%'))

    if argdoc.argtypename == 'bool':
        # boolean switch: both options take an optional explicit value.
        shared = {
            'action': 'store',
            'dest': farg,
            'metavar': '<BOOLEAN ARG>',
            'nargs': '?',
            'default': None,
        }
        name_is_negated = fopt.startswith('no-')

        # only provide help for second option, unless the argument name itself
        # already starts with 'no-'.
        group_filter.add_argument('--' + fopt, const=True, type=butils.getbool,
                                  help=(argdocdoc if name_is_negated else ''),
                                  **shared)

        # NOTE(review): the '--no-' variant is registered for every boolean
        # argument, including names that already start with 'no-' -- confirm
        # this is intended.
        group_filter.add_argument('--no-' + fopt, const=False,
                                  type=lambda val: not butils.getbool(val),
                                  help=argdocdoc,
                                  **shared)

        # remember that we've seen a bool arg
        ns.has_a_boolean_arg = True
        return argdoc

    # Non-boolean argument: one plain option whose metavar shows the type.
    if argdoc.argtypename:
        if argdoc.argtypename not in ns.seen_types:
            ns.seen_types.append(argdoc.argtypename)
        metavar = '<%s>'%(argdoc.argtypename)
    else:
        metavar = '<ARG>'

    group_filter.add_argument('--' + fopt, action='store', dest=farg,
                              help=argdocdoc, metavar=metavar)
    return argdoc
def __init__(self, jobname=None, search_dirs=[], prefix="", journal_ref_in_note=False):
    r"""
    CiteArxivFilter constructor.

    Arguments:

      - jobname: the base name of the latex file whose citations we should
        analyze. Will search for jobname.aux and look for '\citation{..}'
        commands as they are generated by latex. The corresponding AUX file is
        searched for and analyzed. If -sJobname is not specified, then the
        LaTeX file name is guessed from the bibolamazi file name, as for the
        only_used filter and the duplicates filter.

      - search_dirs(CommaStrList): the .aux file will be searched for in this
        list of directories; separate directories with commas e.g.
        'path/to/dir1,path/to/dir2'. Paths are absolute or relative to
        bibolamazi file.

      - journal_ref_in_note(bool): keep the journal reference given by the
        arXiv in the note={} bibtex field. (default: No)

      - prefix: if set, citations should be in the the form '\cite{prefix:id}'
        (default: no prefix)
    """
    super(CiteArxivFilter, self).__init__()

    self.jobname = jobname

    # Parse the directory list; an empty result falls back to the default
    # search locations.
    dirs = CommaStrList(search_dirs)
    self.search_dirs = dirs
    self.journal_ref_in_note = getbool(journal_ref_in_note)
    self.prefix = prefix

    if not dirs:
        self.search_dirs = ['.', '_cleanlatexfiles']  # also for my cleanlatex utility :)

    logger.debug('citearxiv: jobname=%r' % (self.jobname,))
def __init__(self, fix_swedish_a=False, encode_utf8_to_latex=False, encode_latex_to_utf8=False,
             remove_type_from_phd=False, remove_full_braces=False, protect_names=None,
             remove_file_field=False):
    """
    Constructor method for a useless filter.
    """
    # The options are documented here in comments rather than in the
    # docstring, so that the docstring text itself stays unchanged:
    #
    #  - fix_swedish_a, encode_utf8_to_latex, encode_latex_to_utf8,
    #    remove_type_from_phd, remove_file_field: boolean switches.
    #    encode_utf8_to_latex and encode_latex_to_utf8 are mutually exclusive.
    #  - remove_full_braces: either a boolean, or a comma-separated list of
    #    field names.
    #  - protect_names: None, or a comma-separated list of names.

    BibFilter.__init__(self)

    self.fix_swedish_a = butils.getbool(fix_swedish_a)

    self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
    self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)
    if self.encode_utf8_to_latex and self.encode_latex_to_utf8:
        raise FilterError("Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'.")

    self.remove_type_from_phd = butils.getbool(remove_type_from_phd)

    try:
        self.remove_full_braces = butils.getbool(remove_full_braces)
        self.remove_full_braces_fieldlist = None  # all fields
    except ValueError:
        # not boolean, we have provided a field list.
        self.remove_full_braces = True
        self.remove_full_braces_fieldlist = [
            x.strip().lower() for x in remove_full_braces.split(',')
        ]

    if protect_names is not None:
        # Map each name to a case-insensitive whole-word pattern. The name is
        # escaped so that regex metacharacters in it (e.g. the dots in
        # 'i.i.d.') are matched literally, and empty items are skipped because
        # an empty name would produce a pattern that matches at every word
        # boundary.
        stripped_names = [x.strip() for x in protect_names.split(',')]
        self.protect_names = dict(
            (name, re.compile(r'\b' + re.escape(name) + r'\b', re.IGNORECASE))
            for name in stripped_names if name
        )
    else:
        self.protect_names = None

    self.remove_file_field = butils.getbool(remove_file_field)

    logger.debug('useless test filter: fix_swedish_a=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
                 'remove_type_from_phd=%r; '
                 'remove_full_braces=%r [fieldlist=%r], protect_names=%r, remove_file_field=%r'
                 % (self.fix_swedish_a, self.encode_utf8_to_latex, self.encode_latex_to_utf8,
                    self.remove_type_from_phd, self.remove_full_braces,
                    self.remove_full_braces_fieldlist,
                    self.protect_names, self.remove_file_field))
def __init__(self,
             fix_space_after_escape=False,
             encode_utf8_to_latex=False,
             encode_latex_to_utf8=False,
             remove_type_from_phd=False,
             remove_pages_from_book=False,
             remove_full_braces=False,
             remove_full_braces_not_lang=[],
             protect_names=None,
             remove_file_field=False,
             remove_fields=[],
             remove_doi_prefix=False,
             map_annote_to_note=False,
             auto_urlify=False,
             rename_language={},
             fix_mendeley_bug_urls=False,
             protect_capital_letter_after_dot=False,
             protect_capital_letter_at_begin=False,
             convert_dbl_quotes=False,
             convert_sgl_quotes=False,
             dbl_quote_macro=r'\qq',
             sgl_quote_macro=r'\q',
             unprotect_full_last_names=False,
             # obsolete:
             fix_swedish_a=False):
    """
    Constructor method for FixesFilter

    Arguments:

      - fix_space_after_escape(bool): transform `\\AA berg' and `M\\o ller'
        into `\\AA{}berg', `M\\o{}ller' to avoid bibtex styles from
        wrongfully splitting these words.

      - encode_utf8_to_latex(bool): encode known non-ascii characters into
        latex escape sequences.

      - encode_latex_to_utf8(bool): encode known latex escape sequences to
        unicode text (utf-8).

      - remove_type_from_phd(bool): Removes any `type=' field from
        @phdthesis{..} bibtex entries.

      - remove_pages_from_book(bool): Removes the `pages=' field from
        @book{..} bibtex entries.

      - remove_full_braces(BoolOrFieldList): removes overprotective global
        braces in field values.

      - remove_full_braces_not_lang(CommaStrList): (in conjunction with
        --remove-full-braces) removes the overprotective global braces only
        if the language of the entry (as per language={..} bibtex field) is
        not in the given list (case insensitive).

      - protect_names(CommaStrList): list of names to protect from bibtex
        style casing.

      - remove_file_field(bool): removes file={...} fields from all entries.

      - remove_fields(CommaStrList): removes given fields from all entries.

      - remove_doi_prefix(bool): removes `doi:' prefix from all DOIs, if
        present

      - map_annote_to_note(bool): maps `annote' bibtex field to a `note' field

      - auto_urlify: automatically wrap URLs into `\\url{}' commands.
        True/False, or a comma-separated list of fields to act on

      - rename_language(ColonCommaStrDict): replace e.g. `de' by `Deutsch'.
        Use format `alias1:language1,alias2:language2...'.

      - fix_mendeley_bug_urls(BoolOrFieldList): fix the `url' field for
        Mendeley's buggy output. Pass on a list of fields (comma-separated) to
        specify which fields to act on; by default if enabled only 'url'.

      - protect_capital_letter_after_dot(BoolOrFieldList): place first
        (capital) letter after a full stop or colon in protective braces (for
        the the given bibtex fields). Pass true or false here, or a list of
        fields on which to act (by default only 'title')

      - protect_capital_letter_at_begin(BoolOrFieldList): place first
        (capital) letter of a field in protective braces (for the the given
        bibtex fields). Pass true or false here, or a list of fields on which
        to act (by default only 'title')

      - convert_dbl_quotes(BoolOrFieldList): detect & convert double-quoted
        expressions to invoke a LaTeX macro. Pass true or false here, or a
        list of fields on which to act (by default
        'title,abstract,booktitle,series')

      - dbl_quote_macro: the macro to use for double-quotes when
        convert_dbl_quotes is set

      - convert_sgl_quotes(BoolOrFieldList): detect & convert single-quoted
        expressions to invoke a LaTeX macro. Pass true or false here, or a
        list of fields on which to act (by default
        'title,abstract,booktitle,series')

      - sgl_quote_macro: the macro to use for single-quotes when
        convert_sgl_quotes is set

      - unprotect_full_last_names(bool): remove curly braces around complete
        last names

      - fix_swedish_a(bool): (OBSOLETE, use -dFixSpaceAfterEscape instead.)
        transform `\\AA berg' into `\\AA{}berg' for `\\AA' and `\\o' (this
        problem occurs in files generated e.g. by Mendeley); revtex tends to
        insert a blank after the `\\AA' or `\\o' otherwise.
    """
    super(FixesFilter, self).__init__()

    self.fix_space_after_escape = butils.getbool(fix_space_after_escape)

    self.fix_swedish_a = butils.getbool(fix_swedish_a)  # OBSOLETE
    if self.fix_swedish_a:
        logger.warning("Fixes Filter: option -dFixSwedishA is now obsolete, in favor of the more"
                       " general and better option -dFixSpaceAfterEscape. The old option will"
                       " still work for backwards compatibility, but please consider changing to"
                       " the new option.")

    self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
    self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)
    if self.encode_utf8_to_latex and self.encode_latex_to_utf8:
        raise BibFilterError(self.name(),
                             "Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'.")

    self.remove_type_from_phd = butils.getbool(remove_type_from_phd)
    self.remove_pages_from_book = butils.getbool(remove_pages_from_book)

    # remove_full_braces is either a plain on/off switch (acting on all
    # fields: field list None) or an explicit list of fields, in which case
    # the feature is on exactly when the list is non-empty.
    remove_full_braces = BoolOrFieldList(remove_full_braces)
    if remove_full_braces.valuetype is bool:
        self.remove_full_braces = remove_full_braces.value
        self.remove_full_braces_fieldlist = None
    else:
        self.remove_full_braces = bool(len(remove_full_braces.value))
        self.remove_full_braces_fieldlist = [
            x.strip().lower() for x in remove_full_braces.value
        ]

    # The language exclusion list only applies when brace removal is enabled;
    # it is stored lower-cased, and as None when the feature is off.
    if self.remove_full_braces:
        if not remove_full_braces_not_lang:
            self.remove_full_braces_not_lang = []
        else:
            self.remove_full_braces_not_lang = [
                x.lower() for x in CommaStrList(remove_full_braces_not_lang)
            ]
    else:
        self.remove_full_braces_not_lang = None

    if protect_names is not None:
        def mkpatternrx(x):
            # Build the (name, compiled pattern) pair for one name.
            #
            # x may be a name, e.g. 'Bell', but it may also be a more complex
            # string, e.g. 'i.i.d.', so it is escaped to be matched literally.
            pattern = re.escape(x)
            # We need to make sure that a match doesn't begin or end in the
            # middle of a word. (e.g., "Bell" shouldn't match in "doorbell")
            if x[0].isalpha():
                pattern = r'\b' + pattern
            if x[-1].isalpha():
                pattern = pattern + r'\b'
            return (x, re.compile(pattern, re.IGNORECASE))

        # Parse the value as a comma-separated list, like the other list
        # options: iterating over a raw string would yield single characters.
        # Empty items are skipped.
        stripped_names = [x.strip() for x in CommaStrList(protect_names)]
        self.protect_names = [mkpatternrx(x) for x in stripped_names if x]
    else:
        self.protect_names = None

    self.remove_file_field = butils.getbool(remove_file_field)
    self.remove_fields = CommaStrList(remove_fields)
    self.remove_doi_prefix = butils.getbool(remove_doi_prefix)
    self.map_annote_to_note = butils.getbool(map_annote_to_note)

    # auto_urlify: a boolean selects the default field list (only "note"),
    # anything else is taken as an explicit list of fields.
    try:
        auto_urlify_bool = butils.getbool(auto_urlify)  # raises ValueError if not a boolean
        self.auto_urlify = ["note"] if auto_urlify_bool else []
    except ValueError:
        self.auto_urlify = CommaStrList(auto_urlify)

    # make sure key (language alias) is made lower-case
    self.rename_language = dict(
        (k.lower(), v) for k, v in iteritems(ColonCommaStrDict(rename_language))
    )
    self.rename_language_rx = None
    if self.rename_language:
        # e.g. with rename_language={'en':'english','de':'deutsch',
        # 'german':'deutsch', 'french':'francais'}, prepare a case-insensitive
        # regexp matching exactly one of the aliases, with optional
        # surrounding whitespace. The group is closed before the trailing
        # whitespace, so that whitespace is allowed after every alias (not
        # just the last alternative) and is never captured in `lang`.
        self.rename_language_rx = re.compile(
            r'^\s*(?P<lang>'
            + "|".join([re.escape(k.strip()) for k in self.rename_language])
            + r')\s*$',
            flags=re.IGNORECASE
        )

    # The following options accept either a boolean (enabled => default field
    # list, disabled => empty list) or an explicit list of fields.
    fix_mendeley_bug_urls = BoolOrFieldList(fix_mendeley_bug_urls)
    if fix_mendeley_bug_urls.valuetype is bool:
        self.fix_mendeley_bug_urls = ['url'] if fix_mendeley_bug_urls.value else []
    else:
        self.fix_mendeley_bug_urls = fix_mendeley_bug_urls.value

    protect_capital_letter_after_dot = BoolOrFieldList(protect_capital_letter_after_dot)
    if protect_capital_letter_after_dot.valuetype is bool:
        self.protect_capital_letter_after_dot = \
            ['title'] if protect_capital_letter_after_dot.value else []
    else:
        self.protect_capital_letter_after_dot = protect_capital_letter_after_dot.value

    protect_capital_letter_at_begin = BoolOrFieldList(protect_capital_letter_at_begin)
    if protect_capital_letter_at_begin.valuetype is bool:
        self.protect_capital_letter_at_begin = \
            ['title'] if protect_capital_letter_at_begin.value else []
    else:
        self.protect_capital_letter_at_begin = protect_capital_letter_at_begin.value

    self.dbl_quote_macro = dbl_quote_macro
    self.sgl_quote_macro = sgl_quote_macro

    convert_dbl_quotes = BoolOrFieldList(convert_dbl_quotes)
    if convert_dbl_quotes.valuetype is CommaStrList:
        self.convert_dbl_quotes = convert_dbl_quotes.value
    else:
        # just passed a bool, e.g. 'True'
        self.convert_dbl_quotes = \
            ['title','abstract','booktitle','series'] if convert_dbl_quotes.value else []

    convert_sgl_quotes = BoolOrFieldList(convert_sgl_quotes)
    if convert_sgl_quotes.valuetype is CommaStrList:
        self.convert_sgl_quotes = convert_sgl_quotes.value
    else:
        # just passed a bool, e.g. 'True'
        self.convert_sgl_quotes = \
            ['title','abstract','booktitle','series'] if convert_sgl_quotes.value else []

    # Normalized like every other boolean option, so that a textual value
    # such as 'False' is not mistaken for a true value.
    self.unprotect_full_last_names = butils.getbool(unprotect_full_last_names)

    logger.debug(('fixes filter: fix_space_after_escape=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
                  'remove_type_from_phd=%r; '
                  'remove_pages_from_book=%r; '
                  'remove_full_braces=%r [fieldlist=%r, not lang=%r], '
                  'protect_names=%r, remove_file_field=%r, '
                  'remove_fields=%r, remove_doi_prefix=%r, fix_swedish_a=%r, '
                  'map_annote_to_note=%r, auto_urlify=%r, rename_language=%r, rename_language_rx=%r, '
                  'fix_mendeley_bug_urls=%r,'
                  'protect_capital_letter_after_dot=%r,protect_capital_letter_at_begin=%r,'
                  'convert_dbl_quotes=%r,dbl_quote_macro=%r,convert_sgl_quotes=%r,sgl_quote_macro=%r,'
                  'unprotect_full_last_names=%r')
                 % (self.fix_space_after_escape, self.encode_utf8_to_latex, self.encode_latex_to_utf8,
                    self.remove_type_from_phd, self.remove_pages_from_book,
                    self.remove_full_braces, self.remove_full_braces_fieldlist,
                    self.remove_full_braces_not_lang,
                    self.protect_names, self.remove_file_field,
                    self.remove_fields, self.remove_doi_prefix, self.fix_swedish_a,
                    self.map_annote_to_note, self.auto_urlify, self.rename_language,
                    (self.rename_language_rx.pattern if self.rename_language_rx else None),
                    self.fix_mendeley_bug_urls,
                    self.protect_capital_letter_after_dot, self.protect_capital_letter_at_begin,
                    self.convert_dbl_quotes, self.dbl_quote_macro,
                    self.convert_sgl_quotes, self.sgl_quote_macro,
                    self.unprotect_full_last_names
                 ))
def __init__(self, mode="eprint", unpublished_mode=None, arxiv_journal_name="ArXiv e-prints",
             strip_unpublished_fields=[], note_string="", note_string_fmt="",
             no_archive_prefix=False, default_archive_prefix="arXiv",
             no_primary_class_for_old_ids=False, no_primary_class=False,
             theses_count_as_published=False, warn_journal_ref=True):
    """
    Constructor method for ArxivNormalizeFilter

    Arguments:

      - mode(Mode): the behavior to adopt for published articles which also
        have an arxiv ID

      - unpublished_mode(Mode): the behavior to adopt for unpublished articles
        who have an arxiv ID (if None, use the same mode as `mode').

      - strip_unpublished_fields(CommaStrList): (all modes): a list of bibtex
        fields to remove from all unpublished entries.

      - arxiv_journal_name: (in eprint mode): the string to set the journal={}
        entry to for unpublished entries

      - note_string: (obsolete, prefer -sNoteStringFmt) the string to insert
        in the `note' field (for modes 'unpublished-note', 'note', and
        'unpublished-note-notitle'). Use `%(arxivid)s' to include the ArXiv ID
        itself in the string. Default: '{arXiv:%(arxivid)s}'. Possible
        substitutions keys are 'arxivid','primaryclass','published','doi'.
        You can't specify both (-sNoteString and -sNoteStringFmt).

      - note_string_fmt: the string to insert in the `note' field for modes
        'unpublished-note', 'note' and 'unpublished-note-notitle'. This field
        uses Python's new advanced formatting mini-language (see
        `string.Formatter`). The available fields and formats are documented
        below in the filter documentation.

      - no_archive_prefix(bool): If set, then removes the 'archiveprefix' key
        entirely.

      - default_archive_prefix: In `eprint' mode, entries which don't have an
        archive prefix are given this one. Additionally, other entries whose
        archive prefix match this one up to letter casing are adjusted to this
        one. (Default: "arXiv")

      - no_primary_class_for_old_ids(bool): if True, then in `eprint' mode no
        'primaryclass' field is set if the entry has an "old" arXiv ID
        identifier already containing the primary-class, e.g.
        "quant-ph/YYYYZZZ".

      - no_primary_class(bool): if True, then the `primaryclass' field is
        always stripped.

      - theses_count_as_published(bool): if True, then entries of type
        @phdthesis and @mastersthesis count as published entries, otherwise
        not (the default).

      - warn_journal_ref(bool): if True, then for all articles that look
        unpublished in our database, but for which the arXiv.org API reports a
        published version, we produce a warning (this is the default; set this
        option to false to suppress these warnings).
    """
    BibFilter.__init__(self)

    self.mode = Mode(mode)
    # Unpublished entries follow the main mode unless a mode of their own is
    # given.
    self.unpublished_mode = (Mode(unpublished_mode) if unpublished_mode is not None
                             else self.mode)

    self.strip_unpublished_fields = CommaStrList(strip_unpublished_fields)
    self.arxiv_journal_name = arxiv_journal_name

    # The two note-string options are mutually exclusive; when neither is
    # given, the new-style format string gets a default value.
    self.note_string = note_string
    self.note_string_fmt = note_string_fmt
    if self.note_string and self.note_string_fmt:
        raise BibFilterError('arXiv', "Can't give both -sNoteString and -sNoteStringFmt !")
    if not self.note_string and not self.note_string_fmt:
        # nothing given, defaults to:
        self.note_string_fmt = "{{arXiv:{arxiv.arxivid}{if:(arxiv.isnewarxivid)( [{arxiv.primaryclass}])}}"

    # Normalized like the other boolean options, so that a textual value such
    # as 'False' is not mistaken for a true value.
    self.no_archive_prefix = butils.getbool(no_archive_prefix)
    self.default_archive_prefix = default_archive_prefix
    self.no_primary_class_for_old_ids = butils.getbool(no_primary_class_for_old_ids)
    self.no_primary_class = butils.getbool(no_primary_class)
    self.theses_count_as_published = butils.getbool(theses_count_as_published)
    self.warn_journal_ref = butils.getbool(warn_journal_ref)

    logger.debug('arxiv filter constructor: mode=%s; unpublished_mode=%s'
                 % (self.mode, self.unpublished_mode))
def __init__(self, fix_space_after_escape=False, encode_utf8_to_latex=False,
             encode_latex_to_utf8=False, remove_type_from_phd=False,
             remove_full_braces=False, remove_full_braces_not_lang=[],
             protect_names=None, remove_file_field=False, remove_fields=[],
             remove_doi_prefix=False, map_annote_to_note=False, auto_urlify=False,
             rename_language={}, fix_mendeley_bug_urls=False, fix_swedish_a=False):
    """
    Constructor method for FixesFilter

    Filter Arguments:
      - fix_space_after_escape(bool): transform `\\AA berg' and `M\\o ller' into
        `\\AA{}berg', `M\\o{}ller' to avoid bibtex styles from wrongfully splitting
        these words.
      - encode_utf8_to_latex(bool): encode known non-ascii characters into latex
        escape sequences.
      - encode_latex_to_utf8(bool): encode known latex escape sequences to unicode
        text (utf-8).
      - remove_type_from_phd(bool): Removes any `type=' field from @phdthesis{..}
        bibtex entries.
      - remove_full_braces: removes overprotective global braces in field values.
      - remove_full_braces_not_lang(CommaStrList): (in conjunction with
        --remove-full-braces) removes the overprotective global braces only if the
        language of the entry (as per language={..} bibtex field) is not in the
        given list (case insensitive).
      - protect_names: list of names to protect from bibtex style casing.
      - remove_file_field(bool): removes file={...} fields from all entries.
      - remove_fields(CommaStrList): removes given fields from all entries.
      - remove_doi_prefix(bool): removes `doi:' prefix from all DOIs, if present
      - map_annote_to_note(bool): maps `annote' bibtex field to a `note' field
      - auto_urlify: automatically wrap URLs into `\\url{}' commands
      - rename_language(ColonCommaStrDict): replace e.g. `de' by `Deutsch'. Use
        format `alias1:language1,alias2:language2...'.
      - fix_mendeley_bug_urls(bool): fix the `url' field for Mendeley's buggy output.
        Pass on a list of fields (comma-separated) to specify which fields to act
        on; by default if enabled only 'url'.
      - fix_swedish_a(bool): (OBSOLETE, use -dFixSpaceAfterEscape instead.)
        transform `\\AA berg' into `\\AA{}berg' for `\\AA' and `\\o' (this problem
        occurs in files generated e.g. by Mendeley); revtex tends to insert a blank
        after the `\\AA' or `\\o' otherwise.
    """
    # NOTE(review): the `- name(type):' entries in the docstring above look like they
    # are machine-read to determine option types; the wording is kept verbatim and
    # only re-wrapped. Confirm against the option parser before restructuring it.

    BibFilter.__init__(self)

    self.fix_space_after_escape = butils.getbool(fix_space_after_escape)
    self.fix_swedish_a = butils.getbool(fix_swedish_a)  # OBSOLETE
    if self.fix_swedish_a:
        logger.warning("Fixes Filter: option -dFixSwedishA is now obsolete, in favor of the more"
                       " general and better option -dFixSpaceAfterEscape. The old option will"
                       " still work for backwards compatibility, but please consider changing to"
                       " the new option.")

    self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
    self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)
    if self.encode_utf8_to_latex and self.encode_latex_to_utf8:
        # BibFilterError is given (filter name, message) everywhere else in this
        # file; the former one-argument call could not build the intended error.
        raise BibFilterError(
            "fixes",
            "Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'."
        )

    self.remove_type_from_phd = butils.getbool(remove_type_from_phd)

    # `remove_full_braces' is either a boolean (act on all fields) or a
    # comma-separated list of field names (act on those fields only).
    try:
        self.remove_full_braces = butils.getbool(remove_full_braces)
        self.remove_full_braces_fieldlist = None  # all fields
    except ValueError:
        # not boolean, we have provided a field list.
        self.remove_full_braces = True
        self.remove_full_braces_fieldlist = [
            x.strip().lower() for x in remove_full_braces.split(',')
        ]

    if self.remove_full_braces:
        if not remove_full_braces_not_lang:
            self.remove_full_braces_not_lang = []
        else:
            self.remove_full_braces_not_lang = [
                x.lower() for x in CommaStrList(remove_full_braces_not_lang)
            ]
    else:
        self.remove_full_braces_not_lang = None

    if protect_names is not None:
        # Map each protected name to a case-insensitive whole-word pattern.
        # u'\\b' is the same text as the former ur'\b' literal, but is also valid
        # syntax on Python 3. Empty items (e.g. from a trailing comma) are skipped:
        # their pattern would be `\b\b', which matches at every word boundary.
        stripped_names = [x.strip() for x in protect_names.split(u',')]
        self.protect_names = dict(
            (name, re.compile(u'\\b' + re.escape(name) + u'\\b', re.IGNORECASE))
            for name in stripped_names if name
        )
    else:
        self.protect_names = None

    self.remove_file_field = butils.getbool(remove_file_field)
    self.remove_fields = CommaStrList(remove_fields)
    self.remove_doi_prefix = butils.getbool(remove_doi_prefix)
    self.map_annote_to_note = butils.getbool(map_annote_to_note)

    # `auto_urlify' is either a boolean (act on the `note' field only) or a list
    # of field names.
    try:
        auto_urlify_bool = butils.getbool(auto_urlify)  # raises ValueError if not a boolean
        self.auto_urlify = ["note"] if auto_urlify_bool else []
    except ValueError:
        self.auto_urlify = CommaStrList(auto_urlify)

    # make sure key (language alias) is made lower-case
    self.rename_language = dict(
        (k.lower(), v) for k, v in ColonCommaStrDict(rename_language).items()
    )
    self.rename_language_rx = None
    if self.rename_language:
        # e.g. with rename_language={'en':'english','de':'deutsch',
        # 'german':'deutsch', 'french':'francais'}, prepare the regexp
        # '^\s*(?P<lang>en|de|german|french)\s*$'. Case INsensitive.
        #
        # The trailing `\s*' sits outside the group: inside it, `|' would attach
        # it to the last alias only and the whitespace would end up in `lang'.
        self.rename_language_rx = re.compile(
            r'^\s*(?P<lang>'
            + "|".join(re.escape(k.strip()) for k in self.rename_language)
            + r')\s*$',
            flags=re.IGNORECASE
        )

    if fix_mendeley_bug_urls:
        # NOTE(review): a textual boolean such as "True" would be taken as a field
        # name here -- presumably callers pass a real bool or a field list; verify.
        try:
            self.fix_mendeley_bug_urls = CommaStrList(fix_mendeley_bug_urls)
        except TypeError:
            # just passed, e.g., `True`
            self.fix_mendeley_bug_urls = ['url']
    else:
        self.fix_mendeley_bug_urls = []

    logger.debug(('fixes filter: fix_space_after_escape=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
                  'remove_type_from_phd=%r; '
                  'remove_full_braces=%r [fieldlist=%r, not lang=%r], '
                  'protect_names=%r, remove_file_field=%r, '
                  'remove_fields=%r, remove_doi_prefix=%r, fix_swedish_a=%r, '
                  'map_annote_to_note=%r, auto_urlify=%r, rename_language=%r, rename_language_rx=%r, '
                  'fix_mendeley_bug_urls=%r')
                 % (self.fix_space_after_escape,
                    self.encode_utf8_to_latex,
                    self.encode_latex_to_utf8,
                    self.remove_type_from_phd,
                    self.remove_full_braces,
                    self.remove_full_braces_fieldlist,
                    self.remove_full_braces_not_lang,
                    self.protect_names,
                    self.remove_file_field,
                    self.remove_fields,
                    self.remove_doi_prefix,
                    self.fix_swedish_a,
                    self.map_annote_to_note,
                    self.auto_urlify,
                    self.rename_language,
                    (self.rename_language_rx.pattern if self.rename_language_rx else None),
                    self.fix_mendeley_bug_urls))