Exemplo n.º 1
0
def doi2Bib(doi):
    """Returns a bibTeX string of metadata for a given DOI.

    The DOI resolver is queried with an ``application/x-bibtex`` accept
    header. The decoded body is returned only when the answer is served
    with exactly that content type.

    Returns an empty string when the answer has another (or no) content
    type, or when the request fails with URLError (a warning is emitted in
    that last case).

    From : https://gist.github.com/jrsmith3/5513926
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    req = Request(url, headers=headers)
    try:
        r = urlopen(req)
        try:
            # Header names may come back in any letter case, so normalise
            # them once instead of probing individual spellings.
            received = dict((name.lower(), value)
                            for name, value in dict(r.info()).items())
            if received.get('content-type') == 'application/x-bibtex':
                return r.read().decode('utf-8')
            return ''
        finally:
            # Release the connection whatever the outcome.
            r.close()
    except URLError:
        tools.warning('Unable to contact remote server to get the bibtex ' +
                      'entry for doi '+doi)
        return ''
Exemplo n.º 2
0
Arquivo: backend.py Projeto: m000/BMC
def diffFilesIndex():
    """Compute differences between Bibtex index and PDF files

    Returns a dict with bibtex entry:
        * full bibtex entry with file='' if file is not found
        * only file entry if file with missing bibtex entry

    Returns False (after a warning) if the index file cannot be opened.
    """
    files = tools.listDir(config.get("folder"))
    files = [i for i in files if tools.getExtension(i) in ['.pdf', '.djvu']]
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index_diff = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    for entry in index_diff.values():
        # An entry without any 'file' field is treated like an entry whose
        # file is missing, instead of aborting with a KeyError.
        if entry.get('file') in files:
            files.remove(entry['file'])
        else:
            entry['file'] = ''

    # Whatever is left on disk has no bibtex entry.
    for filename in files:
        index_diff[filename] = {'file': filename}

    # Return the dict that was actually updated rather than reading the
    # property again and relying on it handing back the same object.
    return index_diff
Exemplo n.º 3
0
Arquivo: backend.py Projeto: m000/BMC
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config

    Returns False if the index file cannot be opened or if no entry
    matches.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id == 'both' or file_id == 'id':
        bibtex_entry = bibtex.get(entry, False)
    if file_id == 'both' or file_id == 'file':
        if os.path.isfile(entry):
            for candidate in bibtex.values():
                try:
                    same = os.path.samefile(candidate['file'], entry)
                except (KeyError, OSError):
                    # No 'file' field, or the referenced file is gone:
                    # this entry cannot match, keep looking.
                    continue
                if same:
                    bibtex_entry = candidate
                    break
    # Only a found entry can be cleaned; False has no fields to delete.
    if clean and bibtex_entry is not False:
        for field in config.get("ignore_fields"):
            bibtex_entry.pop(field, None)
    return bibtex_entry
Exemplo n.º 4
0
 def save(self):
     """Serialize self.config as JSON into bmc.json in the config folder.

     Emits a warning and exits with status 1 if the file cannot be
     written.
     """
     target = self.config_path + "bmc.json"
     try:
         with open(target, 'w') as config_file:
             payload = json.dumps(self.config)
             config_file.write(payload)
     except IOError:
         tools.warning("Could not write config file.")
         sys.exit(1)
Exemplo n.º 5
0
def findHALId(src):
    """Searches for a valid HAL id in src

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns a tuple of the HAL id and the version
    or False if not found or an error occurred.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the process, so the
    # output is always read (even if the tool exits immediately) and a full
    # stderr pipe cannot block the extraction.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join([i.decode(stdout_encoding).strip()
                            for i in out.splitlines()])
    extractID = HAL_re.search(extractfull)
    if not extractID:
        return False
    return extractID.group(1), extractID.group(2)
Exemplo n.º 6
0
Arquivo: backend.py Projeto: m000/BMC
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config

    Returns False if the index file cannot be opened or if no entry
    matches.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id == 'both' or file_id == 'id':
        bibtex_entry = bibtex.get(entry, False)
    if file_id == 'both' or file_id == 'file':
        if os.path.isfile(entry):
            for candidate in bibtex.values():
                try:
                    same = os.path.samefile(candidate['file'], entry)
                except (KeyError, OSError):
                    # No 'file' field, or the referenced file is gone:
                    # this entry cannot match, keep looking.
                    continue
                if same:
                    bibtex_entry = candidate
                    break
    # Only a found entry can be cleaned; False has no fields to delete.
    if clean and bibtex_entry is not False:
        for field in config.get("ignore_fields"):
            bibtex_entry.pop(field, None)
    return bibtex_entry
Exemplo n.º 7
0
Arquivo: backend.py Projeto: m000/BMC
def diffFilesIndex():
    """Compute differences between Bibtex index and PDF files

    Returns a dict with bibtex entry:
        * full bibtex entry with file='' if file is not found
        * only file entry if file with missing bibtex entry

    Returns False (after a warning) if the index file cannot be opened.
    """
    files = tools.listDir(config.get("folder"))
    files = [i for i in files if tools.getExtension(i) in ['.pdf', '.djvu']]
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index_diff = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    for entry in index_diff.values():
        # An entry without any 'file' field is treated like an entry whose
        # file is missing, instead of aborting with a KeyError.
        if entry.get('file') in files:
            files.remove(entry['file'])
        else:
            entry['file'] = ''

    # Whatever is left on disk has no bibtex entry.
    for filename in files:
        index_diff[filename] = {'file': filename}

    # Return the dict that was actually updated rather than reading the
    # property again and relying on it handing back the same object.
    return index_diff
Exemplo n.º 8
0
def findHALId(src):
    """Searches for a valid HAL id in src

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns a tuple of the HAL id and the version
    or False if not found or an error occurred.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the process, so the
    # output is always read (even if the tool exits immediately) and a full
    # stderr pipe cannot block the extraction.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join([i.decode(stdout_encoding).strip()
                            for i in out.splitlines()])
    extractID = HAL_re.search(extractfull)
    if not extractID:
        return False
    return extractID.group(1), extractID.group(2)
Exemplo n.º 9
0
def doi2Bib(doi):
    """Returns a bibTeX string of metadata for a given DOI.

    The DOI resolver is queried with an ``application/x-bibtex`` accept
    header. The decoded body is returned only when the answer is served
    with exactly that content type.

    Returns an empty string when the answer has another (or no) content
    type, or when fetching the answer fails (a warning is emitted in that
    last case).

    From : https://gist.github.com/jrsmith3/5513926
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    req = Request(url, headers=headers)
    try:
        r = urlopen(req)
        try:
            # Header names may come back in any letter case, so normalise
            # them once instead of probing individual spellings.
            received = dict((name.lower(), value)
                            for name, value in dict(r.info()).items())
            if received.get('content-type') == 'application/x-bibtex':
                return r.read().decode('utf-8')
            return ''
        finally:
            # Release the connection whatever the outcome.
            r.close()
    except Exception:
        # Deliberately broad (any failure is reported the same way), but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        tools.warning('Unable to contact remote server to get the bibtex ' +
                      'entry for doi '+doi)
        return ''
Exemplo n.º 10
0
Arquivo: backend.py Projeto: m000/BMC
def getNewName(src, bibtex, tag='', override_format=None):
    """
    Return the formatted name according to config for the given
    bibtex entry

    src is the current file name, only used for its extension
    bibtex is the bibtex entry dict ('author', 'title' and 'type' are
    required)
    tag is an optional tag; when set, the file goes in a sub-folder named
    after the slugified tag, created if needed
    override_format replaces the format string read from config

    Raises ValueError if the entry type is neither article nor book and
    no override_format is given.
    """
    authors = re.split(' and ', bibtex['author'])

    if override_format is not None:
        new_name = override_format
    elif bibtex['type'] == 'article':
        new_name = config.get("format_articles")
    elif bibtex['type'] == 'book':
        new_name = config.get("format_books")
    else:
        # No format is configured for other types; fail with a clear
        # message instead of an unbound local variable further down.
        raise ValueError("No name format available for bibtex type " +
                         str(bibtex['type']))

    # The journal mask only applies to articles.
    if bibtex['type'] == 'article' and 'journal' in bibtex:
        new_name = new_name.replace("%j", bibtex['journal'])

    new_name = new_name.replace("%t", bibtex['title'])
    if 'year' in bibtex:
        new_name = new_name.replace("%Y", bibtex['year'])
    new_name = new_name.replace("%f", authors[0].split(',')[0].strip())
    new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
    new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
                                                for i in authors]))
    if('archiveprefix' in bibtex and
       'arXiv' in bibtex['archiveprefix']):
        # Version suffix is whatever follows the last 'v' of the eprint id.
        new_name = new_name.replace("%v",
                                    '-' +
                                    bibtex['eprint'][bibtex['eprint'].
                                                     rfind('v'):])
    else:
        new_name = new_name.replace("%v", '')

    for custom in config.get("format_custom"):
        new_name = custom(new_name)

    if tag == '':
        return (config.get("folder") + tools.slugify(new_name) +
                tools.getExtension(src))

    # Create the very directory the returned path points into: the
    # slugified tag, not the raw one.
    tag_dir = config.get("folder") + tools.slugify(tag)
    if not os.path.isdir(tag_dir):
        try:
            os.mkdir(tag_dir)
        except OSError:
            tools.warning("Unable to create tag dir " + tag_dir + ".")

    return (tag_dir + '/' + tools.slugify(new_name) +
            tools.getExtension(src))
Exemplo n.º 11
0
def editEntry(entry, file_id='both'):
    """Edit the bibtex entry matching entry and store it in the index.

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both

    The entry is passed through checkBibtex. If the resulting tag differs
    from the stored one, the file is moved to the name computed by
    backend.getNewName (another name is asked for while the target
    exists) and the former directory is removed when left empty.

    Returns True once the index has been rewritten, False if the entry
    does not exist or the index file cannot be opened.
    """
    bibtex = backend.getBibtex(entry, file_id)
    if bibtex is False:
        tools.warning("Entry "+entry+" does not exist.")
        return False

    if file_id == 'file':
        filename = entry
    else:
        filename = bibtex['file']
    new_bibtex = checkBibtex(filename, tools.parsed2Bibtex(bibtex))

    # Tag update
    if new_bibtex['tag'] != bibtex['tag']:
        print("Editing tag, moving file.")
        new_name = backend.getNewName(new_bibtex['file'],
                                      new_bibtex,
                                      new_bibtex['tag'])

        while os.path.exists(new_name):
            tools.warning("file "+new_name+" already exists.")
            extension = tools.getExtension(new_name)
            # Insert the marker right before the trailing extension only;
            # a plain str.replace would also rewrite the same text found
            # elsewhere in the path and misbehave on an empty extension.
            if extension and new_name.endswith(extension):
                default_rename = (new_name[:-len(extension)] + " (2)" +
                                  extension)
            else:
                default_rename = new_name + " (2)"
            rename = tools.rawInput("New name ["+default_rename+"]? ")
            if rename == '':
                new_name = default_rename
            else:
                new_name = rename
        new_bibtex['file'] = new_name

        try:
            shutil.move(bibtex['file'], new_bibtex['file'])
        except (shutil.Error, IOError, OSError):
            # shutil.move also fails with plain I/O errors (e.g. missing
            # source file), not only shutil.Error.
            tools.warning('Unable to move file '+bibtex['file']+' to ' +
                          new_bibtex['file'] + ' according to tag edit.')

        try:
            if not os.listdir(os.path.dirname(bibtex['file'])):
                os.rmdir(os.path.dirname(bibtex['file']))
        except OSError:
            tools.warning("Unable to delete empty tag dir " +
                          os.path.dirname(bibtex['file']))

    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = BibTexParser(fh.read())
        index = index.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    index[new_bibtex['id']] = new_bibtex
    backend.bibtexRewrite(index)
    return True
Exemplo n.º 12
0
 def save(self):
     """Serialize self.config as JSON into bmc.json in the config folder.

     The JSON is written with sorted keys and a 4-space indent. Emits a
     warning and exits with status 1 if the file cannot be written.
     """
     target = self.config_path + "bmc.json"
     try:
         with open(target, 'w') as config_file:
             payload = json.dumps(self.config,
                                  sort_keys=True,
                                  indent=4,
                                  separators=(',', ': '))
             config_file.write(payload)
     except IOError:
         tools.warning("Could not write config file.")
         sys.exit(1)
Exemplo n.º 13
0
Arquivo: backend.py Projeto: m000/BMC
def getNewName(src, bibtex, tag='', override_format=None):
    """
    Return the formatted name according to config for the given
    bibtex entry

    src is the current file name, only used for its extension
    bibtex is the bibtex entry dict ('author', 'title' and 'type' are
    required)
    tag is an optional tag; when set, the file goes in a sub-folder named
    after the slugified tag, created if needed
    override_format replaces the format string read from config

    Raises ValueError if the entry type is neither article nor book and
    no override_format is given.
    """
    authors = re.split(' and ', bibtex['author'])

    if override_format is not None:
        new_name = override_format
    elif bibtex['type'] == 'article':
        new_name = config.get("format_articles")
    elif bibtex['type'] == 'book':
        new_name = config.get("format_books")
    else:
        # No format is configured for other types; fail with a clear
        # message instead of an unbound local variable further down.
        raise ValueError("No name format available for bibtex type " +
                         str(bibtex['type']))

    # The journal mask only applies to articles.
    if bibtex['type'] == 'article' and 'journal' in bibtex:
        new_name = new_name.replace("%j", bibtex['journal'])

    new_name = new_name.replace("%t", bibtex['title'])
    if 'year' in bibtex:
        new_name = new_name.replace("%Y", bibtex['year'])
    new_name = new_name.replace("%f", authors[0].split(',')[0].strip())
    new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
    new_name = new_name.replace(
        "%a", ', '.join([i.split(',')[0].strip() for i in authors]))
    if ('archiveprefix' in bibtex and 'arXiv' in bibtex['archiveprefix']):
        # Version suffix is whatever follows the last 'v' of the eprint id.
        new_name = new_name.replace(
            "%v", '-' + bibtex['eprint'][bibtex['eprint'].rfind('v'):])
    else:
        new_name = new_name.replace("%v", '')

    for custom in config.get("format_custom"):
        new_name = custom(new_name)

    if tag == '':
        return (config.get("folder") + tools.slugify(new_name) +
                tools.getExtension(src))

    # Create the very directory the returned path points into: the
    # slugified tag, not the raw one.
    tag_dir = config.get("folder") + tools.slugify(tag)
    if not os.path.isdir(tag_dir):
        try:
            os.mkdir(tag_dir)
        except OSError:
            tools.warning("Unable to create tag dir " + tag_dir + ".")

    return (tag_dir + '/' + tools.slugify(new_name) +
            tools.getExtension(src))
Exemplo n.º 14
0
def editEntry(entry, file_id='both'):
    """Edit the bibtex entry matching entry and store it in the index.

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both

    The entry is passed through checkBibtex. If the resulting tag differs
    from the stored one, the file is moved to the name computed by
    backend.getNewName (another name is asked for while the target
    exists) and the former directory is removed when left empty.

    Returns True once the index has been rewritten, False if the entry
    does not exist or the index file cannot be opened.
    """
    bibtex = backend.getBibtex(entry, file_id)
    if bibtex is False:
        tools.warning("Entry " + entry + " does not exist.")
        return False

    if file_id == 'file':
        filename = entry
    else:
        filename = bibtex['file']
    new_bibtex = checkBibtex(filename, tools.parsed2Bibtex(bibtex))

    # Tag update
    if new_bibtex['tag'] != bibtex['tag']:
        print("Editing tag, moving file.")
        new_name = backend.getNewName(new_bibtex['file'], new_bibtex,
                                      new_bibtex['tag'])

        while os.path.exists(new_name):
            tools.warning("file " + new_name + " already exists.")
            extension = tools.getExtension(new_name)
            # Insert the marker right before the trailing extension only;
            # a plain str.replace would also rewrite the same text found
            # elsewhere in the path and misbehave on an empty extension.
            if extension and new_name.endswith(extension):
                default_rename = (new_name[:-len(extension)] + " (2)" +
                                  extension)
            else:
                default_rename = new_name + " (2)"
            rename = tools.rawInput("New name [" + default_rename + "]? ")
            if rename == '':
                new_name = default_rename
            else:
                new_name = rename
        new_bibtex['file'] = new_name

        try:
            shutil.move(bibtex['file'], new_bibtex['file'])
        except (shutil.Error, IOError, OSError):
            # shutil.move also fails with plain I/O errors (e.g. missing
            # source file), not only shutil.Error.
            tools.warning('Unable to move file ' + bibtex['file'] + ' to ' +
                          new_bibtex['file'] + ' according to tag edit.')

        try:
            if not os.listdir(os.path.dirname(bibtex['file'])):
                os.rmdir(os.path.dirname(bibtex['file']))
        except OSError:
            tools.warning("Unable to delete empty tag dir " +
                          os.path.dirname(bibtex['file']))

    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            index = bibtexparser.load(fh)
        index = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    index[new_bibtex['id']] = new_bibtex
    backend.bibtexRewrite(index)
    return True
Exemplo n.º 15
0
Arquivo: config.py Projeto: m000/BMC
 def save(self):
     """Serialize self.config as JSON into bmc.json in the config folder.

     The JSON is written with sorted keys and a 4-space indent. Emits a
     warning and exits with status 1 if the file cannot be written.
     """
     target = self.config_path + "bmc.json"
     try:
         with open(target, 'w') as config_file:
             payload = json.dumps(self.config,
                                  sort_keys=True,
                                  indent=4,
                                  separators=(',', ': '))
             config_file.write(payload)
     except IOError:
         tools.warning("Could not write config file.")
         sys.exit(1)
Exemplo n.º 16
0
def getEntries():
    """List the keys of every entry stored in the bibtex index.

    Returns False (after a warning) if the index file cannot be opened.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = BibTexParser(index_file.read())
        entries = parsed.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    return list(entries.keys())
Exemplo n.º 17
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexAppend(data):
    """Append data to the main bibtex file

    data is a dict for one entry in bibtex, as the one from bibtexparser output

    Raises IOError, after emitting a warning, if the index file cannot be
    opened or written.
    """
    try:
        with open(config.get("folder")+'index.bib', 'a', encoding='utf-8') \
                as fh:
            fh.write(tools.parsed2Bibtex(data) + "\n")
    except IOError:
        # Warn first (this used to sit unreachable after the raise), then
        # let the error propagate as callers already expect. A bare raise
        # keeps the original traceback.
        tools.warning("Unable to open index file.")
        raise
Exemplo n.º 18
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexAppend(data):
    """Append data to the main bibtex file

    data is a dict for one entry in bibtex, as the one from bibtexparser output

    Raises IOError, after emitting a warning, if the index file cannot be
    opened or written.
    """
    try:
        with open(config.get("folder")+'index.bib', 'a', encoding='utf-8') \
                as fh:
            fh.write(tools.parsed2Bibtex(data)+"\n")
    except IOError:
        # Warn first (this used to sit unreachable after the raise), then
        # let the error propagate as callers already expect. A bare raise
        # keeps the original traceback.
        tools.warning("Unable to open index file.")
        raise
Exemplo n.º 19
0
Arquivo: backend.py Projeto: m000/BMC
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    ident is the key of the entry in the index
    keep, when true, leaves the associated file on disk

    The directory of the file is removed if it ends up empty, then the
    entry is dropped from the index, which is rewritten.

    Returns False if the index cannot be opened or ident is not in it,
    True otherwise.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex.keys():
        return False

    # Resolve the file name once. The warnings below must not index the
    # entry again: it may lack a 'file' field, or be deleted already.
    filename = bibtex[ident].get('file', '')

    if not keep:
        try:
            os.remove(filename)
        except OSError:
            tools.warning("Unable to delete file associated to id " + ident +
                          " : " + filename)

    try:
        if not os.listdir(os.path.dirname(filename)):
            os.rmdir(os.path.dirname(filename))
    except OSError:
        tools.warning("Unable to delete empty tag dir " +
                      os.path.dirname(filename))

    try:
        del(bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      filename)
    return True
Exemplo n.º 20
0
Arquivo: backend.py Projeto: m000/BMC
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    ident is the key of the entry in the index
    keep, when true, leaves the associated file on disk

    The directory of the file is removed if it ends up empty, then the
    entry is dropped from the index, which is rewritten.

    Returns False if the index cannot be opened or ident is not in it,
    True otherwise.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex.keys():
        return False

    # Resolve the file name once. The warnings below must not index the
    # entry again: it may lack a 'file' field, or be deleted already.
    filename = bibtex[ident].get('file', '')

    if not keep:
        try:
            os.remove(filename)
        except OSError:
            tools.warning("Unable to delete file associated to id " + ident +
                          " : " + filename)

    try:
        if not os.listdir(os.path.dirname(filename)):
            os.rmdir(os.path.dirname(filename))
    except OSError:
        tools.warning("Unable to delete empty tag dir " +
                      os.path.dirname(filename))

    try:
        del (bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      filename)
    return True
Exemplo n.º 21
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexRewrite(data):
    """Regenerate the whole bibtex index file from data.

    data maps each entry key to its bibtex entry dict. Every entry is
    rendered with tools.parsed2Bibtex and followed by a newline.

    Returns False (after a warning) if the index file cannot be written.
    """
    rendered = ''.join([tools.parsed2Bibtex(data[name]) + "\n"
                        for name in data.keys()])
    try:
        with open(config.get("folder") + 'index.bib', 'w',
                  encoding='utf-8') as index_file:
            index_file.write(rendered)
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
Exemplo n.º 22
0
def openFile(ident):
    """Launch xdg-open on the file attached to the index entry ident.

    Returns True once the command has been started, False if the index
    file cannot be opened or ident is not one of its keys.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    if ident in list(entries.keys()):
        subprocess.Popen(['xdg-open', entries[ident]['file']])
        return True
    return False
Exemplo n.º 23
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexEdit(ident, modifs):
    """Apply the fields of modifs to entry ident and rewrite the index.

    ident is the key of the entry in the bibtex index
    modifs is a dict of field name to new value

    Returns False (after a warning) if the index file cannot be opened.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    for field in modifs.keys():
        value = modifs[field]
        entries[ident][field] = value
    bibtexRewrite(entries)
Exemplo n.º 24
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexRewrite(data):
    """Regenerate the whole bibtex index file from data.

    data maps each entry key to its bibtex entry dict. Every entry is
    rendered with tools.parsed2Bibtex and followed by a newline.

    Returns False (after a warning) if the index file cannot be written.
    """
    rendered = ''.join([tools.parsed2Bibtex(data[name]) + "\n"
                        for name in data.keys()])
    try:
        with open(config.get("folder") + 'index.bib', 'w',
                  encoding='utf-8') as index_file:
            index_file.write(rendered)
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
Exemplo n.º 25
0
Arquivo: backend.py Projeto: m000/BMC
def getEntries(full=False):
    """Return the entries of the bibtex index.

    With full set, the whole dict of entries is returned; otherwise only
    the list of its keys.

    Returns False (after a warning) if the index file cannot be opened.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    return entries if full else list(entries.keys())
Exemplo n.º 26
0
Arquivo: backend.py Projeto: m000/BMC
def getEntries(full=False):
    """Return the entries of the bibtex index.

    With full set, the whole dict of entries is returned; otherwise only
    the list of its keys.

    Returns False (after a warning) if the index file cannot be opened.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    return entries if full else list(entries.keys())
Exemplo n.º 27
0
Arquivo: backend.py Projeto: m000/BMC
def bibtexEdit(ident, modifs):
    """Apply the fields of modifs to entry ident and rewrite the index.

    ident is the key of the entry in the bibtex index
    modifs is a dict of field name to new value

    Returns False (after a warning) if the index file cannot be opened.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = bibtexparser.load(index_file)
        entries = parsed.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    for field in modifs.keys():
        value = modifs[field]
        entries[ident][field] = value
    bibtexRewrite(entries)
Exemplo n.º 28
0
def openFile(ident):
    """Launch xdg-open on the file attached to the index entry ident.

    Returns True once the command has been started, False if the index
    file cannot be opened or ident is not one of its keys.
    """
    try:
        with open(config.get("folder") + 'index.bib', 'r',
                  encoding='utf-8') as index_file:
            parsed = BibTexParser(index_file.read())
        entries = parsed.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    if ident in list(entries.keys()):
        subprocess.Popen(['xdg-open', entries[ident]['file']])
        return True
    return False
Exemplo n.º 29
0
Arquivo: config.py Projeto: m000/BMC
 def load_masks(self):
     """Load the custom masks defined in masks.py of the config folder.

     Every function found in that module is appended to the
     "format_custom" list of the configuration. Nothing happens when
     masks.py does not exist. If the import fails, self.clean() is called
     and a warning is emitted.
     """
     if not os.path.isfile(self.config_path + "masks.py"):
         return
     try:
         self.info = imp.find_module("masks", [self.config_path])
         self.masks = imp.load_module("masks", *self.info)
         functions = inspect.getmembers(self.masks, inspect.isfunction)
         for _name, function in functions:
             self.config["format_custom"].append(function)
     except ImportError:
         self.clean()
         tools.warning("Unable to import masks config file.")
     finally:
         # Close the file handle returned by find_module, when there is
         # one to close.
         try:
             self.info[0].close()
         except AttributeError:
             pass
Exemplo n.º 30
0
 def load_masks(self):
     """Load the custom masks defined in masks.py of the config folder.

     Every function found in that module is appended to the
     "format_custom" list of the configuration. Nothing happens when
     masks.py does not exist. If the import fails, self.clean() is called
     and a warning is emitted.
     """
     if not os.path.isfile(self.config_path + "masks.py"):
         return
     try:
         self.info = imp.find_module("masks", [self.config_path])
         self.masks = imp.load_module("masks", *self.info)
         functions = inspect.getmembers(self.masks, inspect.isfunction)
         for _name, function in functions:
             self.config["format_custom"].append(function)
     except ImportError:
         self.clean()
         tools.warning("Unable to import masks config file.")
     finally:
         # Close the file handle returned by find_module, when there is
         # one to close.
         try:
             self.info[0].close()
         except AttributeError:
             pass
Exemplo n.º 31
0
def downloadFile(url, filetype, manual, autoconfirm, tag):
    """Download url into a temporary file and import it with addFile.

    url is the address to fetch through fetcher.download
    filetype, manual, autoconfirm and tag are passed on to addFile as is

    Returns what addFile returns (False when the import failed), or False
    if the download itself failed.
    """
    print('Downloading ' + url)
    dl, contenttype = fetcher.download(url)

    if dl is False:
        tools.warning("Could not fetch " + url)
        return False

    print('Download finished')
    tmp = tempfile.NamedTemporaryFile(suffix='.' + contenttype)
    try:
        # Write through the handle we already hold and flush, so addFile
        # sees the complete content under tmp.name.
        tmp.write(dl)
        tmp.flush()
        return addFile(tmp.name, filetype, manual, autoconfirm, tag)
    finally:
        # Closing removes the temporary file on every path, including a
        # failed or aborted import.
        tmp.close()
Exemplo n.º 32
0
def downloadFile(url, filetype, manual, autoconfirm, tag):
    """Download url into a temporary file and import it with addFile.

    url is the address to fetch through fetcher.download
    filetype, manual, autoconfirm and tag are passed on to addFile as is

    Returns what addFile returns (False when the import failed), or False
    if the download itself failed.
    """
    print('Downloading '+url)
    dl, contenttype = fetcher.download(url)

    if dl is False:
        tools.warning("Could not fetch "+url)
        return False

    print('Download finished')
    tmp = tempfile.NamedTemporaryFile(suffix='.'+contenttype)
    try:
        # Write through the handle we already hold and flush, so addFile
        # sees the complete content under tmp.name.
        tmp.write(dl)
        tmp.flush()
        return addFile(tmp.name, filetype, manual, autoconfirm, tag)
    finally:
        # Closing removes the temporary file on every path, including a
        # failed or aborted import.
        tmp.close()
Exemplo n.º 33
0
 def load(self):
     """Load the configuration from bmc.json, initializing it if needed.

     The config folder is ensured through make_sure_path_exists. When
     bmc.json is missing, self.initialize() is called; otherwise the file
     is parsed into self.config. Masks are loaded in both cases.

     Emits a warning and exits with status 1 if the folder cannot be
     created or the config file cannot be read.
     """
     config_file = self.config_path + "bmc.json"
     try:
         folder_exists = make_sure_path_exists(self.config_path)
         initialized = bool(folder_exists and os.path.isfile(config_file))
     except OSError:
         tools.warning("Unable to create ~/.config folder.")
         sys.exit(1)

     if initialized:
         try:
             with open(config_file, 'r') as fh:
                 self.config = json.load(fh)
         except (ValueError, IOError):
             tools.warning("Config file could not be read.")
             sys.exit(1)
     else:
         self.initialize()
     self.load_masks()
Exemplo n.º 34
0
def update(entry):
    """Look for a newer arXiv version of entry and offer to import it.

    Does nothing when backend.updateArXiv returns False. Otherwise the
    user is asked whether to download the new version, then whether to
    delete the previous one.
    """
    new_version = backend.updateArXiv(entry)
    if new_version is False:
        return

    print("New version found for " + entry)
    print("\t Title: " + new_version['title'])
    confirm = tools.rawInput("Download it ? [Y/n] ")
    if confirm.lower() == 'n':
        return

    # Named here so the failure message below can refer to it (it used to
    # reference an undefined name).
    url = 'http://arxiv.org/pdf/' + new_version['eprint']
    # NOTE(review): called with three arguments; confirm this matches the
    # downloadFile signature used alongside this function.
    new_name = downloadFile(url, 'article', False)
    if new_name is not False:
        print(new_version['eprint'] + " successfully imported as " +
              new_name)
    else:
        tools.warning("An error occurred while downloading " + url)

    confirm = tools.rawInput("Delete previous version ? [y/N] ")
    if confirm.lower() == 'y':
        # Try deleting by id first, then by file name.
        if not backend.deleteId(entry):
            if not backend.deleteFile(entry):
                tools.warning("Unable to remove previous version.")
                return
        print("Previous version successfully deleted.")
Exemplo n.º 35
0
def update(entry):
    """Look for a newer arXiv version of entry and offer to import it.

    Does nothing when backend.updateArXiv returns False. Otherwise the
    user is asked whether to download the new version, then whether to
    delete the previous one.
    """
    new_version = backend.updateArXiv(entry)
    if new_version is False:
        return

    print("New version found for "+entry)
    print("\t Title: "+new_version['title'])
    confirm = tools.rawInput("Download it ? [Y/n] ")
    if confirm.lower() == 'n':
        return

    # Named here so the failure message below can refer to it (it used to
    # reference an undefined name).
    url = 'http://arxiv.org/pdf/'+new_version['eprint']
    # NOTE(review): called with three arguments; confirm this matches the
    # downloadFile signature used alongside this function.
    new_name = downloadFile(url, 'article', False)
    if new_name is not False:
        print(new_version['eprint']+" successfully imported as "+new_name)
    else:
        tools.warning("An error occurred while downloading "+url)

    confirm = tools.rawInput("Delete previous version ? [y/N] ")
    if confirm.lower() == 'y':
        # Try deleting by id first, then by file name.
        if not backend.deleteId(entry):
            if not backend.deleteFile(entry):
                tools.warning("Unable to remove previous version.")
                return
        print("Previous version successfully deleted.")
Exemplo n.º 36
0
Arquivo: config.py Projeto: m000/BMC
 def load(self):
     """Load the configuration from bmc.json, initializing it if needed.

     The config folder is ensured through make_sure_path_exists. When
     bmc.json is missing, self.initialize() is called; otherwise the file
     is parsed into self.config and the configured "folder" is ensured as
     well. Masks are loaded in both cases.

     Emits a warning and exits with status 1 if a folder cannot be
     created or the config file cannot be read.
     """
     config_file = self.config_path + "bmc.json"
     try:
         folder_exists = make_sure_path_exists(self.config_path)
         initialized = bool(folder_exists and os.path.isfile(config_file))
     except OSError:
         tools.warning("Unable to create ~/.config folder.")
         sys.exit(1)

     if initialized:
         try:
             with open(config_file, 'r') as fh:
                 self.config = json.load(fh)
         except (ValueError, IOError):
             tools.warning("Config file could not be read.")
             sys.exit(1)
         try:
             make_sure_path_exists(self.get("folder"))
         except OSError:
             tools.warning("Unable to create paper storage folder.")
             sys.exit(1)
     else:
         self.initialize()
     self.load_masks()
Exemplo n.º 37
0
def findISBN(src):
    """Search for a valid ISBN in src.

    ``src`` is the path of a ``.pdf`` or ``.djvu`` file; its text is
    extracted with ``pdftotext`` or ``djvutxt`` respectively and searched
    with ``isbn_re``.

    Returns the ISBN with dashes and spaces removed, or False if the file
    type is not supported, no ISBN was found or the extraction tool exited
    with an error.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together. Polling the process
    # and reading stdout only could skip the search entirely when the tool
    # exited before the first poll, and could block on a full stderr pipe.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    # Replace the stray 'Œ' before lowercasing: lower() turns it into 'œ',
    # which the replacement would then never match.
    extractISBN = isbn_re.search(extractfull.replace('Œ', '-').lower())
    if not extractISBN:
        return False

    # Clean ISBN is the ISBN number without separators
    return extractISBN.group(1).replace('-', '').replace(' ', '')
Exemplo n.º 38
0
def findISBN(src):
    """Search for a valid ISBN in src.

    ``src`` is the path of a ``.pdf`` or ``.djvu`` file; its text is
    extracted with ``pdftotext`` or ``djvutxt`` respectively and searched
    with ``isbn_re``.

    Returns the ISBN with dashes and spaces removed, or False if the file
    type is not supported, no ISBN was found or the extraction tool exited
    with an error.
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together. Polling the process
    # and reading stdout only could skip the search entirely when the tool
    # exited before the first poll, and could block on a full stderr pipe.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    # Replace the stray 'Œ' before lowercasing: lower() turns it into 'œ',
    # which the replacement would then never match.
    extractISBN = isbn_re.search(extractfull.replace('Œ', '-').lower())
    if not extractISBN:
        return False

    # Clean ISBN is the ISBN number without separators
    return extractISBN.group(1).replace('-', '').replace(' ', '')
Exemplo n.º 39
0
Arquivo: backend.py Projeto: m000/BMC
def deleteFile(filename, keep=False):
    """Remove from the index every entry whose file is ``filename``.

    For each matching entry the file is removed from disk (unless ``keep``
    is true), the directory containing ``filename`` is removed if it is
    empty, and the entry is dropped before the index is rewritten through
    ``bibtexRewrite``. When no entry matches but ``filename`` exists, the
    file itself is removed.

    Returns True if at least one entry matched, False otherwise (also when
    the index file could not be opened).
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as index_file:
            parsed_index = bibtexparser.load(index_file)
        bibtex = parsed_index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    found = False
    for key in list(bibtex):
        try:
            if not os.path.samefile(bibtex[key]['file'], filename):
                continue
            found = True

            if not keep:
                try:
                    os.remove(bibtex[key]['file'])
                except (KeyError, OSError):
                    tools.warning("Unable to delete file associated " +
                                  "to id " + key + " : " +
                                  bibtex[key]['file'])

            tag_dir = os.path.dirname(filename)
            try:
                if not os.listdir(tag_dir):
                    os.rmdir(tag_dir)
            except OSError:
                tools.warning("Unable to delete empty tag dir " + tag_dir)

            try:
                del bibtex[key]
            except KeyError:
                tools.warning("No associated bibtex entry in index for " +
                              "file " + bibtex[key]['file'])
        except (KeyError, OSError):
            # Entries without a usable file path are skipped.
            pass

    if found:
        bibtexRewrite(bibtex)
    elif os.path.isfile(filename):
        os.remove(filename)
    return found
Exemplo n.º 40
0
Arquivo: backend.py Projeto: m000/BMC
def deleteFile(filename, keep=False):
    """Remove from the index every entry whose file is ``filename``.

    For each matching entry the file is removed from disk (unless ``keep``
    is true), the directory containing ``filename`` is removed if it is
    empty, and the entry is dropped before the index is rewritten through
    ``bibtexRewrite``. When no entry matches but ``filename`` exists, the
    file itself is removed.

    Returns True if at least one entry matched, False otherwise (also when
    the index file could not be opened).
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as index_file:
            parsed_index = bibtexparser.load(index_file)
        bibtex = parsed_index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    found = False
    for key in list(bibtex):
        try:
            if not os.path.samefile(bibtex[key]['file'], filename):
                continue
            found = True

            if not keep:
                try:
                    os.remove(bibtex[key]['file'])
                except (KeyError, OSError):
                    tools.warning("Unable to delete file associated " +
                                  "to id " + key+" : "+bibtex[key]['file'])

            tag_dir = os.path.dirname(filename)
            try:
                if not os.listdir(tag_dir):
                    os.rmdir(tag_dir)
            except OSError:
                tools.warning("Unable to delete empty tag dir " + tag_dir)

            try:
                del bibtex[key]
            except KeyError:
                tools.warning("No associated bibtex entry in index for " +
                              "file " + bibtex[key]['file'])
        except (KeyError, OSError):
            # Entries without a usable file path are skipped.
            pass

    if found:
        bibtexRewrite(bibtex)
    elif os.path.isfile(filename):
        os.remove(filename)
    return found
Exemplo n.º 41
0
def checkBibtex(filename, bibtex_string):
    """Show a bibtex entry to the user and let them fix it in an editor.

    ``bibtex_string`` is parsed and its first entry is displayed for
    confirmation. While the user answers 'n' (or the entry lacks a title,
    an author or a year), the entry is opened in ``EDITOR`` through a
    temporary file and parsed again.

    If the original entry had a ``file`` field and the confirmed entry
    points to another path, the file is moved to that new path.

    ``filename`` is only used in the messages printed to the user.

    Returns the confirmed entry as a dict. Exits the program if the user
    interrupts the first confirmation prompt.
    """
    print("The bibtex entry found for " + filename + " is:")

    bibtex = bibtexparser.loads(bibtex_string)
    bibtex = bibtex.entries_dict
    try:
        # Only the first parsed entry is considered.
        bibtex = bibtex[list(bibtex.keys())[0]]
        # Check entries are correct
        if "title" not in bibtex:
            raise AssertionError
        if "authors" not in bibtex and "author" not in bibtex:
            raise AssertionError
        if "year" not in bibtex:
            raise AssertionError
        # Print the bibtex and confirm
        print(tools.parsed2Bibtex(bibtex))
        check = tools.rawInput("Is it correct? [Y/n] ")
    except KeyboardInterrupt:
        sys.exit()
    except (IndexError, KeyError, AssertionError):
        # Nothing parsed or a mandatory field is missing: force an edit.
        print("Missing author, year or title in bibtex.")
        check = 'n'

    # Remember the original file location to detect a rename later on.
    try:
        old_filename = bibtex['file']
    except KeyError:
        old_filename = False

    while check.lower() == 'n':
        # Let the user edit the entry, then read back what was saved.
        with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
            tmpfile.write(bibtex_string.encode('utf-8'))
            tmpfile.flush()
            subprocess.call([EDITOR, tmpfile.name])
            tmpfile.seek(0)
            bibtex = bibtexparser.loads(tmpfile.read().decode('utf-8') + "\n")

        bibtex = bibtex.entries_dict
        try:
            bibtex = bibtex[list(bibtex.keys())[0]]
        except (IndexError, KeyError):
            tools.warning("Invalid bibtex entry")
            # The editor is reopened on an empty buffer.
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue
        # The entry is rejected only when authors, title and year are all
        # missing.
        # NOTE(review): the first check above rejects when any one of them
        # is missing and also accepts 'author' - confirm whether 'and' is
        # intended here.
        if ('authors' not in bibtex and 'title' not in bibtex
                and 'year' not in bibtex):
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue

        if old_filename is not False and 'file' not in bibtex:
            # The entry used to reference a file; it must still do so.
            tools.warning("Invalid bibtex entry. No filename given.")
            tools.rawInput("Press Enter to go back to editor.")
            check = 'n'
        else:
            bibtex_string = tools.parsed2Bibtex(bibtex)
            print("\nThe bibtex entry for " + filename + " is:")
            print(bibtex_string)
            check = tools.rawInput("Is it correct? [Y/n] ")
    # The 'file' field was edited: move the file to its new location.
    if old_filename is not False and old_filename != bibtex['file']:
        try:
            print("Moving file to new location…")
            shutil.move(old_filename, bibtex['file'])
        except shutil.Error:
            tools.warning("Unable to move file " + old_filename + " to " +
                          bibtex['file'] + ". You should check it manually.")

    return bibtex
Exemplo n.º 42
0
                               nargs='+',
                               help="your query, see README for more info.",
                               type=commandline_arg)
    parser_search.set_defaults(func='search')

    args = parser.parse_args()
    try:
        if args.func == 'download':
            skipped = []
            for url in args.url:
                new_name = downloadFile(url, args.type, args.manual, args.y,
                                        args.tag)
                if new_name is not False:
                    print(url + " successfully imported as " + new_name)
                else:
                    tools.warning("An error occurred while downloading " + url)
                    skipped.append(url)
            if len(skipped) > 0:
                print("\nSkipped files:")
                for i in skipped:
                    print(i)
            sys.exit()

        if args.func == 'import':
            skipped = []
            for filename in list(set(args.file) - set(args.skip)):
                new_name = addFile(filename, args.type, args.manual, args.y,
                                   args.tag, not args.inplace)
                if new_name is not False:
                    print(filename + " successfully imported as " + new_name +
                          ".")
Exemplo n.º 43
0
def findArticleID(src, only=("DOI", "arXiv")):
    """Search for a valid article ID (DOI or ArXiv) in src.

    ``src`` is the path of a ``.pdf`` or ``.djvu`` file; its text is
    extracted with ``pdftotext`` or ``djvutxt`` respectively. ``only``
    lists the identifier kinds to look for ("DOI" and/or "arXiv").

    Returns a tuple (type, first matching ID) where type is "DOI" or
    "arXiv", or (False, False) if the file type is not supported, nothing
    was found or the extraction tool exited with an error.
    From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/
    and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return (False, False)

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together. Polling the process
    # and reading stdout only skipped the search entirely when the tool
    # exited before the first poll, and could block on a full stderr pipe.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return (False, False)

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    extract_type = False
    extractID = None

    # Try to extract DOI
    if "DOI" in only:
        # Replace the stray 'Œ' before lowercasing: lower() turns it into
        # 'œ', which the replacement would then never match.
        extractID = doi_re.search(extractfull.replace('Œ', '-').lower())
        if not extractID:
            # PNAS fix
            extractID = doi_pnas_re.search(extractfull.
                                           lower().
                                           replace('pnas', '/pnas'))
            if not extractID:
                # JSB fix
                extractID = doi_jsb_re.search(extractfull.lower())
        if extractID:
            extract_type = "DOI"

    # Try to extract arXiv
    if "arXiv" in only:
        tmp_extractID = arXiv_re.search(extractfull)
        if tmp_extractID:
            if not extractID or extractID.start(0) > tmp_extractID.start(1):
                # Only use arXiv id if it is before the DOI in the pdf
                extractID = tmp_extractID
                extract_type = "arXiv"

    if extractID is not None and extract_type == "DOI":
        # If DOI extracted, clean it and return it
        cleanDOI = extractID.group(0).replace(':', '').replace(' ', '')
        if clean_doi_re.search(cleanDOI):
            cleanDOI = cleanDOI[1:]
        # FABSE J fix
        if clean_doi_fabse_re.search(cleanDOI):
            cleanDOI = cleanDOI[:20]
        # Second JCB fix
        if clean_doi_jcb_re.search(cleanDOI):
            cleanDOI = cleanDOI[:21]
        if len(cleanDOI) > 40:
            # Overlong match: cut at the first letter (or dot, mapped to
            # 'A') that follows the first digit after the 8-char prefix.
            cleanDOItemp = clean_doi_len_re.sub('000', cleanDOI)
            reps = {'.': 'A', '-': '0'}
            cleanDOItemp = tools.replaceAll(cleanDOItemp[8:], reps)
            # When no cut point is found, keep the historical behaviour of
            # cutting at the last scanned index.
            # NOTE(review): this drops the final character - confirm intent.
            cut = len(cleanDOItemp) - 1
            digitStart = False
            for i, char in enumerate(cleanDOItemp):
                if char.isdigit():
                    digitStart = True
                # This test must not be nested under the isdigit() branch,
                # where it could never be true.
                if char.isalpha() and digitStart:
                    cut = i
                    break
            cleanDOI = cleanDOI[0:(8+cut)]
        return ("DOI", cleanDOI)
    elif extractID is not None and extract_type == "arXiv":
        # If arXiv id is extracted, return it
        return ("arXiv", extractID.group(1))
    return (False, False)
Exemplo n.º 44
0
def addFile(src, filetype, manual, autoconfirm, tag):
    """Add a file to the library.

    Unless ``manual`` is true, an identifier (DOI, arXiv id or ISBN) is
    searched for in the file. If none is found the user is asked to supply
    one, to enter the bibtex manually, or to skip the file. The matching
    bibtex entry is fetched, optionally checked by the user, and the file
    is copied into the library under the name given by
    ``backend.getNewName`` before the entry is appended to the index.

    Parameters:
        src: path of the file to import.
        filetype: 'article', 'book' or None. With None, article identifiers
            are tried first, then the ISBN.
        manual: if true, skip the automatic identifier detection.
        autoconfirm: if true, skip the interactive bibtex check and the tag
            prompt.
        tag: tag to store with the entry when ``autoconfirm`` is true.

    Returns the path of the imported copy, or False if the user skipped the
    file. Exits the program if the copy raises ``shutil.Error``.
    """
    doi = False
    arxiv = False
    isbn = False

    if not manual:
        try:
            if filetype == 'article' or filetype is None:
                id_type, article_id = fetcher.findArticleID(src)
                if id_type == "DOI":
                    doi = article_id
                elif id_type == "arXiv":
                    arxiv = article_id

            if filetype == 'book' or (doi is False and arxiv is False and
                                      filetype is None):
                isbn = fetcher.findISBN(src)
        except KeyboardInterrupt:
            # Interrupting the detection falls back to manual entry.
            doi = False
            arxiv = False
            isbn = False

    if doi is False and isbn is False and arxiv is False:
        if filetype is None:
            tools.warning("Could not determine the DOI nor the arXiv id nor " +
                          "the ISBN for "+src+". Switching to manual entry.")
            doi_arxiv_isbn = ''
            while(doi_arxiv_isbn not in
                  ['doi', 'arxiv', 'isbn', 'manual', 'skip']):
                doi_arxiv_isbn = (tools.rawInput("DOI / arXiv " +
                                                 "/ ISBN / manual / skip? ").
                                  lower())
            if doi_arxiv_isbn == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv_isbn == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv_isbn == 'isbn':
                isbn = tools.rawInput('ISBN? ')
            elif doi_arxiv_isbn == 'skip':
                return False
        elif filetype == 'article':
            tools.warning("Could not determine the DOI nor the arXiv id for " +
                          src+", switching to manual entry.")
            doi_arxiv = ''
            while doi_arxiv not in ['doi', 'arxiv', 'manual', 'skip']:
                doi_arxiv = (tools.rawInput("DOI / arXiv / manual / skip? ").
                             lower())
            if doi_arxiv == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv == 'skip':
                return False
        elif filetype == 'book':
            isbn_manual = ''
            while isbn_manual not in ['isbn', 'manual', 'skip']:
                isbn_manual = tools.rawInput("ISBN / manual / skip? ").lower()
            if isbn_manual == 'isbn':
                isbn = (tools.rawInput('ISBN? ').
                        replace(' ', '').
                        replace('-', ''))
            elif isbn_manual == 'skip':
                return False
    elif doi is not False:
        print("DOI for "+src+" is "+doi+".")
    elif arxiv is not False:
        print("ArXiv id for "+src+" is "+arxiv+".")
    elif isbn is not False:
        print("ISBN for "+src+" is "+isbn+".")

    if doi is not False and doi != '':
        # Add extra \n for bibtexparser
        bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
    elif arxiv is not False and arxiv != '':
        bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
    elif isbn is not False and isbn != '':
        # Idem
        bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
    else:
        bibtex = ''

    bibtex = BibTexParser(bibtex)
    bibtex = bibtex.get_entry_dict()
    if len(bibtex) > 0:
        # Only the first fetched entry is used.
        bibtex_name = list(bibtex.keys())[0]
        bibtex = bibtex[bibtex_name]
        bibtex_string = tools.parsed2Bibtex(bibtex)
    else:
        bibtex_string = ''

    if not autoconfirm:
        bibtex = checkBibtex(src, bibtex_string)
        tag = tools.rawInput("Tag for this paper (leave empty for default) ? ")
    # In auto-confirm mode the ``tag`` argument is used as given. It used to
    # be read from the global ``args``, which is undefined whenever this
    # function is not called from the command-line entry point.
    bibtex['tag'] = tag

    new_name = backend.getNewName(src, bibtex, tag)

    # Never overwrite a file that is already in the library.
    while os.path.exists(new_name):
        tools.warning("file "+new_name+" already exists.")
        default_rename = new_name.replace(tools.getExtension(new_name),
                                          " (2)"+tools.getExtension(new_name))
        rename = tools.rawInput("New name ["+default_rename+"]? ")
        if rename == '':
            new_name = default_rename
        else:
            new_name = rename
    bibtex['file'] = new_name

    try:
        shutil.copy2(src, new_name)
    except shutil.Error:
        sys.exit("Unable to move file to library dir " +
                 config.get("folder")+".")

    # Remove first page of IOP papers
    try:
        if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
            tearpages.tearpage(new_name)
    except (KeyError, shutil.Error, IOError):
        pass

    backend.bibtexAppend(bibtex)
    return new_name
Exemplo n.º 45
0
def _splitProxyAddress(proxy):
    """Split ``host:port`` into ``(host, port)`` with an integer port.

    Returns ``(proxy, None)`` when ``proxy`` is not of the form
    ``host:port`` or the port is not a number.
    """
    try:
        host, port = proxy.split(':')
        return host, int(port)
    except ValueError:
        return proxy, None


def _getHeader(response, name):
    """Return header ``name`` of ``response``, or None when it is absent.

    The lowercase spelling is looked up first, then ``name`` as given.
    """
    headers = dict(response.info())
    for candidate in (name.lower(), name):
        if candidate in headers:
            return headers[candidate]
    return None


def download(url):
    """Download url tofile

    Check that it is a valid pdf or djvu file. Tries all the
    available proxies sequentially (the entries of the "proxies"
    configuration value; an empty string means a direct connection).

    Returns a tuple ``(content, type)`` where ``content`` is the raw content
    of the file and ``type`` is 'pdf' or 'djvu', or ``(False, None)`` if it
    could not be downloaded.
    """
    for proxy in config.get("proxies"):
        if proxy.startswith('socks'):
            if proxy[5] == '4':
                proxy_type = socks.SOCKS4
            else:
                proxy_type = socks.SOCKS5
            # The port is handed over as an integer, not as the string
            # taken from the configuration.
            proxy, port = _splitProxyAddress(proxy[proxy.find('://')+3:])
            socks.set_default_proxy(proxy_type, proxy, port)
            socket.socket = socks.socksocket
        elif proxy == '':
            socket.socket = default_socket
        else:
            proxy, port = _splitProxyAddress(proxy)
            socks.set_default_proxy(socks.HTTP, proxy, port)
            socket.socket = socks.socksocket
        try:
            r = urlopen(url)
            length = _getHeader(r, 'Content-Length')
            # A size of 0 means "unknown": no progress bar is shown.
            size = int(length.strip()) if length is not None else 0

            # Collect the chunks and join them once at the end instead of
            # growing a bytes object on every read.
            chunks = []
            dl_size = 0
            while True:
                buf = r.read(1024)
                if not buf:
                    break
                chunks.append(buf)
                dl_size += len(buf)
                if size != 0:
                    done = int(50 * dl_size / size)
                    # Derive the percentage from the byte counts so that it
                    # reaches 100% when the download completes.
                    percent = int(100 * dl_size / size)
                    sys.stdout.write("\r[%s%s]" % ('='*done, ' '*(50-done)))
                    sys.stdout.write(" "+str(percent)+"%")
                    sys.stdout.flush()

            contenttype_req = _getHeader(r, 'Content-Type')
            if contenttype_req is None:
                # No content type given: try the next proxy.
                continue
            contenttype = False
            if 'pdf' in contenttype_req:
                contenttype = 'pdf'
            elif 'djvu' in contenttype_req:
                contenttype = 'djvu'

            if r.getcode() != 200 or contenttype is False:
                continue

            return b"".join(chunks), contenttype
        except ValueError:
            tools.warning("Invalid URL")
            return False, None
        except (URLError, socket.error):
            if proxy != "":
                proxy_txt = "using proxy "+proxy
            else:
                proxy_txt = "without using any proxy"
            tools.warning("Unable to get "+url+" "+proxy_txt+". It " +
                          "may not be available at the moment.")
            continue
    return False, None
Exemplo n.º 46
0
def _confirmFileMatchesEntry(filename, entry):
    """Tell whether ``filename`` may be imported for the index ``entry``.

    Each identifier stored in the entry (DOI, arXiv id, ISBN) is compared
    with the one found in the file. On a mismatch the user is asked whether
    to continue anyway. Returns False as soon as the user declines, True
    otherwise.
    """
    if 'doi' in entry:
        # findArticleID returns a (type, id) tuple.
        _, doi = fetcher.findArticleID(filename, only=["DOI"])
        if doi is not False and doi != entry['doi']:
            answer = tools.rawInput("Found DOI does not " +
                                    "match bibtex entry " +
                                    "DOI, continue anyway " +
                                    "? [y/N]")
            if answer.lower() != 'y':
                return False
    if 'Eprint' in entry:
        _, arxiv = fetcher.findArticleID(filename, only=["arXiv"])
        if arxiv is not False and arxiv != entry['Eprint']:
            answer = tools.rawInput("Found arXiv id does " +
                                    "not match bibtex " +
                                    "entry arxiv id, " +
                                    "continue anyway ? [y/N]")
            if answer.lower() != 'y':
                return False
    if 'isbn' in entry:
        isbn = fetcher.findISBN(filename)
        if isbn is not False and isbn != entry['isbn']:
            answer = tools.rawInput("Found ISBN does not " +
                                    "match bibtex entry " +
                                    "ISBN, continue anyway " +
                                    "? [y/N]")
            if answer.lower() != 'y':
                return False
    return True


def resync():
    """Interactively bring the index and the library folder back in sync.

    Uses ``backend.diffFilesIndex()`` to list index entries without a file
    and files without an index entry. For the former, the user may give a
    file to import or delete the entry. For the latter, the user may import
    or delete the file. Empty tag directories are removed at the end.

    Returns False if the differences could not be computed, None otherwise.
    """
    diff = backend.diffFilesIndex()

    if diff is False:
        return False

    for key in diff:
        entry = diff[key]
        if entry['file'] == '':
            print("\nFound entry in index without associated file: " +
                  entry['id'])
            print("Title:\t"+entry['title'])
            # Ask for a file until the user gives none (delete the entry)
            # or gives one that is accepted.
            while True:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
                                          "entry)? ")
                if filename == '' or _confirmFileMatchesEntry(filename,
                                                              entry):
                    break
            if filename == '':
                backend.deleteId(entry['id'])
                print("Deleted entry \""+entry['id']+"\".")
            else:
                new_name = backend.getNewName(filename, entry)
                try:
                    shutil.copy2(filename, new_name)
                    print("Imported new file "+filename+" for entry " +
                          entry['id']+".")
                except shutil.Error:
                    sys.exit("Unable to move file to library dir " +
                             config.get("folder")+".")
                # The index must reference the copy stored in the library,
                # not the file it was imported from.
                backend.bibtexEdit(entry['id'], {'file': new_name})
        else:
            print("Found file without any associated entry in index:")
            print(entry['file'])
            action = ''
            while action.lower() not in ['import', 'delete']:
                action = tools.rawInput("What to do? [import / delete] ")
                action = action.lower()
            if action == 'import':
                # Re-import from a temporary copy so that the file can be
                # removed from the library first.
                tmp = tempfile.NamedTemporaryFile()
                shutil.copy(entry['file'], tmp.name)
                filetype = tools.getExtension(entry['file'])
                try:
                    os.remove(entry['file'])
                except OSError:
                    tools.warning("Unable to delete file "+entry['file'])
                # NOTE(review): addFile is called with two arguments and a
                # file extension as type - verify against its signature.
                if not addFile(tmp.name, filetype):
                    tools.warning("Unable to reimport file "+entry['file'])
                tmp.close()
            else:
                backend.deleteFile(entry['file'])
                print(entry['file'] + " removed from disk and " +
                      "index.")
    # Check for empty tag dirs
    folder = config.get("folder")
    for i in os.listdir(folder):
        # Test the path inside the library folder, not relative to the
        # current working directory.
        if os.path.isdir(folder + i) and not os.listdir(folder + i):
            try:
                os.rmdir(folder + i)
            except OSError:
                tools.warning("Found empty tag dir "+config.get("folder") + i +
                              " but could not delete it.")
Exemplo n.º 47
0
    parser_search.add_argument('query', metavar='entry', nargs='+',
                               help="your query, see README for more info.",
                               type=commandline_arg)
    parser_search.set_defaults(func='search')

    args = parser.parse_args()
    try:
        if args.func == 'download':
            skipped = []
            for url in args.url:
                new_name = downloadFile(url, args.type, args.manual, args.y,
                                        args.tag)
                if new_name is not False:
                    print(url+" successfully imported as "+new_name)
                else:
                    tools.warning("An error occurred while downloading "+url)
                    skipped.append(url)
            if len(skipped) > 0:
                print("\nSkipped files:")
                for i in skipped:
                    print(i)
            sys.exit()

        if args.func == 'import':
            skipped = []
            for filename in list(set(args.file) - set(args.skip)):
                new_name = addFile(filename, args.type, args.manual, args.y,
                                   args.tag)
                if new_name is not False:
                    print(filename+" successfully imported as " +
                          new_name+".")
Exemplo n.º 48
0
def findArticleID(src, only=("DOI", "arXiv")):
    """Search for a valid article ID (DOI or ArXiv) in src.

    ``src`` is the path of a ``.pdf`` or ``.djvu`` file; its text is
    extracted with ``pdftotext`` or ``djvutxt`` respectively. ``only``
    lists the identifier kinds to look for ("DOI" and/or "arXiv").

    Returns a tuple (type, first matching ID) where type is "DOI" or
    "arXiv", or (False, False) if the file type is not supported, nothing
    was found or the extraction tool exited with an error.
    From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/
    and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return (False, False)

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together. Polling the process
    # and reading stdout only skipped the search entirely when the tool
    # exited before the first poll, and could block on a full stderr pipe.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return (False, False)

    extractfull = ' '.join(line.decode(stdout_encoding).strip()
                           for line in out.splitlines())
    extract_type = False
    extractID = None

    # Try to extract DOI
    if "DOI" in only:
        # Replace the stray 'Œ' before lowercasing: lower() turns it into
        # 'œ', which the replacement would then never match.
        extractID = doi_re.search(extractfull.replace('Œ', '-').lower())
        if not extractID:
            # PNAS fix
            extractID = doi_pnas_re.search(extractfull.
                                           lower().
                                           replace('pnas', '/pnas'))
            if not extractID:
                # JSB fix
                extractID = doi_jsb_re.search(extractfull.lower())
        if extractID:
            extract_type = "DOI"

    # Try to extract arXiv
    if "arXiv" in only:
        tmp_extractID = arXiv_re.search(extractfull)
        if tmp_extractID:
            if not extractID or extractID.start(0) > tmp_extractID.start(1):
                # Only use arXiv id if it is before the DOI in the pdf
                extractID = tmp_extractID
                extract_type = "arXiv"

    if extractID is not None and extract_type == "DOI":
        # If DOI extracted, clean it and return it
        cleanDOI = extractID.group(0).replace(':', '').replace(' ', '')
        if clean_doi_re.search(cleanDOI):
            cleanDOI = cleanDOI[1:]
        # FABSE J fix
        if clean_doi_fabse_re.search(cleanDOI):
            cleanDOI = cleanDOI[:20]
        # Second JCB fix
        if clean_doi_jcb_re.search(cleanDOI):
            cleanDOI = cleanDOI[:21]
        if len(cleanDOI) > 40:
            # Overlong match: cut at the first letter (or dot, mapped to
            # 'A') that follows the first digit after the 8-char prefix.
            cleanDOItemp = clean_doi_len_re.sub('000', cleanDOI)
            reps = {'.': 'A', '-': '0'}
            cleanDOItemp = tools.replaceAll(cleanDOItemp[8:], reps)
            # When no cut point is found, keep the historical behaviour of
            # cutting at the last scanned index.
            # NOTE(review): this drops the final character - confirm intent.
            cut = len(cleanDOItemp) - 1
            digitStart = False
            for i, char in enumerate(cleanDOItemp):
                if char.isdigit():
                    digitStart = True
                # This test must not be nested under the isdigit() branch,
                # where it could never be true.
                if char.isalpha() and digitStart:
                    cut = i
                    break
            cleanDOI = cleanDOI[0:(8+cut)]
        return ("DOI", cleanDOI)
    elif extractID is not None and extract_type == "arXiv":
        # If arXiv id is extracted, return it
        return ("arXiv", extractID.group(1))
    return (False, False)
Exemplo n.º 49
0
def _mismatchRefused(found, expected, prompt):
    """Tell whether the user rejects a file whose identifier does not match.

    Returns False without prompting when nothing was extracted (*found* is
    False) or when *found* equals *expected*. Otherwise shows *prompt* and
    returns True unless the answer is ``y`` (case-insensitive).
    """
    if found is False or found == expected:
        return False
    return tools.rawInput(prompt).lower() != 'y'


def resync():
    """Interactively reconcile the bibtex index with the library files.

    Uses ``backend.diffFilesIndex()`` to list the discrepancies, then:

    * for an index entry whose ``file`` field is empty, asks for a file to
      import (an empty answer deletes the entry). When the entry has a
      ``doi``, ``Eprint`` or ``isbn`` field, the identifier extracted from
      the chosen file is compared with it and the user must confirm any
      mismatch; refusing asks for another file.
    * for a file without index entry, asks whether to import or delete it.

    Finally removes empty sub-directories of the library folder.

    Returns:
        False if the diff could not be computed, None otherwise.
    """
    diff = backend.diffFilesIndex()

    if diff is False:
        return False

    for key in diff:
        entry = diff[key]
        if entry['file'] == '':
            print("\nFound entry in index without associated file: " +
                  entry['id'])
            print("Title:\t" + entry['title'])
            filename = ''
            loop = True
            while loop:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
                                          "entry)? ")
                if filename == '':
                    break
                # A file was given: accept it unless one of the checks below
                # finds a mismatch that the user refuses to ignore.
                loop = False
                if 'doi' in entry:
                    # findArticleID returns a (type, id) tuple; keep the id.
                    doi = fetcher.findArticleID(filename, only=["DOI"])[1]
                    loop = _mismatchRefused(doi, entry['doi'],
                                            "Found DOI does not " +
                                            "match bibtex entry " +
                                            "DOI, continue anyway " +
                                            "? [y/N]")
                if not loop and 'Eprint' in entry:
                    arxiv = fetcher.findArticleID(filename,
                                                  only=["arXiv"])[1]
                    loop = _mismatchRefused(arxiv, entry['Eprint'],
                                            "Found arXiv id does " +
                                            "not match bibtex " +
                                            "entry arxiv id, " +
                                            "continue anyway ? [y/N]")
                if not loop and 'isbn' in entry:
                    isbn = fetcher.findISBN(filename)
                    loop = _mismatchRefused(isbn, entry['isbn'],
                                            "Found ISBN does not " +
                                            "match bibtex entry " +
                                            "ISBN, continue anyway " +
                                            "? [y/N]")
            if filename == '':
                backend.deleteId(entry['id'])
                print("Deleted entry \"" + entry['id'] + "\".")
            else:
                new_name = backend.getNewName(filename, entry)
                try:
                    shutil.copy2(filename, new_name)
                    print("Imported new file " + filename + " for entry " +
                          entry['id'] + ".")
                except shutil.Error:
                    sys.exit("Unable to move file to library dir " +
                             config.get("folder") + ".")
                # NOTE(review): the index records the source path although
                # the file was copied to new_name -- confirm which one
                # bibtexEdit is expected to receive.
                backend.bibtexEdit(entry['id'], {'file': filename})
        else:
            print("Found file without any associated entry in index:")
            print(entry['file'])
            action = ''
            while action.lower() not in ['import', 'delete']:
                action = tools.rawInput("What to do? [import / delete] ")
                action = action.lower()
            if action == 'import':
                # Work on a temporary copy so the original can be removed
                # before it is imported again; the context manager closes
                # the copy even if the import raises.
                with tempfile.NamedTemporaryFile() as tmp:
                    shutil.copy(entry['file'], tmp.name)
                    filetype = tools.getExtension(entry['file'])
                    try:
                        os.remove(entry['file'])
                    except OSError:
                        tools.warning("Unable to delete file " +
                                      entry['file'])
                    # NOTE(review): call kept as in the original; check it
                    # against the addFile signature in use.
                    if not addFile(tmp.name, filetype):
                        tools.warning("Unable to reimport file " +
                                      entry['file'])
            else:
                backend.deleteFile(entry['file'])
                print(entry['file'] + " removed from disk and " + "index.")
    # Check for empty tag dirs
    folder = config.get("folder")
    for i in os.listdir(folder):
        # Test the path inside the library folder, not a name relative to
        # the current working directory.
        path = folder + i
        if os.path.isdir(path) and not os.listdir(path):
            try:
                os.rmdir(path)
            except OSError:
                tools.warning("Found empty tag dir " + config.get("folder") +
                              i + " but could not delete it.")
Exemplo n.º 50
0
def _splitProxyAddress(address):
    """Split a ``host:port`` string into ``(host, port)``.

    The port is returned as an int. When *address* is not exactly
    ``host:port`` it is returned whole with a port of None; when the port
    part is not a number, the host is returned with a port of None.
    """
    try:
        host, port = address.split(':')
    except ValueError:
        return address, None
    try:
        return host, int(port)
    except ValueError:
        return host, None


def _getHeader(response, name):
    """Return header *name* of *response*, or None if it is absent.

    The header name is matched case-insensitively.
    """
    wanted = name.lower()
    for key, value in dict(response.info()).items():
        if key.lower() == wanted:
            return value
    return None


def download(url):
    """Download url to memory

    Check that it is a valid pdf or djvu file. Tries all the
    available proxies sequentially (an empty proxy string means a direct
    connection), printing a progress bar when the size is known.

    Returns:
        ``(content, contenttype)`` where content is the raw bytes and
        contenttype is ``'pdf'`` or ``'djvu'``, or ``(False, None)`` if the
        URL is invalid or no proxy yielded a valid file.
    """
    for proxy in config.get("proxies"):
        if proxy.startswith('socks'):
            # "socks4..." selects SOCKS4, anything else SOCKS5. Slicing
            # avoids an IndexError on the bare string "socks".
            if proxy[5:6] == '4':
                proxy_type = socks.SOCKS4
            else:
                proxy_type = socks.SOCKS5
            proxy, port = _splitProxyAddress(proxy[proxy.find('://')+3:])
            socks.set_default_proxy(proxy_type, proxy, port)
            socket.socket = socks.socksocket
        elif proxy == '':
            socket.socket = default_socket
        else:
            proxy, port = _splitProxyAddress(proxy)
            socks.set_default_proxy(socks.HTTP, proxy, port)
            socket.socket = socks.socksocket
        try:
            r = urlopen(url)
            try:
                # An absent or malformed Content-Length only disables the
                # progress bar.
                try:
                    size = int((_getHeader(r, 'Content-Length') or
                                '0').strip())
                except ValueError:
                    size = 0
                chunks = []
                dl_size = 0
                while True:
                    buf = r.read(1024)
                    if not buf:
                        break
                    chunks.append(buf)
                    dl_size += len(buf)
                    if size > 0:
                        # Clamp in case the server sends more than announced.
                        fraction = min(float(dl_size) / size, 1.0)
                        done = int(50 * fraction)
                        sys.stdout.write("\r[%s%s]" % ('='*done,
                                                       ' '*(50-done)))
                        sys.stdout.write(" "+str(int(100 * fraction))+"%")
                        sys.stdout.flush()
                dl = b"".join(chunks)

                contenttype_req = _getHeader(r, 'Content-Type')
                if contenttype_req is None:
                    # No content type announced: try the next proxy.
                    continue
                contenttype = False
                if 'pdf' in contenttype_req:
                    contenttype = 'pdf'
                elif 'djvu' in contenttype_req:
                    contenttype = 'djvu'

                if r.getcode() != 200 or contenttype is False:
                    continue

                return dl, contenttype
            finally:
                r.close()
        except ValueError:
            tools.warning("Invalid URL")
            return False, None
        except (URLError, socket.error):
            if proxy != "":
                proxy_txt = "using proxy "+proxy
            else:
                proxy_txt = "without using any proxy"
            tools.warning("Unable to get "+url+" "+proxy_txt+". It " +
                          "may not be available at the moment.")
            continue
    return False, None
Exemplo n.º 51
0
def _askChoice(prompt, choices):
    """Show *prompt* until the lowercased answer is one of *choices*."""
    answer = ''
    while answer not in choices:
        answer = tools.rawInput(prompt).lower()
    return answer


def addFile(src, filetype, manual, autoconfirm, tag, rename=True):
    """
    Add a file to the library

    Unless *manual* is set, tries to extract a DOI or arXiv id (for
    *filetype* ``'article'`` or None) or an ISBN (for ``'book'``, or for None
    when no article id was found) from *src*. If nothing is found, the user
    is asked for an identifier. The matching bibtex entry is then fetched,
    optionally reviewed by the user, tagged, and appended to the index.

    Args:
        src: path of the file to add.
        filetype: ``'article'``, ``'book'`` or None (unknown).
        manual: skip the automatic identifier extraction.
        autoconfirm: do not ask the user to review the bibtex entry nor to
            choose a tag; *tag* is used as given.
        tag: tag stored in the entry when *autoconfirm* is set.
        rename: copy *src* under the name given by ``backend.getNewName``;
            when false, *src* itself is recorded in the entry.

    Returns:
        The path recorded for the file, or False if the user chose to skip.
    """
    doi = False
    arxiv = False
    isbn = False

    if not manual:
        try:
            if filetype == 'article' or filetype is None:
                id_type, article_id = fetcher.findArticleID(src)
                if id_type == "DOI":
                    doi = article_id
                elif id_type == "arXiv":
                    arxiv = article_id

            if filetype == 'book' or (doi is False and arxiv is False
                                      and filetype is None):
                isbn = fetcher.findISBN(src)
        except KeyboardInterrupt:
            # Interrupting the extraction falls back to manual entry.
            doi = False
            arxiv = False
            isbn = False

    if doi is False and isbn is False and arxiv is False:
        if filetype is None:
            tools.warning("Could not determine the DOI nor the arXiv id nor " +
                          "the ISBN for " + src +
                          ". Switching to manual entry.")
            doi_arxiv_isbn = _askChoice(
                "DOI / arXiv / ISBN / manual / skip? ",
                ['doi', 'arxiv', 'isbn', 'manual', 'skip'])
            if doi_arxiv_isbn == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv_isbn == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv_isbn == 'isbn':
                isbn = tools.rawInput('ISBN? ')
            elif doi_arxiv_isbn == 'skip':
                return False
        elif filetype == 'article':
            tools.warning("Could not determine the DOI nor the arXiv id for " +
                          src + ", switching to manual entry.")
            doi_arxiv = _askChoice("DOI / arXiv / manual / skip? ",
                                   ['doi', 'arxiv', 'manual', 'skip'])
            if doi_arxiv == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv == 'skip':
                return False
        elif filetype == 'book':
            isbn_manual = _askChoice("ISBN / manual / skip? ",
                                     ['isbn', 'manual', 'skip'])
            if isbn_manual == 'isbn':
                isbn = (tools.rawInput('ISBN? ').replace(' ',
                                                         '').replace('-', ''))
            elif isbn_manual == 'skip':
                return False
    elif doi is not False:
        print("DOI for " + src + " is " + doi + ".")
    elif arxiv is not False:
        print("ArXiv id for " + src + " is " + arxiv + ".")
    elif isbn is not False:
        print("ISBN for " + src + " is " + isbn + ".")

    if doi is not False and doi != '':
        # Add extra \n for bibtexparser
        bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n") + "\n"
    elif arxiv is not False and arxiv != '':
        bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n") + "\n"
    elif isbn is not False and isbn != '':
        # Idem
        bibtex = fetcher.isbn2Bib(isbn).strip() + "\n"
    else:
        bibtex = ''

    bibtex = bibtexparser.loads(bibtex)
    bibtex = bibtex.entries_dict
    if len(bibtex) > 0:
        # Only the first parsed entry is used.
        bibtex_name = list(bibtex.keys())[0]
        bibtex = bibtex[bibtex_name]
        bibtex_string = tools.parsed2Bibtex(bibtex)
    else:
        bibtex_string = ''

    if not autoconfirm:
        bibtex = checkBibtex(src, bibtex_string)
        tag = tools.rawInput("Tag for this paper (leave empty for default) ? ")
    # With autoconfirm, the tag passed by the caller is kept as is.
    bibtex['tag'] = tag

    if rename:
        new_name = backend.getNewName(src, bibtex, tag)

        while os.path.exists(new_name):
            tools.warning("file " + new_name + " already exists.")
            default_rename = new_name.replace(
                tools.getExtension(new_name),
                " (2)" + tools.getExtension(new_name))
            chosen_name = tools.rawInput("New name [" + default_rename + "]? ")
            if chosen_name == '':
                new_name = default_rename
            else:
                new_name = chosen_name
        try:
            shutil.copy2(src, new_name)
        except shutil.Error:
            sys.exit("Unable to move file to library dir " +
                     config.get("folder") + ".")
    else:
        new_name = src
    bibtex['file'] = os.path.abspath(new_name)

    # Remove first page of IOP papers
    try:
        if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
            tearpages.tearpage(new_name)
    except (KeyError, shutil.Error, IOError):
        # Best effort: entries without publisher/type, or files that cannot
        # be rewritten, are left untouched.
        pass

    backend.bibtexAppend(bibtex)
    return new_name
Exemplo n.º 52
0
def _hasRequiredFields(entry):
    """Tell whether a parsed bibtex entry has its required fields filled.

    ``title`` and ``year`` must be non-empty, as well as ``authors`` for an
    entry of type ``article`` or ``author`` for a ``book``.

    Raises:
        KeyError: if ``type`` or one of the required fields is absent.
    """
    required = ['title']
    if entry['type'] == 'article':
        required.append('authors')
    elif entry['type'] == 'book':
        required.append('author')
    required.append('year')
    return all(entry[field] for field in required)


def checkBibtex(filename, bibtex_string):
    """Show the bibtex entry found for *filename* and let the user fix it.

    The first entry parsed from *bibtex_string* is printed and the user is
    asked to confirm it. If it is rejected, cannot be parsed or lacks
    required fields, the entry is opened in ``EDITOR`` until a valid entry
    is accepted. If the entry had a ``file`` field and the edited one names
    another path, the file is moved there.

    Returns:
        The parsed entry (a dict) as accepted by the user.
    """
    print("The bibtex entry found for "+filename+" is:")

    bibtex = BibTexParser(bibtex_string)
    bibtex = bibtex.get_entry_dict()
    try:
        bibtex = bibtex[list(bibtex.keys())[0]]
        # Check entries are correct. Explicit test rather than ``assert`` so
        # the validation still runs under ``python -O``.
        if not _hasRequiredFields(bibtex):
            raise KeyError("missing required bibtex field")
        # Print the bibtex and confirm
        print(tools.parsed2Bibtex(bibtex))
        check = tools.rawInput("Is it correct? [Y/n] ")
    except KeyboardInterrupt:
        sys.exit()
    except (IndexError, KeyError, AssertionError):
        # No entry, or an incomplete one: go straight to the editor.
        check = 'n'

    try:
        old_filename = bibtex['file']
    except KeyError:
        old_filename = False

    while check.lower() == 'n':
        with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
            tmpfile.write(bibtex_string.encode('utf-8'))
            tmpfile.flush()
            subprocess.call([EDITOR, tmpfile.name])
            # Read back what the editor saved.
            tmpfile.seek(0)
            bibtex = BibTexParser(tmpfile.read().decode('utf-8')+"\n")

        bibtex = bibtex.get_entry_dict()
        try:
            bibtex = bibtex[list(bibtex.keys())[0]]
        except (IndexError, KeyError):
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue
        # NOTE(review): the entry is rejected only when all three fields are
        # missing -- confirm whether ``or`` was intended.
        if('authors' not in bibtex and 'title' not in bibtex and 'year' not in
           bibtex):
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue

        if old_filename is not False and 'file' not in bibtex:
            tools.warning("Invalid bibtex entry. No filename given.")
            tools.rawInput("Press Enter to go back to editor.")
            check = 'n'
        else:
            bibtex_string = tools.parsed2Bibtex(bibtex)
            print("\nThe bibtex entry for "+filename+" is:")
            print(bibtex_string)
            check = tools.rawInput("Is it correct? [Y/n] ")
    if old_filename is not False and old_filename != bibtex['file']:
        try:
            print("Moving file to new location…")
            shutil.move(old_filename, bibtex['file'])
        except shutil.Error:
            tools.warning("Unable to move file "+old_filename+" to " +
                          bibtex['file']+". You should check it manually.")

    return bibtex