def doi2Bib(doi):
    """Fetch the bibTeX record describing a DOI.

    The DOI resolver is queried with an ``application/x-bibtex`` accept
    header. The decoded body of the reply is returned when the reply is
    labelled with exactly that content type. An empty string is returned in
    every other case, including when the server cannot be reached (a warning
    is emitted in that case).

    Adapted from https://gist.github.com/jrsmith3/5513926
    """
    request = Request("http://dx.doi.org/" + doi,
                      headers={"accept": "application/x-bibtex"})
    try:
        response = urlopen(request)
        reply_headers = dict(response.info())
        # The header name may be reported lower-cased or capitalised; the
        # first spelling that is present decides the outcome.
        for header_name in ('content-type', 'Content-Type'):
            if header_name not in reply_headers:
                continue
            if reply_headers[header_name] == 'application/x-bibtex':
                return response.read().decode('utf-8')
            return ''
        return ''
    except URLError:
        tools.warning('Unable to contact remote server to get the bibtex ' +
                      'entry for doi '+doi)
        return ''
def diffFilesIndex():
    """Compare the bibtex index with the documents found in the library.

    Returns the index entries dict, amended as follows:
        * an indexed entry whose file is not among the listed .pdf / .djvu
          files gets its ``file`` field set to ''
        * every listed .pdf / .djvu file that no entry refers to is added
          under its own filename, as a dict holding only ``file``

    Returns False (after a warning) when the index cannot be opened.
    """
    unclaimed = [name for name in tools.listDir(config.get("folder"))
                 if tools.getExtension(name) in ['.pdf', '.djvu']]

    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = bibtexparser.load(fh)
        index_diff = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    for ident in index_diff.keys():
        record = index_diff[ident]
        if record['file'] in unclaimed:
            # The file is accounted for: it must not be reported as orphan.
            unclaimed.remove(record['file'])
        else:
            record['file'] = ''

    # Whatever is left on disk has no entry in the index.
    for filename in unclaimed:
        index_diff[filename] = {'file': filename}

    return index.entries_dict
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config

    Returns False when the index cannot be opened (a warning is emitted)
    or when no entry matches.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id == 'both' or file_id == 'id':
        try:
            bibtex_entry = bibtex[entry]
        except KeyError:
            pass
    if file_id == 'both' or file_id == 'file':
        if os.path.isfile(entry):
            for key in bibtex.keys():
                try:
                    if os.path.samefile(bibtex[key]['file'], entry):
                        bibtex_entry = bibtex[key]
                        break
                except (KeyError, OSError):
                    # An entry without a file, or whose file is gone from
                    # disk, cannot be the one we look for; it must not
                    # abort the search through the remaining entries.
                    continue
    # Nothing to clean when no entry was found (bibtex_entry is False).
    if clean and bibtex_entry is not False:
        for field in config.get("ignore_fields"):
            bibtex_entry.pop(field, None)
    return bibtex_entry
def save(self):
    """Serialise ``self.config`` as JSON into ``bmc.json``.

    The file lives directly under ``self.config_path``. If it cannot be
    written, a warning is emitted and the program exits with status 1.
    """
    destination = self.config_path + "bmc.json"
    try:
        with open(destination, 'w') as fh:
            serialized = json.dumps(self.config)
            fh.write(serialized)
    except IOError:
        tools.warning("Could not write config file.")
        sys.exit(1)
def findHALId(src):
    """Searches for a valid HAL id in src

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns a tuple of the HAL id and the version
    or False if not found or an error occurred (unsupported extension,
    or extraction tool exiting with a positive status).
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() drains stdout and stderr together and waits for the
    # process, so the output is read even when the tool exits immediately
    # and a chatty stderr cannot stall the read of stdout.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join([i.decode(stdout_encoding).strip()
                            for i in out.splitlines()])
    extractID = HAL_re.search(extractfull)
    if extractID is None:
        # No HAL id in the document
        return False
    return extractID.group(1), extractID.group(2)
def getBibtex(entry, file_id='both', clean=False):
    """Returns the bibtex entry corresponding to entry, as a dict

    entry is either a filename or a bibtex ident
    file_id is file or id or both to search for a file / id / both
    clean is to clean the ignored fields specified in config

    Returns False if the index cannot be opened (a warning is emitted)
    or if nothing matches entry.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    bibtex_entry = False
    if file_id in ('both', 'id'):
        try:
            bibtex_entry = bibtex[entry]
        except KeyError:
            pass

    if file_id in ('both', 'file') and os.path.isfile(entry):
        for key in bibtex.keys():
            try:
                matches = os.path.samefile(bibtex[key]['file'], entry)
            except (KeyError, OSError):
                # Entries lacking a file, or pointing to a file that no
                # longer exists, are skipped instead of aborting the lookup.
                continue
            if matches:
                bibtex_entry = bibtex[key]
                break

    if clean and bibtex_entry is not False:
        # Only a found entry (a dict) has fields to strip.
        for field in config.get("ignore_fields"):
            bibtex_entry.pop(field, None)
    return bibtex_entry
def doi2Bib(doi):
    """Returns a bibTeX string of metadata for a given DOI.

    Returns an empty string when the reply is not labelled
    ``application/x-bibtex`` or when the request fails (a warning is
    emitted in the latter case).

    From : https://gist.github.com/jrsmith3/5513926
    """
    url = "http://dx.doi.org/" + doi
    headers = {"accept": "application/x-bibtex"}
    req = Request(url, headers=headers)
    try:
        r = urlopen(req)
        reply_headers = dict(r.info())
        # The header name may come back lower-cased or capitalised.
        content_type = reply_headers.get('content-type',
                                         reply_headers.get('Content-Type'))
        if content_type == 'application/x-bibtex':
            return r.read().decode('utf-8')
        return ''
    except Exception:
        # Any failure of the request is reported the same way, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        tools.warning('Unable to contact remote server to get the bibtex ' +
                      'entry for doi '+doi)
        return ''
def getNewName(src, bibtex, tag='', override_format=None):
    """
    Return the formatted name according to config for the given
    bibtex entry

    src is the current file, only used for its extension.
    bibtex is the entry dict; 'author', 'type' and 'title' are read
    unconditionally. Only the 'article' and 'book' types select a format.
    tag, when not empty, is the sub-directory (slugified) the file goes to;
    the directory is created if missing.
    override_format replaces the format taken from config when given.
    """
    authors = re.split(' and ', bibtex['author'])

    if bibtex['type'] == 'article':
        if override_format is None:
            new_name = config.get("format_articles")
        else:
            new_name = override_format
        try:
            new_name = new_name.replace("%j", bibtex['journal'])
        except KeyError:
            pass
    elif bibtex['type'] == 'book':
        if override_format is None:
            new_name = config.get("format_books")
        else:
            new_name = override_format

    new_name = new_name.replace("%t", bibtex['title'])
    try:
        new_name = new_name.replace("%Y", bibtex['year'])
    except KeyError:
        pass
    new_name = new_name.replace("%f", authors[0].split(',')[0].strip())
    new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
    new_name = new_name.replace("%a", ', '.join([i.split(',')[0].strip()
                                                 for i in authors]))
    if('archiveprefix' in bibtex and
       'arXiv' in bibtex['archiveprefix']):
        # Only emit a version suffix when the eprint really carries one:
        # rfind() returns -1 otherwise and the slice would keep the last
        # character of the identifier.
        version_start = bibtex['eprint'].rfind('v')
        if version_start != -1:
            version = '-' + bibtex['eprint'][version_start:]
        else:
            version = ''
        new_name = new_name.replace("%v", version)
    else:
        new_name = new_name.replace("%v", '')

    for custom in config.get("format_custom"):
        new_name = custom(new_name)

    if tag == '':
        new_name = (config.get("folder") + tools.slugify(new_name) +
                    tools.getExtension(src))
    else:
        # The directory that is created must be the one the returned path
        # points into, i.e. the slugified tag.
        tag_dir = config.get("folder") + tools.slugify(tag)
        if not os.path.isdir(tag_dir):
            try:
                os.mkdir(tag_dir)
            except OSError:
                tools.warning("Unable to create tag dir " + tag_dir + ".")

        new_name = (tag_dir + '/' +
                    tools.slugify(new_name) + tools.getExtension(src))

    return new_name
def editEntry(entry, file_id='both'):
    """Let the user edit an index entry, moving its file if the tag changed.

    entry and file_id are handed to backend.getBibtex to locate the entry.
    Returns True once the index has been rewritten, False when the entry
    does not exist or the index cannot be opened.
    """
    bibtex = backend.getBibtex(entry, file_id)
    if bibtex is False:
        tools.warning("Entry "+entry+" does not exist.")
        return False

    filename = entry if file_id == 'file' else bibtex['file']
    new_bibtex = checkBibtex(filename, tools.parsed2Bibtex(bibtex))

    # Tag update
    if new_bibtex['tag'] != bibtex['tag']:
        print("Editing tag, moving file.")
        new_name = backend.getNewName(new_bibtex['file'],
                                      new_bibtex,
                                      new_bibtex['tag'])

        # Ask for another name for as long as the target is taken.
        while os.path.exists(new_name):
            tools.warning("file "+new_name+" already exists.")
            extension = tools.getExtension(new_name)
            default_rename = new_name.replace(extension, " (2)" + extension)
            rename = tools.rawInput("New name ["+default_rename+"]? ")
            new_name = default_rename if rename == '' else rename
        new_bibtex['file'] = new_name

        try:
            shutil.move(bibtex['file'], new_bibtex['file'])
        except shutil.Error:
            tools.warning('Unable to move file '+bibtex['file']+' to ' +
                          new_bibtex['file'] + ' according to tag edit.')

        # Drop the directory the file came from if it is now empty.
        old_dir = os.path.dirname(bibtex['file'])
        try:
            if not os.listdir(old_dir):
                os.rmdir(old_dir)
        except OSError:
            tools.warning("Unable to delete empty tag dir " + old_dir)

    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = BibTexParser(fh.read())
        index = index.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    index[new_bibtex['id']] = new_bibtex
    backend.bibtexRewrite(index)
    return True
def save(self):
    """Write ``self.config`` to ``bmc.json`` as human-readable JSON.

    Keys are sorted and nesting is indented by four spaces. The file lives
    directly under ``self.config_path``. If it cannot be written, a warning
    is emitted and the program exits with status 1.
    """
    destination = self.config_path + "bmc.json"
    try:
        with open(destination, 'w') as fh:
            payload = json.dumps(self.config,
                                 sort_keys=True,
                                 indent=4,
                                 separators=(',', ': '))
            fh.write(payload)
    except IOError:
        tools.warning("Could not write config file.")
        sys.exit(1)
def getNewName(src, bibtex, tag='', override_format=None):
    """
    Return the formatted name according to config for the given
    bibtex entry

    src only contributes its extension to the result.
    bibtex is the entry dict; 'author', 'type' and 'title' are always read,
    and a format is selected for the 'article' and 'book' types only.
    A non-empty tag places the file in a sub-directory named after the
    slugified tag, created on the fly when missing.
    override_format, when given, is used instead of the configured format.
    """
    authors = re.split(' and ', bibtex['author'])

    if bibtex['type'] == 'article':
        if override_format is None:
            new_name = config.get("format_articles")
        else:
            new_name = override_format
        try:
            new_name = new_name.replace("%j", bibtex['journal'])
        except KeyError:
            pass
    elif bibtex['type'] == 'book':
        if override_format is None:
            new_name = config.get("format_books")
        else:
            new_name = override_format

    new_name = new_name.replace("%t", bibtex['title'])
    try:
        new_name = new_name.replace("%Y", bibtex['year'])
    except KeyError:
        pass
    new_name = new_name.replace("%f", authors[0].split(',')[0].strip())
    new_name = new_name.replace("%l", authors[-1].split(',')[0].strip())
    new_name = new_name.replace(
        "%a", ', '.join([i.split(',')[0].strip() for i in authors]))

    version = ''
    if ('archiveprefix' in bibtex and 'arXiv' in bibtex['archiveprefix']):
        eprint = bibtex['eprint']
        version_start = eprint.rfind('v')
        # Without a 'v' in the identifier there is no version to append
        # (rfind() gives -1, which would slice out the last character).
        if version_start != -1:
            version = '-' + eprint[version_start:]
    new_name = new_name.replace("%v", version)

    for custom in config.get("format_custom"):
        new_name = custom(new_name)

    if tag == '':
        new_name = (config.get("folder") + tools.slugify(new_name) +
                    tools.getExtension(src))
    else:
        # Create the very directory the returned path refers to: the one
        # named after the slugified tag, not the raw tag.
        tag_dir = config.get("folder") + tools.slugify(tag)
        if not os.path.isdir(tag_dir):
            try:
                os.mkdir(tag_dir)
            except OSError:
                tools.warning("Unable to create tag dir " + tag_dir + ".")

        new_name = (tag_dir + '/' +
                    tools.slugify(new_name) + tools.getExtension(src))

    return new_name
def editEntry(entry, file_id='both'):
    """Interactively edit an index entry and relocate its file if needed.

    The entry is looked up through backend.getBibtex(entry, file_id) and
    submitted to checkBibtex. When the tag differs afterwards, the file is
    moved to the name computed for the new tag. The index is then rewritten
    with the edited entry.

    Returns True on success, False when the entry is unknown or the index
    cannot be opened.
    """
    bibtex = backend.getBibtex(entry, file_id)
    if bibtex is False:
        tools.warning("Entry " + entry + " does not exist.")
        return False

    if file_id == 'file':
        current_file = entry
    else:
        current_file = bibtex['file']
    new_bibtex = checkBibtex(current_file, tools.parsed2Bibtex(bibtex))

    tag_changed = new_bibtex['tag'] != bibtex['tag']
    if tag_changed:
        print("Editing tag, moving file.")
        target = backend.getNewName(new_bibtex['file'],
                                    new_bibtex,
                                    new_bibtex['tag'])

        # Keep prompting until the target name is free.
        while os.path.exists(target):
            tools.warning("file " + target + " already exists.")
            suffix = tools.getExtension(target)
            default_rename = target.replace(suffix, " (2)" + suffix)
            answer = tools.rawInput("New name [" + default_rename + "]? ")
            if answer == '':
                target = default_rename
            else:
                target = answer
        new_bibtex['file'] = target

        try:
            shutil.move(bibtex['file'], new_bibtex['file'])
        except shutil.Error:
            tools.warning('Unable to move file ' + bibtex['file'] + ' to ' +
                          new_bibtex['file'] + ' according to tag edit.')

        # The directory left behind is removed when it holds nothing else.
        source_dir = os.path.dirname(bibtex['file'])
        try:
            if not os.listdir(source_dir):
                os.rmdir(source_dir)
        except OSError:
            tools.warning("Unable to delete empty tag dir " + source_dir)

    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = bibtexparser.load(fh)
        index = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    index[new_bibtex['id']] = new_bibtex
    backend.bibtexRewrite(index)
    return True
def save(self):
    """Dump ``self.config`` into ``bmc.json`` under ``self.config_path``.

    The JSON is written with sorted keys and a four-space indent. A write
    failure triggers a warning and exits the program with status 1.
    """
    dump_options = {
        'sort_keys': True,
        'indent': 4,
        'separators': (',', ': '),
    }
    try:
        with open(self.config_path + "bmc.json", 'w') as fh:
            fh.write(json.dumps(self.config, **dump_options))
    except IOError:
        tools.warning("Could not write config file.")
        sys.exit(1)
def getEntries():
    """List the identifiers of every entry stored in the bibtex index.

    Returns False (after a warning) when the index cannot be opened.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            parser = BibTexParser(fh.read())
        entries = parser.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False
    else:
        return list(entries.keys())
def bibtexAppend(data):
    """Append data to the main bibtex file

    data is a dict for one entry in bibtex, as the one from bibtexparser
    output

    Raises IOError when the index file cannot be opened or written. The
    error is deliberately left to the caller, exactly as before: the
    previous handler re-raised it unchanged, which made the warning and
    the ``return False`` that followed unreachable.
    """
    with open(config.get("folder")+'index.bib', 'a', encoding='utf-8') \
            as fh:
        fh.write(tools.parsed2Bibtex(data) + "\n")
def bibtexAppend(data):
    """Append data to the main bibtex file

    data is a dict for one entry in bibtex, as the one from bibtexparser
    output

    Raises IOError if the index file cannot be opened or written. The
    former ``except IOError`` handler only re-raised the error, so the
    warning and ``return False`` placed after the raise never ran; the
    handler and that dead code are removed, the behaviour is unchanged.
    """
    index_path = config.get("folder")+'index.bib'
    with open(index_path, 'a', encoding='utf-8') as fh:
        fh.write(tools.parsed2Bibtex(data)+"\n")
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    The entry is removed from the index; the file it refers to is removed
    from disk too unless keep is true. The directory that held the file is
    removed when it ends up empty.

    Returns False when the index cannot be opened or ident is unknown,
    True otherwise (file system failures only produce warnings).
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex.keys():
        return False

    # Read the path once and tolerate an entry without a file: the warnings
    # below used to index the entry again and could raise KeyError from
    # inside the very handlers meant to report the problem.
    filepath = bibtex[ident].get('file', '')

    if not keep:
        try:
            os.remove(filepath)
        except OSError:
            tools.warning("Unable to delete file associated to id " + ident +
                          " : " + filepath)

    try:
        if not os.listdir(os.path.dirname(filepath)):
            os.rmdir(os.path.dirname(filepath))
    except OSError:
        tools.warning("Unable to delete empty tag dir " +
                      os.path.dirname(filepath))

    try:
        del(bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      filepath)
    return True
def deleteId(ident, keep=False):
    """Delete a file based on its id in the bibtex file

    Removes the entry from the index and, unless keep is true, the file it
    points to. The directory that contained the file is removed if it is
    left empty.

    Returns False if the index cannot be opened or ident is not indexed,
    True otherwise (file system failures are reported as warnings only).
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    if ident not in bibtex.keys():
        return False

    # Fetch the path up front, defaulting to '' for an entry without file.
    # The handlers below previously looked the path up again while building
    # their message, raising KeyError instead of warning.
    target = bibtex[ident].get('file', '')
    target_dir = os.path.dirname(target)

    if not keep:
        try:
            os.remove(target)
        except OSError:
            tools.warning("Unable to delete file associated to id " + ident +
                          " : " + target)

    try:
        if not os.listdir(target_dir):
            os.rmdir(target_dir)
    except OSError:
        tools.warning("Unable to delete empty tag dir " + target_dir)

    try:
        del (bibtex[ident])
        bibtexRewrite(bibtex)
    except KeyError:
        tools.warning("No associated bibtex entry in index for file " +
                      target)
    return True
def bibtexRewrite(data):
    """Overwrite the bibtex index file with the given entries.

    data maps identifiers to bibtex entry dicts; each entry is rendered
    with tools.parsed2Bibtex and followed by a newline.
    Returns False (after a warning) when the index cannot be written.
    """
    rendered = ''.join(tools.parsed2Bibtex(data[ident]) + "\n"
                       for ident in data.keys())
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'w', encoding='utf-8') as fh:
            fh.write(rendered)
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
def openFile(ident):
    """Open the file attached to an index entry with ``xdg-open``.

    Returns True once the viewer has been launched, False when the index
    cannot be opened or ident is not in it.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = bibtexparser.load(fh)
        entries = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    if ident in list(entries.keys()):
        # The viewer is started without waiting for it.
        subprocess.Popen(['xdg-open', entries[ident]['file']])
        return True
    return False
def bibtexEdit(ident, modifs):
    """Apply field changes to one entry of the bibtex index.

    ident is the identifier of the entry to modify and modifs maps field
    names to their new values. The whole index is rewritten afterwards.
    Returns False (after a warning) when the index cannot be opened.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = bibtexparser.load(fh)
        entries = index.entries_dict
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False

    for field, value in modifs.items():
        entries[ident][field] = value
    bibtexRewrite(entries)
def bibtexRewrite(data):
    """Replace the content of the bibtex index file.

    data is a dict of bibtex entry dicts. Every entry is serialised with
    tools.parsed2Bibtex, one after the other, each followed by a newline.
    Returns False (after a warning) if the index cannot be written.
    """
    pieces = []
    for ident in data.keys():
        pieces.append(tools.parsed2Bibtex(data[ident]))
        pieces.append("\n")
    content = ''.join(pieces)

    try:
        with open(config.get("folder")+'index.bib', 'w', encoding='utf-8') \
                as fh:
            fh.write(content)
    except (IOError, TypeError):
        tools.warning("Unable to open index file.")
        return False
def getEntries(full=False):
    """Return the entries of the bibtex index.

    With full set, the whole dict of entries is returned; otherwise only
    the list of their identifiers. Returns False (after a warning) when the
    index cannot be opened.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            index = bibtexparser.load(fh)
        entries = index.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    return entries if full else list(entries.keys())
def openFile(ident):
    """Launch ``xdg-open`` on the file of the entry named ident.

    Returns True when the viewer was started, False when the index cannot
    be opened or does not contain ident.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            parser = BibTexParser(fh.read())
        entries = parser.get_entry_dict()
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    known = ident in list(entries.keys())
    if known:
        # Fire and forget: the viewer is not waited for.
        subprocess.Popen(['xdg-open', entries[ident]['file']])
    return known
def load_masks(self):
    """Register the functions of an optional ``masks.py`` as custom formats.

    Nothing happens when ``masks.py`` is absent from ``self.config_path``.
    Otherwise the module is loaded and every function it contains is
    appended to ``self.config["format_custom"]``. On import failure the
    configuration is cleaned through ``self.clean()`` and a warning is
    emitted.
    """
    if not os.path.isfile(self.config_path + "masks.py"):
        return

    try:
        self.info = imp.find_module("masks", [self.config_path])
        self.masks = imp.load_module("masks", *self.info)
        members = inspect.getmembers(self.masks, inspect.isfunction)
        for _name, function in members:
            self.config["format_custom"].append(function)
    except ImportError:
        self.clean()
        tools.warning("Unable to import masks config file.")
    finally:
        # Close the file handle found with the module, if there is one.
        try:
            self.info[0].close()
        except AttributeError:
            pass
def downloadFile(url, filetype, manual, autoconfirm, tag):
    """Download url and import the result into the library.

    The content fetched by fetcher.download is stored in a temporary file
    which is then handed to addFile together with filetype, manual,
    autoconfirm and tag.

    Returns the value returned by addFile (the new name of the file, or
    False), or False when the download failed.
    """
    print('Downloading ' + url)
    dl, contenttype = fetcher.download(url)

    if dl is False:
        tools.warning("Could not fetch " + url)
        return False

    print('Download finished')
    # The context manager removes the temporary file on every exit path,
    # including the one where addFile fails; the data is written through
    # the handle we already hold instead of reopening the file by name.
    with tempfile.NamedTemporaryFile(suffix='.' + contenttype) as tmp:
        tmp.write(dl)
        # Make sure addFile, which reads the file by name, sees all of it.
        tmp.flush()
        return addFile(tmp.name, filetype, manual, autoconfirm, tag)
def downloadFile(url, filetype, manual, autoconfirm, tag):
    """Fetch url into a temporary file and add that file to the library.

    filetype, manual, autoconfirm and tag are passed unchanged to addFile.

    Returns what addFile returns (the name the file was imported as, or
    False), or False when fetcher.download could not get the content.
    """
    print('Downloading '+url)
    dl, contenttype = fetcher.download(url)

    if dl is not False:
        print('Download finished')
        # Using the temporary file as a context manager guarantees it is
        # closed (and so deleted) even when addFile returns False, which
        # previously skipped the close. Writing through its own handle
        # avoids opening the same file a second time.
        with tempfile.NamedTemporaryFile(suffix='.'+contenttype) as tmp:
            tmp.write(dl)
            tmp.flush()  # addFile reads the file back through its name
            new_name = addFile(tmp.name, filetype, manual, autoconfirm, tag)
        return new_name
    else:
        tools.warning("Could not fetch "+url)
        return False
def load(self):
    """Load the configuration, initialising it on first use.

    The configuration counts as initialised when the config directory
    exists and holds a ``bmc.json`` file, which is then parsed into
    ``self.config``; otherwise ``self.initialize()`` is called. Custom
    masks are loaded last. The program exits with status 1 when the
    directory cannot be created or the file cannot be read.
    """
    try:
        folder_exists = make_sure_path_exists(self.config_path)
        initialized = bool(folder_exists and
                           os.path.isfile(self.config_path + "bmc.json"))
    except OSError:
        tools.warning("Unable to create ~/.config folder.")
        sys.exit(1)

    if initialized:
        try:
            with open(self.config_path + "bmc.json", 'r') as fh:
                self.config = json.load(fh)
        except (ValueError, IOError):
            tools.warning("Config file could not be read.")
            sys.exit(1)
    else:
        self.initialize()

    self.load_masks()
def update(entry):
    """Offer to download a newer arXiv version of entry.

    When backend.updateArXiv reports a new version, the user is asked
    whether to download it and, once it has been imported, whether to
    delete the previous version. Always returns None.
    """
    # Named new_version rather than update so that the result does not
    # shadow this function.
    new_version = backend.updateArXiv(entry)
    if new_version is False:
        return

    print("New version found for " + entry)
    print("\t Title: " + new_version['title'])
    confirm = tools.rawInput("Download it ? [Y/n] ")
    if confirm.lower() == 'n':
        return

    url = 'http://arxiv.org/pdf/' + new_version['eprint']
    # NOTE(review): downloadFile is called with three arguments here; check
    # that this matches the signature of the downloadFile in this module.
    new_name = downloadFile(url, 'article', False)
    if new_name is False:
        # The warning used to reference an undefined ``url`` and crashed
        # with NameError at this point. Stopping here keeps the previous
        # version untouched when the new one could not be fetched.
        tools.warning("An error occurred while downloading " + url)
        return
    print(new_version['eprint'] + " successfully imported as " + new_name)

    confirm = tools.rawInput("Delete previous version ? [y/N] ")
    if confirm.lower() == 'y':
        # entry may be an identifier or a filename: try both.
        if not backend.deleteId(entry):
            if not backend.deleteFile(entry):
                tools.warning("Unable to remove previous version.")
                return
        print("Previous version successfully deleted.")
def update(entry):
    """Check entry for a newer arXiv version and offer to fetch it.

    If backend.updateArXiv finds one, the user confirms the download and,
    after a successful import, may have the previous version deleted.
    Always returns None.
    """
    # The result is kept in ``latest`` so it no longer shadows the function.
    latest = backend.updateArXiv(entry)
    if latest is not False:
        print("New version found for "+entry)
        print("\t Title: "+latest['title'])
        confirm = tools.rawInput("Download it ? [Y/n] ")
        if confirm.lower() == 'n':
            return

        url = 'http://arxiv.org/pdf/'+latest['eprint']
        # NOTE(review): three arguments are passed to downloadFile; confirm
        # this agrees with the downloadFile defined alongside this function.
        new_name = downloadFile(url, 'article', False)
        if new_name is not False:
            print(latest['eprint']+" successfully imported as "+new_name)
        else:
            # ``url`` was not defined before, so this warning raised
            # NameError. Returning keeps the old version in place when
            # its replacement is not available.
            tools.warning("An error occurred while downloading "+url)
            return

        confirm = tools.rawInput("Delete previous version ? [y/N] ")
        if confirm.lower() == 'y':
            # entry may be an identifier or a filename: try both.
            if not backend.deleteId(entry):
                if not backend.deleteFile(entry):
                    tools.warning("Unable to remove previous version.")
                    return
            print("Previous version successfully deleted.")
def load(self):
    """Load the configuration and make sure the storage folder exists.

    When the config directory exists and holds ``bmc.json``, that file is
    parsed into ``self.config``; otherwise ``self.initialize()`` is called.
    The folder named by the ``folder`` setting is then created if needed
    and custom masks are loaded. The program exits with status 1 if a
    directory cannot be created or the config file cannot be read.
    """
    config_file = self.config_path + "bmc.json"
    try:
        folder_exists = make_sure_path_exists(self.config_path)
        initialized = bool(folder_exists and os.path.isfile(config_file))
    except OSError:
        tools.warning("Unable to create ~/.config folder.")
        sys.exit(1)

    if initialized:
        try:
            with open(config_file, 'r') as fh:
                self.config = json.load(fh)
        except (ValueError, IOError):
            tools.warning("Config file could not be read.")
            sys.exit(1)
    else:
        self.initialize()

    try:
        folder_exists = make_sure_path_exists(self.get("folder"))
    except OSError:
        tools.warning("Unable to create paper storage folder.")
        sys.exit(1)

    self.load_masks()
def findISBN(src):
    """Search for a valid ISBN in src.

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.

    Returns the ISBN, stripped of dashes and spaces, or False if not found
    or an error occurred (unsupported extension, or extraction tool
    exiting with a positive status)."""
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return False

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() reads both pipes to the end and waits for the process:
    # the text is available even when the tool exits at once, and a full
    # stderr pipe cannot block the read of stdout.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return False

    extractfull = ' '.join([i.decode(stdout_encoding).strip()
                            for i in out.splitlines()])
    # 'Œ' has to be replaced before lowercasing: lower() turns it into
    # 'œ', after which the replacement could never match.
    extractISBN = isbn_re.search(extractfull.replace('Œ', '-').lower())
    if not extractISBN:
        return False

    # Clean ISBN is the ISBN number without separators
    return extractISBN.group(1).replace('-', '').replace(' ', '')
def deleteFile(filename, keep=False):
    """Remove from the index every entry whose file is filename.

    Unless keep is set, the file of each matching entry is deleted as well.
    The directory of filename is removed when it is empty. When no entry
    matches, filename itself is deleted if it exists.

    Returns True if at least one entry matched, False otherwise or when
    the index cannot be opened.
    """
    try:
        index_path = config.get("folder")+'index.bib'
        with open(index_path, 'r', encoding='utf-8') as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    found = False
    # Iterate over a copy of the keys: entries are deleted along the way.
    for ident in list(bibtex.keys()):
        try:
            if not os.path.samefile(bibtex[ident]['file'], filename):
                continue
            found = True

            if not keep:
                try:
                    os.remove(bibtex[ident]['file'])
                except (KeyError, OSError):
                    tools.warning("Unable to delete file associated " +
                                  "to id " + ident + " : " +
                                  bibtex[ident]['file'])

            try:
                if not os.listdir(os.path.dirname(filename)):
                    os.rmdir(os.path.dirname(filename))
            except OSError:
                tools.warning("Unable to delete empty tag dir " +
                              os.path.dirname(filename))

            try:
                del bibtex[ident]
            except KeyError:
                tools.warning("No associated bibtex entry in index for " +
                              "file " + bibtex[ident]['file'])
        except (KeyError, OSError):
            # Entries that cannot be compared with filename are ignored.
            pass

    if found:
        bibtexRewrite(bibtex)
    elif os.path.isfile(filename):
        os.remove(filename)
    return found
def deleteFile(filename, keep=False):
    """Drop the index entries that refer to filename.

    For each entry whose file is the same file as filename: the file is
    deleted (unless keep is set), the directory of filename is removed if
    it is empty, and the entry is taken out of the index. If no entry
    refers to filename, the file itself is deleted when it exists.

    Returns whether an entry was found; False when the index cannot be
    opened.
    """
    try:
        with open(config.get("folder")+'index.bib', 'r', encoding='utf-8') \
                as fh:
            bibtex = bibtexparser.load(fh)
        bibtex = bibtex.entries_dict
    except (TypeError, IOError):
        tools.warning("Unable to open index file.")
        return False

    found = False
    # A snapshot of the keys is walked since matching entries get deleted.
    candidates = list(bibtex.keys())
    for key in candidates:
        try:
            same = os.path.samefile(bibtex[key]['file'], filename)
            if same:
                found = True
                if not keep:
                    try:
                        os.remove(bibtex[key]['file'])
                    except (KeyError, OSError):
                        tools.warning("Unable to delete file associated " +
                                      "to id " + key+" : "+bibtex[key]['file'])

                try:
                    leftover = os.listdir(os.path.dirname(filename))
                    if not leftover:
                        os.rmdir(os.path.dirname(filename))
                except OSError:
                    tools.warning("Unable to delete empty tag dir " +
                                  os.path.dirname(filename))

                try:
                    del(bibtex[key])
                except KeyError:
                    tools.warning("No associated bibtex entry in index for " +
                                  "file " + bibtex[key]['file'])
        except (KeyError, OSError):
            # An entry that cannot be compared with filename is skipped.
            pass

    if found:
        bibtexRewrite(bibtex)
    elif os.path.isfile(filename):
        os.remove(filename)
    return found
def checkBibtex(filename, bibtex_string):
    """Show a bibtex entry to the user and let them fix it in an editor.

    filename is only used in the messages; bibtex_string is the entry to
    review. The entry is edited in EDITOR for as long as the user answers
    'n' to the confirmation (or the entry is judged invalid). If the
    ``file`` field was changed, the file is moved accordingly.

    Returns the parsed entry that was finally accepted.
    """
    print("The bibtex entry found for " + filename + " is:")

    bibtex = bibtexparser.loads(bibtex_string)
    bibtex = bibtex.entries_dict
    try:
        bibtex = bibtex[list(bibtex.keys())[0]]
        # Check entries are correct
        has_author = "authors" in bibtex or "author" in bibtex
        if not ("title" in bibtex and has_author and "year" in bibtex):
            raise AssertionError

        # Print the bibtex and confirm
        print(tools.parsed2Bibtex(bibtex))
        check = tools.rawInput("Is it correct? [Y/n] ")
    except KeyboardInterrupt:
        sys.exit()
    except (IndexError, KeyError, AssertionError):
        print("Missing author, year or title in bibtex.")
        check = 'n'

    # Remember where the file was, to detect a change of location later.
    try:
        old_filename = bibtex['file']
    except KeyError:
        old_filename = False

    while check.lower() == 'n':
        with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
            tmpfile.write(bibtex_string.encode('utf-8'))
            tmpfile.flush()
            subprocess.call([EDITOR, tmpfile.name])
            tmpfile.seek(0)
            edited = tmpfile.read().decode('utf-8')
            bibtex = bibtexparser.loads(edited + "\n")

        bibtex = bibtex.entries_dict
        try:
            bibtex = bibtex[list(bibtex.keys())[0]]
            # NOTE(review): this only rejects an entry missing all three
            # fields, and ignores 'author'; the check made before the loop
            # is stricter. Confirm which one is intended.
            unusable = not ('authors' in bibtex or
                            'title' in bibtex or
                            'year' in bibtex)
        except (IndexError, KeyError):
            unusable = True

        if unusable:
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue

        if old_filename is not False and 'file' not in bibtex:
            tools.warning("Invalid bibtex entry. No filename given.")
            tools.rawInput("Press Enter to go back to editor.")
            check = 'n'
        else:
            bibtex_string = tools.parsed2Bibtex(bibtex)
            print("\nThe bibtex entry for " + filename + " is:")
            print(bibtex_string)
            check = tools.rawInput("Is it correct? [Y/n] ")

    if old_filename is not False and old_filename != bibtex['file']:
        try:
            print("Moving file to new location…")
            shutil.move(old_filename, bibtex['file'])
        except shutil.Error:
            tools.warning("Unable to move file " + old_filename + " to " +
                          bibtex['file'] + ". You should check it manually.")

    return bibtex
nargs='+', help="your query, see README for more info.", type=commandline_arg) parser_search.set_defaults(func='search') args = parser.parse_args() try: if args.func == 'download': skipped = [] for url in args.url: new_name = downloadFile(url, args.type, args.manual, args.y, args.tag) if new_name is not False: print(url + " successfully imported as " + new_name) else: tools.warning("An error occurred while downloading " + url) skipped.append(url) if len(skipped) > 0: print("\nSkipped files:") for i in skipped: print(i) sys.exit() if args.func == 'import': skipped = [] for filename in list(set(args.file) - set(args.skip)): new_name = addFile(filename, args.type, args.manual, args.y, args.tag, not args.inplace) if new_name is not False: print(filename + " successfully imported as " + new_name + ".")
def findArticleID(src, only=["DOI", "arXiv"]):
    """Search for a valid article ID (DOI or ArXiv) in src.

    src is the path of a .pdf or .djvu file; its text is extracted with
    pdftotext or djvutxt respectively.
    only lists the kinds of identifiers to look for ("DOI", "arXiv").

    Returns a tuple (type, first matching ID), or (False, False) if not
    found or an error occurred (unsupported extension, or extraction tool
    exiting with a positive status).

    From : http://en.dogeno.us/2010/02/release-a-python-script-for-organizing-scientific-papers-pyrenamepdf-py/
    and https://github.com/minad/bibsync/blob/3fdf121016f6187a2fffc66a73cd33b45a20e55d/lib/bibsync/utils.rb
    """
    if src.endswith(".pdf"):
        command = ["pdftotext", src, "-"]
    elif src.endswith(".djvu"):
        command = ["djvutxt", src]
    else:
        return (False, False)

    totext = subprocess.Popen(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    # communicate() reads both pipes to the end and waits for the process,
    # so the text is not lost when the tool exits before we look at it and
    # a full stderr pipe cannot block the read of stdout.
    out, err = totext.communicate()
    if totext.returncode > 0:
        # Error happened
        tools.warning(err)
        return (False, False)

    extractfull = ' '.join([i.decode(stdout_encoding).strip()
                            for i in out.splitlines()])
    extract_type = False
    extractID = None

    # Try to extract DOI
    if "DOI" in only:
        # 'Œ' must be replaced before lowercasing: lower() turns it into
        # 'œ', after which the replacement could never match.
        extractID = doi_re.search(extractfull.replace('Œ', '-').lower())
        if not extractID:
            # PNAS fix
            extractID = doi_pnas_re.search(extractfull.
                                           lower().
                                           replace('pnas', '/pnas'))
            if not extractID:
                # JSB fix
                extractID = doi_jsb_re.search(extractfull.lower())
        if extractID:
            extract_type = "DOI"

    # Try to extract arXiv
    if "arXiv" in only:
        tmp_extractID = arXiv_re.search(extractfull)
        if tmp_extractID:
            if not extractID or extractID.start(0) > tmp_extractID.start(1):
                # Only use arXiv id if it is before the DOI in the pdf
                extractID = tmp_extractID
                extract_type = "arXiv"

    if extractID is not None and extract_type == "DOI":
        # If DOI extracted, clean it and return it
        cleanDOI = extractID.group(0).replace(':', '').replace(' ', '')
        if clean_doi_re.search(cleanDOI):
            cleanDOI = cleanDOI[1:]
        # FABSE J fix
        if clean_doi_fabse_re.search(cleanDOI):
            cleanDOI = cleanDOI[:20]
        # Second JCB fix
        if clean_doi_jcb_re.search(cleanDOI):
            cleanDOI = cleanDOI[:21]
        if len(cleanDOI) > 40:
            # Overlong match: cut it at the first letter that follows a
            # digit in the part after the first 8 characters.
            cleanDOItemp = clean_doi_len_re.sub('000', cleanDOI)
            reps = {'.': 'A', '-': '0'}
            cleanDOItemp = tools.replaceAll(cleanDOItemp[8:], reps)
            digitStart = 0
            for i, char in enumerate(cleanDOItemp):
                if char.isdigit():
                    digitStart = 1
                if char.isalpha() and digitStart:
                    break
            cleanDOI = cleanDOI[0:(8+i)]
        return ("DOI", cleanDOI)
    elif extractID is not None and extract_type == "arXiv":
        # If arXiv id is extracted, return it
        return ("arXiv", extractID.group(1))
    return (False, False)
def addFile(src, filetype, manual, autoconfirm, tag):
    """
    Add a file to the library

    src is the file to import.
    filetype is 'article', 'book' or None (unknown); it selects which
    identifiers are searched for in the file and asked to the user.
    manual skips the automatic search for an identifier.
    autoconfirm skips the review of the bibtex entry and the tag prompt;
    the tag argument is then used as is.
    tag is the tag to file the document under when autoconfirm is set.

    Returns the name the file was copied to, or False if the user chose
    to skip the file.
    """
    doi = False
    arxiv = False
    isbn = False

    if not manual:
        try:
            if filetype == 'article' or filetype is None:
                id_type, article_id = fetcher.findArticleID(src)
                if id_type == "DOI":
                    doi = article_id
                elif id_type == "arXiv":
                    arxiv = article_id

            if filetype == 'book' or (doi is False and arxiv is False and
                                      filetype is None):
                isbn = fetcher.findISBN(src)
        except KeyboardInterrupt:
            # Interrupting the search falls back to manual entry.
            doi = False
            arxiv = False
            isbn = False

    if doi is False and isbn is False and arxiv is False:
        if filetype is None:
            tools.warning("Could not determine the DOI nor the arXiv id nor " +
                          "the ISBN for "+src+". Switching to manual entry.")
            doi_arxiv_isbn = ''
            while(doi_arxiv_isbn not in
                  ['doi', 'arxiv', 'isbn', 'manual', 'skip']):
                doi_arxiv_isbn = (tools.rawInput("DOI / arXiv " +
                                                 "/ ISBN / manual / skip? ").
                                  lower())
            if doi_arxiv_isbn == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv_isbn == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv_isbn == 'isbn':
                # Separators are stripped, as in the book prompt below.
                isbn = (tools.rawInput('ISBN? ').
                        replace(' ', '').
                        replace('-', ''))
            elif doi_arxiv_isbn == 'skip':
                return False
        elif filetype == 'article':
            tools.warning("Could not determine the DOI nor the arXiv id for " +
                          src+", switching to manual entry.")
            doi_arxiv = ''
            while doi_arxiv not in ['doi', 'arxiv', 'manual', 'skip']:
                doi_arxiv = (tools.rawInput("DOI / arXiv / manual / skip? ").
                             lower())
            if doi_arxiv == 'doi':
                doi = tools.rawInput('DOI? ')
            elif doi_arxiv == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif doi_arxiv == 'skip':
                return False
        elif filetype == 'book':
            isbn_manual = ''
            while isbn_manual not in ['isbn', 'manual', 'skip']:
                isbn_manual = tools.rawInput("ISBN / manual / skip? ").lower()
            if isbn_manual == 'isbn':
                isbn = (tools.rawInput('ISBN? ').
                        replace(' ', '').
                        replace('-', ''))
            elif isbn_manual == 'skip':
                return False
    elif doi is not False:
        print("DOI for "+src+" is "+doi+".")
    elif arxiv is not False:
        print("ArXiv id for "+src+" is "+arxiv+".")
    elif isbn is not False:
        print("ISBN for "+src+" is "+isbn+".")

    if doi is not False and doi != '':
        # Add extra \n for bibtexparser
        bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n")+"\n"
    elif arxiv is not False and arxiv != '':
        bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n")+"\n"
    elif isbn is not False and isbn != '':
        # Idem
        bibtex = fetcher.isbn2Bib(isbn).strip()+"\n"
    else:
        bibtex = ''

    bibtex = BibTexParser(bibtex)
    bibtex = bibtex.get_entry_dict()
    if len(bibtex) > 0:
        bibtex_name = list(bibtex.keys())[0]
        bibtex = bibtex[bibtex_name]
        bibtex_string = tools.parsed2Bibtex(bibtex)
    else:
        bibtex_string = ''

    if not autoconfirm:
        bibtex = checkBibtex(src, bibtex_string)
        tag = tools.rawInput("Tag for this paper (leave empty for default) ? ")
    # With autoconfirm the tag received as argument is used. It used to be
    # read from the global command line ``args``, which ignored the
    # parameter and failed for any caller other than the script itself.
    bibtex['tag'] = tag

    new_name = backend.getNewName(src, bibtex, tag)

    # Ask for another name for as long as the target is taken.
    while os.path.exists(new_name):
        tools.warning("file "+new_name+" already exists.")
        default_rename = new_name.replace(tools.getExtension(new_name),
                                          " (2)"+tools.getExtension(new_name))
        rename = tools.rawInput("New name ["+default_rename+"]? ")
        if rename == '':
            new_name = default_rename
        else:
            new_name = rename
    bibtex['file'] = new_name

    try:
        shutil.copy2(src, new_name)
    except shutil.Error:
        new_name = False
        sys.exit("Unable to move file to library dir " +
                 config.get("folder")+".")

    # Remove first page of IOP papers
    try:
        if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
            tearpages.tearpage(new_name)
    except (KeyError, shutil.Error, IOError):
        pass

    backend.bibtexAppend(bibtex)
    return new_name
def download(url):
    """Download url tofile

    Check that it is a valid pdf or djvu file. Tries all the
    available proxies sequentially.

    Each entry of the "proxies" configuration value is either '' (direct
    connection), a "socks4://host:port" / "socks5://host:port" style address
    or a "host:port" HTTP proxy. The selected proxy is installed globally by
    replacing socket.socket and is left in place when the function returns.

    Returns a (content, type) tuple where content is the raw bytes of the
    file and type is 'pdf' or 'djvu', or (False, None) if the file could not
    be downloaded through any proxy or the URL is invalid.
    """
    for proxy in config.get("proxies"):
        if proxy == '':
            socket.socket = default_socket
        else:
            if proxy.startswith('socks'):
                # Slice instead of index so that a bare "socks" entry does
                # not raise IndexError.
                if proxy[5:6] == '4':
                    proxy_type = socks.SOCKS4
                else:
                    proxy_type = socks.SOCKS5
                proxy = proxy[proxy.find('://')+3:]
            else:
                proxy_type = socks.HTTP
            try:
                host, port = proxy.split(':')
            except ValueError:
                # No (or more than one) colon: keep the address as is and
                # let the proxy library pick its default port.
                port = None
            else:
                proxy = host
                try:
                    # The port has to be an integer to be usable in a
                    # socket address.
                    port = int(port)
                except ValueError:
                    port = None
            socks.set_default_proxy(proxy_type, proxy, port)
            socket.socket = socks.socksocket
        try:
            r = urlopen(url)
            try:
                # Header lookups through the message object are
                # case-insensitive.
                headers = r.info()
                try:
                    size = int((headers.get('content-length') or '').strip())
                except ValueError:
                    # Missing or malformed length: download without a
                    # progress bar.
                    size = 0

                chunks = []
                dl_size = 0
                while True:
                    buf = r.read(1024)
                    if not buf:
                        break
                    chunks.append(buf)
                    dl_size += len(buf)
                    if size != 0:
                        # 50 character wide bar, clamped in case the server
                        # sends more than the announced length.
                        done = min(50, int(50 * dl_size / size))
                        sys.stdout.write("\r[%s%s]" % ('='*done,
                                                       ' '*(50-done)))
                        sys.stdout.write(" "+str(done * 2)+"%")
                        sys.stdout.flush()
                dl = b"".join(chunks)

                contenttype_req = headers.get('content-type')
                if contenttype_req is None:
                    continue
                if 'pdf' in contenttype_req:
                    contenttype = 'pdf'
                elif 'djvu' in contenttype_req:
                    contenttype = 'djvu'
                else:
                    contenttype = False

                if r.getcode() != 200 or contenttype is False:
                    continue

                return dl, contenttype
            finally:
                r.close()

        except ValueError:
            tools.warning("Invalid URL")
            return False, None
        except (URLError, socket.error):
            if proxy != "":
                proxy_txt = "using proxy "+proxy
            else:
                proxy_txt = "without using any proxy"
            tools.warning("Unable to get "+url+" "+proxy_txt+". It " +
                          "may not be available at the moment.")
            continue
    return False, None
def resync():
    """
    Reconcile the bibtex index with the files found in the library folder

    For every index entry whose file is missing, the user is asked for a
    replacement file (an empty answer deletes the entry). For every file
    without an index entry, the user chooses between importing it through
    addFile and deleting it. Empty directories directly under the library
    folder are removed at the end.

    Returns False if backend.diffFilesIndex() could not read the index,
    None otherwise. Exits the program if a replacement file cannot be copied.
    """
    diff = backend.diffFilesIndex()

    if diff is False:
        return False

    for key in diff:
        entry = diff[key]
        if entry['file'] == '':
            print("\nFound entry in index without associated file: " +
                  entry['id'])
            print("Title:\t"+entry['title'])
            # Ask for a file until the answer is empty or the file is
            # accepted (identifiers match, or the user overrides a mismatch).
            # NOTE(review): addFile unpacks fetcher.findArticleID() as a
            # (type, id) pair while the values below are compared directly
            # with the entry fields - confirm what it returns with only=.
            while True:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
                                          "entry)? ")
                if filename == '':
                    break
                accepted = True
                if 'doi' in entry:
                    doi = fetcher.findArticleID(filename, only=["DOI"])
                    if doi is not False and doi != entry['doi']:
                        answer = tools.rawInput("Found DOI does not " +
                                                "match bibtex entry " +
                                                "DOI, continue anyway " +
                                                "? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted and 'Eprint' in entry:
                    arxiv = fetcher.findArticleID(filename, only=["arXiv"])
                    if arxiv is not False and arxiv != entry['Eprint']:
                        answer = tools.rawInput("Found arXiv id does " +
                                                "not match bibtex " +
                                                "entry arxiv id, " +
                                                "continue anyway ? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted and 'isbn' in entry:
                    isbn = fetcher.findISBN(filename)
                    if isbn is not False and isbn != entry['isbn']:
                        answer = tools.rawInput("Found ISBN does not " +
                                                "match bibtex entry " +
                                                "ISBN, continue anyway " +
                                                "? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted:
                    break
            if filename == '':
                backend.deleteId(entry['id'])
                print("Deleted entry \""+entry['id']+"\".")
            else:
                new_name = backend.getNewName(filename, entry)
                try:
                    shutil.copy2(filename, new_name)
                    print("Imported new file "+filename+" for entry " +
                          entry['id']+".")
                except (shutil.Error, IOError, OSError):
                    # copy2 reports most failures as IOError / OSError.
                    sys.exit("Unable to move file to library dir " +
                             config.get("folder")+".")
                # The index must point to the copy stored in the library,
                # not to the file it was copied from.
                backend.bibtexEdit(entry['id'], {'file': new_name})
        else:
            print("Found file without any associated entry in index:")
            print(entry['file'])
            action = ''
            while action.lower() not in ['import', 'delete']:
                action = tools.rawInput("What to do? [import / delete] ")
                action = action.lower()
            if action == 'import':
                extension = tools.getExtension(entry['file'])
                # The temporary copy keeps the extension of the original
                # file; presumably the new name is derived from the source
                # name - confirm against backend.getNewName.
                with tempfile.NamedTemporaryFile(suffix=extension) as tmp:
                    shutil.copy(entry['file'], tmp.name)
                    try:
                        os.remove(entry['file'])
                    except OSError:
                        tools.warning("Unable to delete file "+entry['file'])
                    # addFile expects (src, filetype, manual, autoconfirm,
                    # tag). filetype is only compared with 'article', 'book'
                    # and None there, so None is passed to let it try every
                    # identifier type.
                    if not addFile(tmp.name, None, False, False, ''):
                        tools.warning("Unable to reimport file " +
                                      entry['file'])
                        # Put the file back: the temporary copy is deleted
                        # when this block ends and would be the only one
                        # left.
                        if not os.path.exists(entry['file']):
                            try:
                                shutil.copy(tmp.name, entry['file'])
                            except (shutil.Error, IOError, OSError):
                                tools.warning("Unable to restore file " +
                                              entry['file'])
            else:
                backend.deleteFile(entry['file'])
                print(entry['file'] + " removed from disk and " +
                      "index.")
    # Check for empty tag dirs
    folder = config.get("folder")
    for i in os.listdir(folder):
        # Test the full path: a bare name would be resolved against the
        # current working directory.
        if os.path.isdir(folder + i) and not os.listdir(folder + i):
            try:
                os.rmdir(folder + i)
            except OSError:
                tools.warning("Found empty tag dir "+folder + i +
                              " but could not delete it.")
# Positional argument of the search sub-command: one or more query terms,
# each converted by commandline_arg.
parser_search.add_argument('query', metavar='entry', nargs='+',
                           help="your query, see README for more info.",
                           type=commandline_arg)
# The sub-command name is stored in args.func and dispatched on below.
parser_search.set_defaults(func='search')

args = parser.parse_args()
try:
    if args.func == 'download':
        # Download every URL, remembering the ones that failed.
        skipped = []
        for url in args.url:
            new_name = downloadFile(url, args.type, args.manual, args.y,
                                    args.tag)
            if new_name is not False:
                print(url+" successfully imported as "+new_name)
            else:
                tools.warning("An error occurred while downloading "+url)
                skipped.append(url)
        if len(skipped) > 0:
            print("\nSkipped files:")
            for i in skipped:
                print(i)
        sys.exit()

    if args.func == 'import':
        skipped = []
        # Going through a set drops duplicates and the files listed in
        # args.skip; the command line order is not kept.
        for filename in list(set(args.file) - set(args.skip)):
            new_name = addFile(filename, args.type, args.manual, args.y,
                               args.tag)
            if new_name is not False:
                print(filename+" successfully imported as " +
                      new_name+".")
def resync():
    """
    Reconcile the bibtex index with the files found in the library folder

    For every index entry whose file is missing, the user is asked for a
    replacement file (an empty answer deletes the entry). For every file
    without an index entry, the user chooses between importing it through
    addFile and deleting it. Empty directories directly under the library
    folder are removed at the end.

    Returns False if backend.diffFilesIndex() could not read the index,
    None otherwise. Exits the program if a replacement file cannot be copied.
    """
    diff = backend.diffFilesIndex()

    if diff is False:
        return False

    for key in diff:
        entry = diff[key]
        if entry['file'] == '':
            print("\nFound entry in index without associated file: " +
                  entry['id'])
            print("Title:\t" + entry['title'])
            # Ask for a file until the answer is empty or the file is
            # accepted (identifiers match, or the user overrides a mismatch).
            # NOTE(review): addFile unpacks fetcher.findArticleID() as a
            # (type, id) pair while the values below are compared directly
            # with the entry fields - confirm what it returns with only=.
            while True:
                filename = tools.rawInput("File to import for this entry " +
                                          "(leave empty to delete the " +
                                          "entry)? ")
                if filename == '':
                    break
                accepted = True
                if 'doi' in entry:
                    doi = fetcher.findArticleID(filename, only=["DOI"])
                    if doi is not False and doi != entry['doi']:
                        answer = tools.rawInput("Found DOI does not " +
                                                "match bibtex entry " +
                                                "DOI, continue anyway " +
                                                "? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted and 'Eprint' in entry:
                    arxiv = fetcher.findArticleID(filename, only=["arXiv"])
                    if arxiv is not False and arxiv != entry['Eprint']:
                        answer = tools.rawInput("Found arXiv id does " +
                                                "not match bibtex " +
                                                "entry arxiv id, " +
                                                "continue anyway ? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted and 'isbn' in entry:
                    isbn = fetcher.findISBN(filename)
                    if isbn is not False and isbn != entry['isbn']:
                        answer = tools.rawInput("Found ISBN does not " +
                                                "match bibtex entry " +
                                                "ISBN, continue anyway " +
                                                "? [y/N]")
                        accepted = (answer.lower() == 'y')
                if accepted:
                    break
            if filename == '':
                backend.deleteId(entry['id'])
                print("Deleted entry \"" + entry['id'] + "\".")
            else:
                new_name = backend.getNewName(filename, entry)
                try:
                    shutil.copy2(filename, new_name)
                    print("Imported new file " + filename + " for entry " +
                          entry['id'] + ".")
                except (shutil.Error, IOError, OSError):
                    # copy2 reports most failures as IOError / OSError.
                    sys.exit("Unable to move file to library dir " +
                             config.get("folder") + ".")
                # The index must point to the copy stored in the library,
                # not to the file it was copied from.
                backend.bibtexEdit(entry['id'], {'file': new_name})
        else:
            print("Found file without any associated entry in index:")
            print(entry['file'])
            action = ''
            while action.lower() not in ['import', 'delete']:
                action = tools.rawInput("What to do? [import / delete] ")
                action = action.lower()
            if action == 'import':
                extension = tools.getExtension(entry['file'])
                # The temporary copy keeps the extension of the original
                # file; presumably the new name is derived from the source
                # name - confirm against backend.getNewName.
                with tempfile.NamedTemporaryFile(suffix=extension) as tmp:
                    shutil.copy(entry['file'], tmp.name)
                    try:
                        os.remove(entry['file'])
                    except OSError:
                        tools.warning("Unable to delete file " +
                                      entry['file'])
                    # addFile expects at least (src, filetype, manual,
                    # autoconfirm, tag). filetype is only compared with
                    # 'article', 'book' and None there, so None is passed to
                    # let it try every identifier type.
                    if not addFile(tmp.name, None, False, False, ''):
                        tools.warning("Unable to reimport file " +
                                      entry['file'])
                        # Put the file back: the temporary copy is deleted
                        # when this block ends and would be the only one
                        # left.
                        if not os.path.exists(entry['file']):
                            try:
                                shutil.copy(tmp.name, entry['file'])
                            except (shutil.Error, IOError, OSError):
                                tools.warning("Unable to restore file " +
                                              entry['file'])
            else:
                backend.deleteFile(entry['file'])
                print(entry['file'] + " removed from disk and " +
                      "index.")
    # Check for empty tag dirs
    folder = config.get("folder")
    for i in os.listdir(folder):
        # Test the full path: a bare name would be resolved against the
        # current working directory.
        if os.path.isdir(folder + i) and not os.listdir(folder + i):
            try:
                os.rmdir(folder + i)
            except OSError:
                tools.warning("Found empty tag dir " + folder + i +
                              " but could not delete it.")
def addFile(src, filetype, manual, autoconfirm, tag, rename=True):
    """
    Add a file to the library

    The DOI / arXiv id / ISBN of src is looked up through fetcher (unless
    manual is set) or asked to the user, the matching bibtex entry is fetched
    and appended to the index with backend.bibtexAppend. When rename is true
    the file is first copied to the name given by backend.getNewName.

    src is the path of the file to import
    filetype is 'article', 'book' or None (try article ids first, then ISBN)
    manual skips the automatic identifier detection when true
    autoconfirm skips the bibtex confirmation and the tag prompt when true
    tag is the tag stored with the entry when autoconfirm is true
    rename, when false, indexes src in place instead of copying it

    Returns the path of the indexed file (the copy, or src when rename is
    false), or False if the user chose to skip the file. Exits the program
    if the file cannot be copied.
    """
    doi = False
    arxiv = False
    isbn = False

    if not manual:
        try:
            if filetype == 'article' or filetype is None:
                id_type, article_id = fetcher.findArticleID(src)
                if id_type == "DOI":
                    doi = article_id
                elif id_type == "arXiv":
                    arxiv = article_id

            if filetype == 'book' or (doi is False and arxiv is False and
                                      filetype is None):
                isbn = fetcher.findISBN(src)
        except KeyboardInterrupt:
            # Ctrl-C during the lookup: drop partial results and fall back to
            # the manual prompts below.
            doi = False
            arxiv = False
            isbn = False

    if doi is False and isbn is False and arxiv is False:
        # Nothing detected: ask the user which identifier to type in.
        # Answering "manual" leaves every identifier unset, so the bibtex
        # string below is empty.
        if filetype is None:
            tools.warning("Could not determine the DOI nor the arXiv id nor " +
                          "the ISBN for " + src + ". Switching to manual entry.")
            choices = ['doi', 'arxiv', 'isbn', 'manual', 'skip']
            question = "DOI / arXiv / ISBN / manual / skip? "
        elif filetype == 'article':
            tools.warning("Could not determine the DOI nor the arXiv id for " +
                          src + ", switching to manual entry.")
            choices = ['doi', 'arxiv', 'manual', 'skip']
            question = "DOI / arXiv / manual / skip? "
        elif filetype == 'book':
            choices = ['isbn', 'manual', 'skip']
            question = "ISBN / manual / skip? "
        else:
            # Any other filetype value: no prompt at all, as before.
            choices = []
            question = ''

        if choices:
            answer = ''
            while answer not in choices:
                answer = tools.rawInput(question).lower()

            if answer == 'doi':
                doi = tools.rawInput('DOI? ')
            elif answer == 'arxiv':
                arxiv = tools.rawInput('arXiv id? ')
            elif answer == 'isbn':
                # Normalize the ISBN the same way whichever prompt asked
                # for it (spaces and dashes removed).
                isbn = (tools.rawInput('ISBN? ').
                        replace(' ', '').
                        replace('-', ''))
            elif answer == 'skip':
                return False
    elif doi is not False:
        print("DOI for " + src + " is " + doi + ".")
    elif arxiv is not False:
        print("ArXiv id for " + src + " is " + arxiv + ".")
    elif isbn is not False:
        print("ISBN for " + src + " is " + isbn + ".")

    if doi is not False and doi != '':
        # Add extra \n for bibtexparser
        bibtex = fetcher.doi2Bib(doi).strip().replace(',', ",\n") + "\n"
    elif arxiv is not False and arxiv != '':
        bibtex = fetcher.arXiv2Bib(arxiv).strip().replace(',', ",\n") + "\n"
    elif isbn is not False and isbn != '':
        # Idem
        bibtex = fetcher.isbn2Bib(isbn).strip() + "\n"
    else:
        bibtex = ''

    bibtex = bibtexparser.loads(bibtex)
    bibtex = bibtex.entries_dict
    if bibtex:
        # Only the first parsed entry is used.
        bibtex_name = list(bibtex.keys())[0]
        bibtex = bibtex[bibtex_name]
        bibtex_string = tools.parsed2Bibtex(bibtex)
    else:
        bibtex_string = ''

    if not autoconfirm:
        bibtex = checkBibtex(src, bibtex_string)
        tag = tools.rawInput("Tag for this paper (leave empty for default) ? ")
    # With autoconfirm the tag passed by the caller is used as is, instead of
    # reading the module-level command line arguments.
    bibtex['tag'] = tag

    if rename:
        new_name = backend.getNewName(src, bibtex, tag)

        while os.path.exists(new_name):
            tools.warning("file " + new_name + " already exists.")
            # Insert " (2)" right before the trailing extension only;
            # replacing the extension text everywhere would also alter
            # directory names that happen to contain it.
            extension = tools.getExtension(new_name)
            if extension and new_name.endswith(extension):
                default_rename = (new_name[:-len(extension)] + " (2)" +
                                  extension)
            else:
                default_rename = new_name + " (2)"
            # Kept in its own variable so that the rename flag is not
            # overwritten by the answer.
            chosen_name = tools.rawInput("New name [" + default_rename +
                                         "]? ")
            if chosen_name == '':
                new_name = default_rename
            else:
                new_name = chosen_name
        try:
            shutil.copy2(src, new_name)
        except (shutil.Error, IOError, OSError):
            # copy2 reports most failures (missing directory, permissions)
            # as IOError / OSError rather than shutil.Error.
            sys.exit("Unable to move file to library dir " +
                     config.get("folder") + ".")
    else:
        new_name = src
    bibtex['file'] = os.path.abspath(new_name)

    # Remove first page of IOP papers
    try:
        if 'IOP' in bibtex['publisher'] and bibtex['type'] == 'article':
            tearpages.tearpage(new_name)
    except (KeyError, shutil.Error, IOError):
        # Best effort: entries without publisher / type, or files that
        # cannot be rewritten, are simply left untouched.
        pass

    backend.bibtexAppend(bibtex)
    return new_name
def checkBibtex(filename, bibtex_string):
    """
    Show the bibtex entry found for filename and let the user correct it

    The entry is parsed and printed, and the user is asked to confirm it.
    As long as the answer is "n" (or the entry is incomplete), bibtex_string
    is opened in EDITOR through a temporary file and parsed again. If the
    entry had a 'file' field and the edited entry gives another path, the
    file is moved to the new path.

    filename is only used in the messages shown to the user
    bibtex_string is the bibtex source of the entry to check

    Returns the checked entry as a dict. Exits the program on Ctrl-C during
    the first confirmation.
    """
    print("The bibtex entry found for "+filename+" is:")

    bibtex = BibTexParser(bibtex_string)
    bibtex = bibtex.get_entry_dict()
    try:
        bibtex = bibtex[list(bibtex.keys())[0]]
        # Check entries are correct. Explicit checks rather than assert
        # statements, which are removed when python runs with -O.
        # NOTE(review): articles are checked for 'authors' and books for
        # 'author' - confirm both spellings are intended.
        required = ['title']
        if bibtex['type'] == 'article':
            required.append('authors')
        elif bibtex['type'] == 'book':
            required.append('author')
        required.append('year')
        for field in required:
            if not bibtex[field]:
                # Empty fields are handled like missing ones.
                raise KeyError(field)
        # Print the bibtex and confirm
        print(tools.parsed2Bibtex(bibtex))
        check = tools.rawInput("Is it correct? [Y/n] ")
    except KeyboardInterrupt:
        sys.exit()
    except (IndexError, KeyError):
        # No entry at all, or an incomplete one: go straight to the editor.
        check = 'n'

    try:
        old_filename = bibtex['file']
    except KeyError:
        old_filename = False

    while check.lower() == 'n':
        with tempfile.NamedTemporaryFile(suffix=".tmp") as tmpfile:
            tmpfile.write(bibtex_string.encode('utf-8'))
            tmpfile.flush()
            subprocess.call([EDITOR, tmpfile.name])
            tmpfile.seek(0)
            bibtex = BibTexParser(tmpfile.read().decode('utf-8')+"\n")

        bibtex = bibtex.get_entry_dict()
        try:
            bibtex = bibtex[list(bibtex.keys())[0]]
        except (IndexError, KeyError):
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue
        # Only rejected when all three fields are missing.
        if('authors' not in bibtex and 'title' not in bibtex and 'year' not in
           bibtex):
            tools.warning("Invalid bibtex entry")
            bibtex_string = ''
            tools.rawInput("Press Enter to go back to editor.")
            continue

        if old_filename is not False and 'file' not in bibtex:
            tools.warning("Invalid bibtex entry. No filename given.")
            tools.rawInput("Press Enter to go back to editor.")
            check = 'n'
        else:
            bibtex_string = tools.parsed2Bibtex(bibtex)
            print("\nThe bibtex entry for "+filename+" is:")
            print(bibtex_string)
            check = tools.rawInput("Is it correct? [Y/n] ")
    if old_filename is not False and old_filename != bibtex['file']:
        try:
            print("Moving file to new location…")
            shutil.move(old_filename, bibtex['file'])
        except (shutil.Error, IOError, OSError):
            # shutil.move reports missing files and permission problems as
            # IOError / OSError, not only shutil.Error.
            tools.warning("Unable to move file "+old_filename+" to " +
                          bibtex['file']+". You should check it manually.")

    return bibtex