def test_openLibraryStatus_output_on_connection_error(monkeypatch, capsys):
    """pageStatus must return falsy and print the connection-error message
    when requests.head raises ConnectionError."""

    def fake_head(_):
        raise ConnectionError

    monkeypatch.setattr(requests, "head", fake_head)
    assert not pageStatus(TEST_URL)
    printed = capsys.readouterr().out
    assert printed == CONNECTION_ERROR_MESSAGE.format(TEST_URL) + "\n"
def test_openLibraryStatus_output_for_wrong_status_code(monkeypatch, capsys):
    """A non-OK HTTP status code must make pageStatus return falsy and
    print the connection-error message."""

    def fake_head(_):
        # minimal stand-in for a requests.Response with a bogus status code
        return type("_", (), {"status_code": 42})

    monkeypatch.setattr(requests, "head", fake_head)
    assert not pageStatus(TEST_URL)
    printed = capsys.readouterr().out
    assert printed == CONNECTION_ERROR_MESSAGE.format(TEST_URL) + "\n"
def test_openLibraryStatus_output_if_it_can_connect(monkeypatch, capsys, status_code):
    """For an accepted status code, pageStatus must return truthy and print
    the 'Connected to:' line (status_code supplied by parametrization)."""

    def fake_head(_):
        return type("_", (), {"status_code": status_code})

    monkeypatch.setattr(requests, "head", fake_head)
    assert pageStatus(TEST_URL)
    printed = capsys.readouterr().out
    assert printed == f"Connected to: {TEST_URL}\n"
def main(author, similarity):
    """Return all book titles by *author* found on OpenLibrary.

    Results are de-duplicated: exact duplicates are dropped, common noise
    words are stripped, and titles that `similar()` judges to be alike
    (at the given *similarity* threshold) are collapsed to one entry.

    Parameters:
        author: author name used as the OpenLibrary search term.
        similarity: threshold forwarded to the project's `similar()` helper.

    Returns:
        Sorted list of cleaned titles, or None when OpenLibrary is
        unreachable or the search returns no documents.
    """
    status = pageStatus(OPEN_LIBRARY_URL)
    if status is False:
        print("(!) No valid author name, or bad internet connection.")
        print("Please try again!")
        return None

    search_url = "http://openlibrary.org/search.json?author=" + author
    response = requests.get(search_url)
    data = json.loads(response.text)["docs"]
    if not data:
        print("(!) No valid author name, or bad internet connection.")
        print("Please try again!")
        return None

    # BUG FIX: the old loop ran range(len(data) - 1) and dropped the last doc.
    titles = [doc["title"] for doc in data]
    # Drop exact duplicates while preserving first-seen order.
    unique_titles = list(dict.fromkeys(titles))

    # Noise words stripped before similarity comparison.
    # BUG FIX: a missing comma made " The" " a " concatenate into the single
    # token " The a ", so neither word was ever stripped.
    words = [
        " the ",
        "The ",
        " THE ",
        " The",
        " a ",
        " A ",
        " and ",
        " of ",
        " from ",
        "on",
        "The",
        "in",
    ]
    noise_re = re.compile(r"\b(%s)\W" % "|".join(map(re.escape, words)), re.I)
    cleaned = [noise_re.sub("", title) for title in unique_titles]

    # Collapse near-duplicate titles.
    # BUG FIX: the old code did clean_mylist.pop(a) with a == True (i.e.
    # pop(1)), mutated the list while iterating it, and compared every title
    # with itself — so arbitrary entries were removed. Keep a title only if
    # it is not similar to one already kept.
    kept = []
    for title in cleaned:
        if not any(similar(title, seen, similarity) is True for seen in kept):
            kept.append(title)
    kept.sort()

    print(" ~Books found to OpenLibrary Database:\n")
    for title in kept:
        print(title)
    return kept
def open_access_button(doi, title):
    """Query the Open Access Button API and return the decoded JSON reply.

    Searches by DOI when *doi* is given, otherwise by *title*. Prints the
    connection-error message and returns None when the service is down.
    """
    if not pageStatus(OPEN_ACCESS_BUTTON):
        print(CONNECTION_ERROR_MESSAGE.format("Open Access Button"))
        return None
    params = {"doi": doi} if doi is not None else {"title": title}
    reply = requests.get(OPEN_ACCESS_BUTTON, params=params)
    return reply.json()
def arxiv(term):
    """Search arxiv.org for *term* and return the hits as a DataFrame.

    Submits the site's first search form via mechanize, then scrapes
    title, authors, entry URL and identifier from the result page.
    Returns None (after printing an error) when arXiv is unreachable.
    """
    if not pageStatus(ARCHIV_URL):
        print(CONNECTION_ERROR_MESSAGE.format("ArXiv"))
        return None

    browser = mechanize.Browser()
    browser.set_handle_robots(False)  # ignore robots.txt
    browser.set_handle_refresh(False)  #
    browser.addheaders = [("User-agent", "Firefox")]
    browser.open(ARCHIV_URL)
    browser.select_form(nr=0)  # the site's search form
    browser.form["query"] = term
    page = soup(browser.submit(), "html.parser")

    titles = []
    for div in page.findAll("div", {"class": "list-title mathjax"}):
        titles.append(div.text.replace("Title: ", "").replace("\n", ""))

    authors = []
    for div in page.findAll("div", {"class": "list-authors"}):
        authors.append(div.text.replace("Authors:", "").replace("\n", ""))

    extensions = []
    urls = []
    for span in page.findAll("span", {"class": "list-identifier"}):
        # second anchor carries the identifier text and the relative link
        anchor = span.findAll("a")[1]
        extensions.append(str(anchor.text))
        urls.append(ARCHIV_BASE + anchor["href"])

    return pd.DataFrame({
        "Title": titles,
        "Author(s)": authors,
        "Url": urls,
        "Extension": extensions,
    })
def main_organiser(directory):
    """Organise the ebook files in *directory* into genre sub-folders.

    File names are expected to encode author and title separated by "by"
    or "-"; the genre is looked up on OpenLibrary via scraper() and the
    file is moved with cutpaste(). Files matching neither pattern are
    reported and skipped. Does nothing when OpenLibrary is unreachable.
    """
    status = pageStatus(OPEN_LIBRARY_URL)
    if status is False:
        return

    book_list = get_books(directory)
    # list only regular files, paired positionally with book_list entries
    with os.scandir(directory) as entries:
        namepath = [entry.name for entry in entries if entry.is_file()]

    for stem, filename in zip(book_list, namepath):
        print("File:", filename)
        # Try each supported author/title separator in turn.
        # BUG FIX: the scrape-and-move sequence was copy-pasted in two
        # nested try/except IndexError blocks (plus a dead `pass`); it is
        # now one helper driven by a loop over separators.
        for separator in ("by", "-"):
            try:
                _organise_one(directory, stem, filename, separator)
                break
            except IndexError:
                continue
        else:
            print("Unable to organise this file.\n")


def _organise_one(directory, stem, filename, separator):
    """Split *stem* on *separator* into author/title, scrape the genre and
    move *filename* into the matching folder.

    Raises IndexError when *stem* does not contain *separator* (the caller
    uses this to fall through to the next separator).
    """
    parts = stem.split(separator)
    book = parts[1]
    author = parts[0]
    info = scraper(book, author)
    print("\n***", book, " ", author)
    cutpaste(directory, info["genre"], filename)
def test_archiv_Status():
    """The arXiv export endpoint must be reachable (pageStatus not False)."""
    result = pageStatus(url="http://export.arxiv.org/")
    assert result is not False, "Archiv Status =! 200"
def test_open_libraryStatus():
    """OpenLibrary must be reachable (pageStatus not False)."""
    result = pageStatus(url="http://www.openlibrary.org")
    assert result is not False, "OpenLibrary Status =! 200"