def test_valid_many_dict_update(self):
        """
        Test valid json file of a list of dicts can be used to
        update value of existing object in database.
        First modify books' url and then recover them.
        """
        path = JSON_PATH + "legal_many_books.json"
        book_db, _ = connect_to_mongo()
        with open(path, "r+") as file:
            book_dics = json.load(file)

        true_book_urls = []
        # first modify all target book_urls in db
        for book_dic in book_dics:
            book_id = book_dic["_id"]
            true_book_urls.append(book_dic["book_url"])
            query_key = {"_id": book_id}
            update_val = {"$set": {"book_url": "duckduckgo.com"}}
            book_db.update_one(query_key, update_val)
            self.assertTrue(list(book_db.find(query_key)) != [])

        # recover book url
        insert_into_db(path, db_type="book")
        for i, book_dic in enumerate(book_dics):
            book_id = book_dic["_id"]
            true_url = true_book_urls[i]
            query_key = {"_id": book_id}
            self.assertTrue(book_db.find_one(query_key)["book_url"], true_url)
    def test_valid_one_dict_update(self):
        """
        Test valid json file of a single dict can be used to
        update value of existing object in database.
        First modify one author's url and then recover it.
        """
        path = JSON_PATH + "legal_one_author.json"
        _, author_db = connect_to_mongo()
        with open(path, "r+") as file:
            author_dic = json.load(file)

        author_id = author_dic["_id"]
        true_author_url = author_dic["author_url"]
        query_key = {"_id": author_id}
        update_val = {"$set": {"author_url": "duckduckgo.com"}}
        self.assertTrue(list(author_db.find(query_key)) != [])

        # modify author url to duckduckgo
        author_db.update_one(query_key, update_val)
        author_url_modified = author_db.find_one(query_key)["author_url"]
        self.assertEqual(author_url_modified, "duckduckgo.com")

        # recover author url
        insert_into_db(path, db_type="author")
        author_url_recovered = author_db.find_one(query_key)["author_url"]
        self.assertEqual(author_url_recovered, true_author_url)
 def test_book_missing_key(self):
     """
     Test author with missing related authors will be blocked
     """
     path = JSON_PATH + "author_miss_related_authors.json"
     try:
         insert_into_db(path, "author")
         self.assertTrue(False)  # An error should be thrown
     except:
         self.assertTrue(True)
 def test_author_missing_key(self):
     """
     Test book json file with missing url will be blocked.
     """
     path = JSON_PATH + "book_miss_book_url.json"
     try:
         insert_into_db(path, db_type="book")
         self.assertTrue(False)  # An error should be thrown
     except:
         self.assertTrue(True)
 def test_malformatted_json(self):
     """
     Test non-parseable JSON file will cause error
     """
     path = JSON_PATH + "malformed_author.json"
     try:
         insert_into_db(path, db_type="author")
         self.assertTrue(False)  # An error should be thrown
     except:
         self.assertTrue(True)
    def test_valid_single_dict_create(self):
        """
        Test valid json file that contains a single dictionary could be inserted,
        by deleting one existing author and reinsert it.
        """
        path = JSON_PATH + "legal_one_author.json"
        _, author_db = connect_to_mongo()
        with open(path, "r+") as file:
            author_dic = json.load(file)

        author_id = author_dic["_id"]
        self.assertTrue(list(author_db.find({"_id": author_id})) != [])
        author_db.delete_one({"_id": author_id})
        self.assertTrue(list(author_db.find({"_id": author_id})) == [])

        insert_into_db(path, db_type="author")
        self.assertTrue(list(author_db.find({"_id": author_id})) != [])
    def test_valid_many_dict_create(self):
        """
        Test valid json file that contains a list of dictionaries could be inserted,
        by deleting many existing books and reinserting.
        """
        path = JSON_PATH + "legal_many_books.json"
        book_db, _ = connect_to_mongo()
        with open(path, "r+") as file:
            book_dics = json.load(file)

        for _, book_dic in enumerate(book_dics):
            book_id = book_dic["_id"]
            self.assertTrue(list(book_db.find({"_id": book_id})) != [])
            book_db.delete_one({"_id": book_id})
            self.assertTrue(list(book_db.find({"_id": book_id})) == [])

        insert_into_db(path, "book")
        for _, book_dic in enumerate(book_dics):
            book_id = book_dic["_id"]
            self.assertTrue(list(book_db.find({"_id": book_id})) != [])
Exemplo n.º 8
0
            assert "visited_authors.pkl" in progress_dir, "visited_authors.pkl not found!" + prompt

        assert args.max_book > 0, "max_book must be a positive integer."
        assert args.max_book <= 2000, "max_book should be less than 2000."
        assert args.max_author > 0, "max_author must be a positive integer."
        assert args.max_author <= 2000, "max_author should be less than 2000."


if __name__ == "__main__":
    parser = construct_parser()
    validate_scrape_args(parser)
    args = parser.parse_args()
    if args.which == "scrape":  # run command "scrape"
        start_url = args.start_url
        max_book = args.max_book
        max_author = args.max_author
        new_scrape = args.new
        scrape_start(new_scrape, start_url, max_book, max_author, PROGRESS_DIR)
    elif args.which == "update":  # run command "update"
        type_json = args.type
        src_json = args.srcJSON
        insert_into_db(src_json, type_json)
    elif args.which == "export":  # run command "export"
        db_choice = args.db
        dump_db(db_choice)
    elif args.which == "draw":  # run command "draw"
        build_graph()
    else:  # invalid input
        print("error : invalid input.")
        parser.print_help()