def test_rss_from_google(self):
    """Read the feed list in ``data/subscriptions.xml`` and round-trip it through a database.

    The test checks that:

    * the number of feeds enumerated from the file equals
      ``TestRSS.nb_rss_blog`` (duplicates are logged when it does not),
    * sorting the feeds yields a consistent order,
    * the feeds can be stored in table ``blogs`` of a fresh database and
      read back, both as raw rows and as ``StreamRSS`` objects.

    Raises:
        Exception: if the number of feeds is unexpected or the sorted
            list is not ordered.
        AssertionError: if any database check fails.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    path = os.path.abspath(os.path.split(__file__)[0])
    subscriptions = os.path.join(path, "data", "subscriptions.xml")
    assert os.path.exists(subscriptions)

    res = list(StreamRSS.enumerate_stream_from_google_list(subscriptions))
    ordered = sorted(res)

    if len(res) != TestRSS.nb_rss_blog:
        # Log every feed and point out duplicates to help diagnose the
        # mismatch before failing.
        occurrences = {}
        for i, r in enumerate(ordered):
            occurrences[str(r)] = occurrences.get(str(r), 0) + 1
            fLOG(i, r)
        for k, count in occurrences.items():
            if count > 1:
                fLOG("--double", k)
        raise Exception(
            "number of expected feed %d != %d" %
            (len(res), TestRSS.nb_rss_blog))

    # Sanity check of the comparison operators: a sorted list must never
    # contain an element strictly smaller than its predecessor.
    for previous, current in zip(ordered, ordered[1:]):
        if current < previous:
            raise Exception(
                "bad order {0} < {1}".format(previous, current))

    fLOG("nb:", len(res))

    dbfile = os.path.join(path, "temp_rss.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    # First connection: store the feeds.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        StreamRSS.fill_table(db, "blogs", res)
    finally:
        # Always release the connection, even if the insertion fails.
        db.close()

    # Second connection: read everything back from disk.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        assert db.has_table("blogs")
        assert db.get_table_nb_lines("blogs") == TestRSS.nb_rss_blog

        # The last column of every row must be distinct across feeds.
        keys = {row[-1] for row in db.execute("SELECT * FROM blogs")}
        assert len(keys) == TestRSS.nb_rss_blog
        key = keys.pop()
        assert key is not None

        # iterator on StreamRSS
        obj = list(db.enumerate_objects("blogs", StreamRSS))
        assert len(obj) == TestRSS.nb_rss_blog
    finally:
        # Close the database even when an assertion above fails.
        db.close()
def test_import_flatflit(self):
    """Import a tab-separated file whose second data row has an extra tab.

    The test builds 10000 random ``(name, number)`` rows plus one row
    whose name is ``" one\\ttab"``, dumps them with pandas as a
    tab-separated file, then removes every double quote from the file,
    so the special row ends up with one more tab than the others.
    The resulting file is imported into table ``example`` of a fresh
    database with ``import_flatfile_into_database``.

    Raises:
        Exception: if the table does not contain 10001 rows or if its
            schema is not ``[('name', str), ('number', int)]``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    fold = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(fold, "temp_db_bug")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp, exist_ok=True)

    text = [
        "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "ten"]
    data = [{"name": random.choice(text),
             "number": random.randint(0, 99)}
            for _ in range(0, 10000)]

    filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
    # The row with the embedded tab is inserted in second position.
    datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]
    df = pandas.DataFrame(datatab)
    df.to_csv(
        filename,
        sep="\t",
        encoding="utf8",
        header=True,
        index=False)

    # Strip the quotes so that the embedded tab is no longer protected.
    with open(filename, "r", encoding="utf8") as f:
        content = f.read()
    content = content.replace('"', '')
    unquoted = filename + ".2.txt"
    with open(unquoted, "w", encoding="utf8") as f:
        f.write(content)

    dbfile = os.path.join(fold, "out_db.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    import_flatfile_into_database(
        dbfile,
        unquoted,
        table="example",
        fLOG=fLOG)

    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        count = db.get_table_nb_lines("example")
        sch = db.get_table_columns("example")
        values = db.execute_view("SELECT * FROM example")
    finally:
        # Release the connection even if one of the queries raises.
        db.close()

    if count != 10001:
        # Show the first imported rows and the first source rows to
        # make the failure easier to diagnose.
        rows = [str(v) for v in values][:10]
        mes = "\n".join(rows)
        fLOG(datatab[:3])
        raise Exception(
            "expect:10001 not {0}\nROWS:\n{1}".format(
                count,
                mes))

    exp = [('name', str), ('number', int)]
    if sch != exp:
        raise Exception("{0}!={1} ({2})".format(sch, exp, len(datatab)))
def test_rss_parse(self):
    """Parse blog posts from an RSS stream and store them in a database.

    Posts are enumerated twice: once with no file argument
    (presumably fetched from ``xmlUrl`` -- TODO confirm) and once from
    the local copy ``data/xdbrss.xml``. The posts read from the local
    file are stored in table ``posts`` of a fresh database. The posts
    are then inserted a second and a third time, and each time the
    number of distinct values in the last column must still be 6.

    Raises:
        AssertionError: if any of the checks fails.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    path = os.path.abspath(os.path.split(__file__)[0])
    local_copy = os.path.join(path, "data", "xdbrss.xml")
    assert os.path.exists(local_copy)

    rss = StreamRSS(titleb="XD",
                    type="rss",
                    xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml",
                    htmlUrl="http://www.xavierdupre.fr/blog/xd_blog_nojs.html",
                    keywordsb=["python"],
                    id=5)

    def distinct_last_column(database):
        """Return the set of values found in the last column of ``posts``."""
        return {row[-1] for row in database.execute("SELECT * FROM posts")}

    # Posts enumerated without a file: consumed lazily, every post must
    # have a title and there must be at least one post.
    nb = 0
    for post in rss.enumerate_post(fLOG=fLOG):
        nb += 1
        assert len(post.title) > 0
    assert nb > 0

    # Posts enumerated from the local file: kept in a list because they
    # are inserted into the database several times below.
    lres = list(rss.enumerate_post(local_copy, fLOG=fLOG))
    for post in lres:
        assert len(post.title) > 0
    nb = len(lres)
    assert nb > 0
    fLOG("nb post=", nb)

    dbfile = os.path.join(path, "temp_rssp.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    # First connection: store the posts.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        BlogPost.fill_table(db, "posts", lres)
    finally:
        # Always release the connection, even if the insertion fails.
        db.close()

    # Second connection: read back, then insert the same posts again.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        assert db.has_table("posts")
        assert db.get_table_nb_lines("posts") == nb

        keys = distinct_last_column(db)
        assert len(keys) == 6
        key = keys.pop()
        assert key is not None

        # we insert the blog a second time
        BlogPost.fill_table(db, "posts", lres)
        assert len(distinct_last_column(db)) == 6

        # we insert the blog a third time
        BlogPost.fill_table(db, "posts", lres)
        assert len(distinct_last_column(db)) == 6
    finally:
        # Close the database even when an assertion above fails.
        db.close()