示例#1
0
    def test_rss_from_google(self):
        """
        Enumerates the feeds listed in ``data/subscriptions.xml``, stores them
        into table ``blogs`` of a fresh database file and reads them back.

        The number of feeds must be equal to ``TestRSS.nb_rss_blog``. When it
        is not, every feed is logged in sorted order as well as every feed
        whose string representation appears more than once, then the test
        raises an exception.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.dirname(__file__))
        xml_file = os.path.join(path, "data", "subscriptions.xml")
        assert os.path.exists(xml_file)
        res = list(StreamRSS.enumerate_stream_from_google_list(xml_file))
        ordered = sorted(res)

        if len(res) != TestRSS.nb_rss_blog:
            # logs everything which helps to understand the mismatch
            counts = {}
            for i, r in enumerate(ordered):
                name = str(r)
                counts[name] = counts.get(name, 0) + 1
                fLOG(i, r)
            for name, count in counts.items():
                if count > 1:
                    fLOG("--double", name)
            raise Exception(
                "number of expected feed %d != %d" %
                (len(res), TestRSS.nb_rss_blog))

        # the comparison operator must be consistent with the sorted order
        for previous, current in zip(ordered, ordered[1:]):
            if current < previous:
                raise Exception(
                    "bad order {0} < {1}".format(previous, current))
        fLOG("nb:", len(res))

        dbfile = os.path.join(path, "temp_rss.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        # first connection: writes the feeds,
        # the database is closed even if the insertion fails
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        try:
            StreamRSS.fill_table(db, "blogs", res)
        finally:
            db.close()

        # second connection: checks what was stored,
        # the database is closed even if an assertion fails
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        try:
            assert db.has_table("blogs")
            assert db.get_table_nb_lines("blogs") == TestRSS.nb_rss_blog

            # the last column must take a distinct value on every row
            cur = db.execute("SELECT * FROM blogs")
            val = {row[-1]: 0 for row in cur}
            assert len(val) == TestRSS.nb_rss_blog
            key, _ = val.popitem()
            assert key is not None

            # iterator on StreamRSS
            obj = list(db.enumerate_objects("blogs", StreamRSS))
            assert len(obj) == TestRSS.nb_rss_blog
        finally:
            db.close()
示例#2
0
    def test_import_flatflit(self):
        """
        Imports a tab separated flat file into a database and checks the
        number of imported rows and the guessed schema.

        The file contains 10000 random rows ``(name, number)`` plus one row,
        inserted in second position, whose name includes a tabulation.
        Every double quote is removed from the file before the import.
        NOTE(review): presumably pandas quotes the field holding the
        tabulation, so removing the quotes leaves one line with an extra
        column -- confirm against ``DataFrame.to_csv`` quoting rules.

        The test expects 10001 rows in table ``example`` and the schema
        ``[('name', str), ('number', int)]``.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        fold = os.path.abspath(os.path.dirname(__file__))
        temp = os.path.join(fold, "temp_db_bug")
        # no failure if the folder already exists or is created concurrently
        os.makedirs(temp, exist_ok=True)

        text = [
            "one",
            "two",
            "three",
            "four",
            "five",
            "six",
            "seven",
            "eight",
            "nine",
            "ten"]
        data = [{"name": random.choice(text), "number": random.randint(0, 99)}
                for _ in range(0, 10000)]

        filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
        unquoted = filename + ".2.txt"

        # the row with a tabulation is inserted in second position
        datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]
        df = pandas.DataFrame(datatab)
        df.to_csv(
            filename,
            sep="\t",
            encoding="utf8",
            header=True,
            index=False)

        # second file: same content without any double quote
        with open(filename, "r", encoding="utf8") as f:
            content = f.read()
        content = content.replace('"', '')
        with open(unquoted, "w", encoding="utf8") as f:
            f.write(content)

        dbfile = os.path.join(fold, "out_db.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        import_flatfile_into_database(
            dbfile,
            unquoted,
            table="example",
            fLOG=fLOG)

        # the database is closed even if one of the queries fails
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        try:
            count = db.get_table_nb_lines("example")
            sch = db.get_table_columns("example")
            values = db.execute_view("SELECT * FROM example")
        finally:
            db.close()

        if count != 10001:
            # the first rows are added to the message to ease the investigation
            rows = [str(v) for v in values][:10]
            mes = "\n".join(rows)
            fLOG(datatab[:3])
            raise Exception(
                "expect:10001 not {0}\nROWS:\n{1}".format(
                    count,
                    mes))

        exp = [('name', str), ('number', int)]
        if sch != exp:
            raise Exception("{0}!={1} ({2})".format(sch, exp, len(datatab)))
示例#3
0
    def test_rss_parse(self):
        """
        Enumerates the posts of a blog, first with no file given to
        ``enumerate_post``, then from the local copy ``data/xdbrss.xml``,
        and stores the posts of the local copy into table ``posts``.

        The posts are inserted three times in a row; after each insertion
        the last column of table ``posts`` must still hold exactly
        6 distinct values.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.dirname(__file__))
        xml_file = os.path.join(path, "data", "xdbrss.xml")
        assert os.path.exists(xml_file)

        rss = StreamRSS(titleb="XD",
                        type="rss",
                        xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml",
                        htmlUrl="http://www.xavierdupre.fr/blog/xd_blog_nojs.html",
                        keywordsb=["python"],
                        id=5)

        # posts enumerated without giving any file
        # NOTE(review): presumably fetched from xmlUrl -- confirm
        nb = 0
        for post in rss.enumerate_post(fLOG=fLOG):
            nb += 1
            assert len(post.title) > 0
        assert nb > 0

        # posts enumerated from the local file
        lres = list(rss.enumerate_post(xml_file, fLOG=fLOG))
        for post in lres:
            assert len(post.title) > 0
        nb = len(lres)
        assert nb > 0
        fLOG("nb post=", nb)

        dbfile = os.path.join(path, "temp_rssp.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        # first connection: writes the posts,
        # the database is closed even if the insertion fails
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        try:
            BlogPost.fill_table(db, "posts", lres)
        finally:
            db.close()

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        def last_column_values():
            # distinct values found in the last column of table posts
            cur = db.execute("SELECT * FROM posts")
            return {row[-1]: 0 for row in cur}

        # the database is closed even if an assertion fails
        try:
            assert db.has_table("posts")
            assert db.get_table_nb_lines("posts") == nb

            val = last_column_values()
            assert len(val) == 6
            key, _ = val.popitem()
            assert key is not None

            # we insert the blog a second time
            BlogPost.fill_table(db, "posts", lres)
            assert len(last_column_values()) == 6

            # we insert the blog a third time
            BlogPost.fill_table(db, "posts", lres)
            assert len(last_column_values()) == 6
        finally:
            db.close()