def test(self):
        """Check that a RelationshipConfig answers lookups for its child class.

        A single "many" relationship to ``Post`` is registered with the child
        count key ``"n_post"``; both values must be retrievable by class.
        """
        class Post(Entity):
            pass

        post_relationship = Relationship(
            Post, Relationship.Option.many, "n_post",
        )
        relationship_config = RelationshipConfig([post_relationship])

        # Both lookups are keyed by the child entity class itself.
        looked_up_option = relationship_config.get_relationship(Post)
        assert looked_up_option == Relationship.Option.many

        looked_up_key = relationship_config.get_n_child_key(Post)
        assert looked_up_key == "n_post"
    def test_check_subclass_implementation_goodcase1(self):
        """Good case: a Country -> State -> Zipcode chain of "many" relationships.

        Every parent class defines the attribute named by its relationship's
        child count key (``n_state`` on Country, ``n_zipcode`` on State), and
        the validation call is expected to finish without raising.
        """
        class Country(Entity):
            n_state = "n_state_field"

        class State(Entity):
            n_zipcode = "n_zipcode_field"

        class Zipcode(Entity):
            pass

        many = Relationship.Option.many

        # Country has many State children, counted under "n_state".
        country_to_state = Relationship(State, many, "n_state")
        Country.CONF_RELATIONSHIP = RelationshipConfig([country_to_state])

        # State has many Zipcode children, counted under "n_zipcode".
        state_to_zipcode = Relationship(Zipcode, many, "n_zipcode")
        State.CONF_RELATIONSHIP = RelationshipConfig([state_to_zipcode])

        # No assertion: the test passes as long as this call does not raise.
        Entity.validate_relationship_config()
    def test_check_subclass_implementation_goodcase2(self):
        """Good case: a "one" relationship declared without a child count key.

        ``ImagePage`` and ``ImageDownload`` both set ``id`` to the same value,
        and the relationship passes ``None`` as its key. The validation call
        is expected to finish without raising.
        """
        class ImagePage(Entity):
            id = "image_page_id"

        class ImageDownload(Entity):
            id = "image_page_id"

        page_to_download = Relationship(
            ImageDownload, Relationship.Option.one, None,
        )
        ImagePage.CONF_RELATIONSHIP = RelationshipConfig([page_to_download])

        # No assertion: the test passes as long as this call does not raise.
        Entity.validate_relationship_config()
示例#4
0
class ArtistPage(MusicWebsiteEntity):
    """Artist page entity whose children are the MusicPage entries it links to."""

    CONF_UPDATE_INTERVAL = 3600

    # One artist page has many music pages; the child count key is "n_music".
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(
            MusicPage,
            Relationship.Option.many,
            "n_music",
            recursive=False,
        ),
    ])

    # Stored fields: integer primary key, list of integer music ids, and the
    # integer field named by the relationship's child count key.
    _id = fields.IntField(primary_key=True)
    musics = fields.ListField(fields.IntField())
    n_music = fields.IntField()

    meta = {
        "collection": "site_music_artist",
        "db_alias": Config.MongoDB.database,
    }

    @property
    def artist_id(self):
        """Read-only alias for the primary key ``_id``."""
        return self._id

    def build_url(self):
        """Return ``url_builder.url_artist`` applied to this entity's ``_id``."""
        return url_builder.url_artist(self._id)

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Extract the linked music ids from an artist page.

        :param url: not used by this method.
        :param request: not used by this method.
        :param response: object whose ``text`` is parsed when ``html`` is None.
        :param html: page markup; takes precedence over ``response.text``.
        :param kwargs: accepted and ignored.
        :return: a ``ParseResult`` holding a new ``ArtistPage`` carrying the
            parsed ``musics`` list, one ``MusicPage`` child per id, empty
            ``data`` and the ``S50_Finished`` status id.
        """
        page_source = response.text if html is None else html
        document = BeautifulSoup(page_source, "html.parser")
        detail_div = document.find("div", id="detail")

        # Each link's id is the integer after the last "/" of its href.
        music_ids = []
        for anchor in detail_div.find_all("a"):
            last_segment = anchor["href"].split("/")[-1]
            music_ids.append(int(last_segment))

        parsed_artist = ArtistPage(musics=music_ids)
        music_pages = [MusicPage(_id=music_id) for music_id in music_ids]

        return ParseResult(
            entity=parsed_artist,
            children=music_pages,
            data={},
            status=Status.S50_Finished.id,
        )
class ListPage(MovieWebsiteEntity):
    """Movie list page entity whose children are the MoviePage entries it links to."""

    CONF_UPDATE_INTERVAL = 24 * 3600

    # One list page has many movie pages; the child count key is "n_movie".
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(MoviePage, Relationship.Option.many, "n_movie"),
    ])

    # Stored fields: integer primary key and the integer field named by the
    # relationship's child count key.
    _id = fields.IntField(primary_key=True)
    n_movie = fields.IntField()

    meta = {
        "collection": "site_movie_listpage",
        "db_alias": Config.MongoDB.database,
    }

    @property
    def page_num(self):
        """Read-only alias for the primary key ``_id``."""
        return self._id

    def build_url(self):
        """Return ``url_builder.url_nth_listpage`` applied to ``page_num``."""
        return url_builder.url_nth_listpage(self.page_num)

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Extract the linked movie ids from a list page.

        :param url: not used by this method.
        :param request: not used by this method.
        :param response: object whose ``text`` is parsed when ``html`` is None.
        :param html: page markup; takes precedence over ``response.text``.
        :param kwargs: accepted and ignored.
        :return: a ``ParseResult`` holding a new, empty ``ListPage``, one
            ``MoviePage`` child per link, empty ``data`` and the
            ``S50_Finished`` status id.
        """
        page_source = response.text if html is None else html
        document = BeautifulSoup(page_source, "html.parser")
        anchors = document.find("div", id="listpage").find_all("a")

        parsed_listpage = ListPage()

        # Each link's movie id is the integer after the last "/" of its href.
        movie_pages = [
            MoviePage(_id=int(anchor["href"].split("/")[-1]))
            for anchor in anchors
        ]

        return ParseResult(
            entity=parsed_listpage,
            children=movie_pages,
            data={},
            status=Status.S50_Finished.id,
        )
class HomePage(MovieWebsiteEntity):
    """Movie site home page entity whose children are the site's ListPage entries."""

    CONF_UPDATE_INTERVAL = 1

    # One home page has many list pages; the child count key is "n_listpage".
    CONF_RELATIONSHIP = RelationshipConfig([
        Relationship(ListPage, Relationship.Option.many, "n_listpage"),
    ])

    _id = fields.IntField(primary_key=True)
    description = fields.StringField()
    max_page_num = fields.IntField()
    n_listpage = fields.IntField()

    meta = {
        "collection": "site_movie_homepage",
        "db_alias": Config.MongoDB.database,
    }

    def build_url(self, **kwargs):
        """Return ``url_builder.url_first_listpage()``; ``kwargs`` are ignored."""
        return url_builder.url_first_listpage()

    def parse_response(self, url, request, response, html=None, **kwargs):
        """Read the highest page number from the pagination block.

        :param url: not used by this method.
        :param request: not used by this method.
        :param response: object whose ``text`` is parsed when ``html`` is None.
        :param html: page markup; takes precedence over ``response.text``.
        :param kwargs: accepted and ignored.
        :return: a ``ParseResult`` holding a new ``HomePage`` carrying
            ``max_page_num``, one ``ListPage`` child for every page number
            from 1 through ``max_page_num``, empty ``data`` and the
            ``S50_Finished`` status id.
        """
        page_source = response.text if html is None else html
        document = BeautifulSoup(page_source, "html.parser")
        pagination_links = document.find("div", id="pagination").find_all("a")

        # The last pagination link's href ends in the highest page number.
        last_href = pagination_links[-1]["href"]
        last_page_num = int(last_href.split("/")[-1])

        parsed_homepage = HomePage(max_page_num=last_page_num)
        list_pages = [
            ListPage(_id=page_num)
            for page_num in range(1, last_page_num + 1)
        ]

        return ParseResult(
            entity=parsed_homepage,
            children=list_pages,
            data={},
            status=Status.S50_Finished.id,
        )
示例#7
0
        musics = [
            int(a["href"].split("/")[-1])
            for a in div.find_all("a")
        ]
        entity = GenrePage(musics=musics)

        children = list()
        for music_id in musics:
            music = MusicPage(_id=music_id)
            children.append(music)

        status = Status.S50_Finished.id

        pres = ParseResult(
            entity=entity,
            children=children,
            data={},
            status=status,
        )
        return pres


# MusicPage's relationship config references ArtistPage and GenrePage, so it
# is assigned here at module level rather than inside the MusicPage class
# body. Both children are "many" relationships, counted under "n_artist" and
# "n_genre" respectively.
MusicPage.CONF_RELATIONSHIP = RelationshipConfig([
    Relationship(ArtistPage, Relationship.Option.many, "n_artist"),
    Relationship(GenrePage, Relationship.Option.many, "n_genre"),
])

# Run each entity class's implementation check when this module is executed,
# after the relationship config above has been attached.
MusicPage.validate_implementation()
ArtistPage.validate_implementation()
GenrePage.validate_implementation()