class DgContentSpider(scrapy.Spider):
    """Spider that crawls one post page and yields its title, content and URL.

    Everything in the class body below runs once, when the class is defined
    (i.e. when this module is imported): a row is fetched through
    ``dbhandle_geturl`` for ``urlSettings.GROUP_ID``, its fields are stored
    as class attributes, and the URL's status is set to 1 via
    ``dbhandle_update_status``.
    """

    print('LOGS: Spider DgContentPhantomSpider Staring  ...')

    # Fetch the URL record to crawl from the database.
    # NOTE(review): result is indexed without a None/length check, so a
    # missing row presumably fails here at import time -- confirm what
    # dbhandle_geturl returns when there is nothing to crawl.
    result = dbhandle_geturl(urlSettings.GROUP_ID)
    url = result[0]
    spider_name = result[1]
    site = result[2]
    gid = result[3]
    module = result[4]

    # Spider name used by Scrapy to identify this spider.
    name = contentSettings.SPIDER_NAME

    # Domains the spider is allowed to crawl.
    allowed_domains = [contentSettings.DOMAIN]

    # The single URL fetched above is the only start URL.
    start_urls = [url]

    # Set status to 1 for this URL whether or not the crawl later succeeds,
    # so that the same page is not picked up again in an endless loop.
    dbhandle_update_status(url, 1)

    def parse(self, response):
        """Build and yield one ``DgspiderPostItem`` from the response.

        The item carries:
          * ``title`` -- result of ``.extract()`` on ``string(.)`` evaluated
            against the nodes matched by ``contentSettings.POST_TITLE_XPATH``;
          * ``text``  -- result of ``.extract()`` on the nodes matched by
            ``contentSettings.POST_CONTENT_XPATH``;
          * ``url``   -- the class-level ``url`` loaded from the database
            (not ``response.url``).
        """
        item = DgspiderPostItem()

        # Wrap the response so XPath queries can be run against the page.
        selector = Selector(response)

        # Post title: text value of the matched title node(s).
        title_nodes = selector.xpath(contentSettings.POST_TITLE_XPATH)
        item['title'] = title_nodes.xpath('string(.)').extract()

        # Post body: raw markup of the matched content node(s).
        item['text'] = selector.xpath(contentSettings.POST_CONTENT_XPATH).extract()

        # URL this spider was started with.
        item['url'] = DgContentSpider.url

        yield item
def _parse_post_segments(content):
    """Split stored post content into the segment dicts uploaded as ``cs``.

    ``content`` is split on ``[dgimg]``. A chunk without a closing
    ``[/dgimg]`` becomes a text-only segment. Otherwise the part before
    ``[/dgimg]`` is read as ``;``-separated image parameters (``i``, ``w``,
    ``h``) and the part after it becomes the segment's ``c`` value.
    Missing image parameters default to ``''``.
    """
    segments = []
    for chunk in content.split('[dgimg]'):
        parts = chunk.split('[/dgimg]')
        if len(parts) == 1:
            # No closing tag in this chunk: it is plain text.
            segments.append({"c": parts[0], "i": '', "w": '', "h": ''})
            continue

        params = parts[0].split(';')
        # Pad to three values so a tag with fewer than three parameters
        # yields empty strings instead of raising IndexError.
        i, w, h = (params + ['', '', ''])[:3]
        segments.append({"c": parts[1], "i": i, "w": w, "h": h})
    return segments


def post_handel(url):
    """Upload the stored post for ``url`` and set its status to 2.

    Reads the row returned by ``dbhandle_get_content(url)`` (title, content,
    user id and group id at indexes 0-3), converts the content into a
    JSON-encoded list of segments, passes the payload to ``upload_post`` and
    then calls ``dbhandle_update_status(url, 2)``.

    Returns:
        None.
    """
    result = dbhandle_get_content(url)

    title = result[0]
    content = result[1]
    user_id = result[2]
    gid = result[3]

    json_data = {
        # NOTE(review): API signature is hard-coded in source; consider
        # moving it to settings.
        "apisign": "99ea3eda4b45549162c4a741d58baa60",
        "user_id": user_id,
        "gid": gid,
        "t": title,
        "cs": json.dumps(_parse_post_segments(content)),
    }

    # Upload the post.
    upload_post(json_data)

    # Status 2: post uploaded.
    # NOTE(review): the status is updated without inspecting what
    # upload_post returned -- confirm whether failures should be detected.
    dbhandle_update_status(url, 2)