示例#1
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'lish'
import imdb
import sys
# Python 2 idiom: site.py removes sys.setdefaultencoding at start-up, so the
# module is reloaded to get it back before switching the implicit
# str<->unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Earlier variant that derived the directory from the script location:
# base_path=os.path.split( os.path.realpath( sys.argv[0] ) )[0]
base_path='/home/lish/imread/chapcontent/'

# Shared database handle used by ClassifiedTags below.
# NOTE(review): the arguments look like host, port, user, password -- confirm
# against imdb.IMReadDB; if so, the credentials should not be hard-coded here.
IMOfficialDB=imdb.IMReadDB("192.168.0.34",3306,"ebook","ebook%$amRead")

def ClassifiedTags():
    """Fill public_db.tmp_con_tag with the most frequent tags of each class.

    Class ids are read from ebook_con.con_class (ids below 60, excluding 0
    and 11). For every class one ``INSERT ... SELECT`` statement copies at
    most 200 tags into public_db.tmp_con_tag, ranked by how many
    con_tag_content rows link the tag to a book of that class.

    Returns:
        None. All work is done through the module-level ``IMOfficialDB``.
    """
    selectsql='SELECT class_id FROM ebook_con.con_class WHERE class_id<60 and class_id<>0 and class_id<>11'
    class_rows=IMOfficialDB.selectdb(selectsql)

    for class_row in class_rows:
        # int() guarantees that only a number is interpolated into the SQL.
        classid=int(class_row[0])
        insertsql="""INSERT INTO public_db.tmp_con_tag (tag_id,tag_name,tag_frequency,class_id)
                    SELECT aa.tag_id,aa.tag_name,bb.num,bb.class_id FROM ebook_con.con_tag aa,
                    (SELECT a.tag_id,COUNT(*)num,b.class_id FROM ebook_con.con_tag_content a,
                    (SELECT book_id,book_name,book_tag,class_id FROM ebook_con.con_book WHERE class_id=%s)b
                    WHERE a.content_id=b.book_id GROUP BY a.tag_id)bb WHERE aa.tag_id=bb.tag_id ORDER BY bb.num desc limit 200"""%classid
        # The return value is intentionally not stored: binding it to the
        # name of the collection being iterated only obscured the loop.
        IMOfficialDB.insertdb(insertsql)


# Rebuild the per-class tag table only when the file is run as a script,
# not when it is imported.
if __name__ == '__main__':
    ClassifiedTags()
示例#2
0
# -*- coding: utf-8 -*-
__author__ = 'lish'
import imdb
import os, sys
# Root directory of the chapter text files; the loop below reads
# <base_path><book_id>/charpters/<chapter_id>.txt.
base_path = '/opt/www/api/attachment/imread/chapcontent/'
# NOTE(review): the arguments look like host, port, user, password -- confirm
# against imdb.IMReadDB; if so, the credentials should not be hard-coded here.
imreaddb = imdb.IMReadDB("100.98.73.21", 3306, "ebook", "4titbrVcvnP6LSFA")

# Books with source_id=2, no mcp_id and no word_count yet: these are the rows
# whose word_count is computed below.
selectsql = 'SELECT book_id from ebook_con.con_book where mcp_id is null and source_id=2 and word_count is null'
resultebids = imreaddb.selectdb(selectsql)

for resultebid in resultebids:
    bid = int(resultebid[0])
    print '正在更新图书bid:%s word_count字段内容!' % bid
    wordcount = 0
    selectsql = 'SELECT chapter_id from ebook_con.con_chapter where book_id=%s' % bid
    resultecids = imreaddb.selectdb(selectsql)
    for resultecid in resultecids:
        cid = int(resultecid[0])
        try:
            chapterpath = base_path + '%s/charpters/%s.txt' % (bid, cid)
            fr = open(chapterpath, 'r')
            conts = fr.readlines()

            for cont in conts:
                wordcount += len(cont.replace('\n', '').replace(' ', '')) / 3
        except:
            continue
    wordcount = str(float(wordcount) / 10000) + '万'

    if wordcount != '0.0万':
        updatesql = "update ebook_con.con_book set  word_count='%s' where book_id=%s" % (
示例#3
0
import imdb, imcrawl, os, sys
import ConfigParser

# Module-level configuration. No ``global`` statement is needed here: names
# assigned at module scope are already module globals.

# Directory of the running script, with a trailing slash; the configuration
# file is expected to sit next to the script.
conf_path = os.path.dirname(os.path.realpath(sys.argv[0])) + '/'
cf = ConfigParser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so a missing
# imopenapi.conf surfaces as NoSectionError on the first cf.get() below.
cf.read(conf_path + "imopenapi.conf")

# URL and filesystem prefixes read from the [prefixurl] and [prefixpath]
# sections.
base_url = cf.get("prefixurl", "base_url")
base_path = cf.get("prefixpath", "base_path")

# Database connection settings from the [db] section.
db_port = cf.getint("db", "db_port")
db_user = cf.get("db", "db_user")
db_host = cf.get("db", "db_host")
db_pass = cf.get("db", "db_pass")
imreaddb = imdb.IMReadDB(db_host, db_port, db_user, db_pass)


def crawlAPI(mcpid):
    selectsql = 'SELECT api_type,api_url from ebook_con.con_mcp_api where mcp_id=%s' % mcpid
    results = imreaddb.selectdb(selectsql)
    apiurls = {}
    for result in results:
        apiurls = dict(apiurls, **{result[0]: str(result[1])})

    yuemingapp = imxml.IMxmlAPI(apiurls)

    bids = yuemingapp.BookIds()
    selectsql = 'select source_bid from ebook_con.con_book where mcp_id=%s' % mcpid
    isOldSids = []
    results = imreaddb.selectdb(selectsql)
示例#4
0
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__author__ = 'lish'

import os

import imdb
import sys
# Python 2 idiom: site.py removes sys.setdefaultencoding at start-up, so the
# module is reloaded to get it back before switching the implicit
# str<->unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Directory that contains the running script (no trailing slash).
base_path = os.path.split(os.path.realpath(sys.argv[0]))[0]

# Shared database handle used by BooksToTag below.
# NOTE(review): the arguments look like host, port, user, password and a
# default schema -- confirm against imdb.IMReadDB; if so, the credentials
# should not be hard-coded here.
formaldb = imdb.IMReadDB("123.56.138.94", 3307, "ebook", "4titbrVcvnP6LSFA",
                         "ebook_con")


# Books that carry a given tag: [book_id1, book_id2, ...]
def BooksToTag(tagid):
    """Return the ids of all books linked to ``tagid``.

    Args:
        tagid: Integer tag id; it is interpolated with ``%d``, so a
            non-numeric value raises ``TypeError``.

    Returns:
        A list of ``int`` book ids, in the order the rows are returned by
        ``formaldb.selectdb`` for public_db.tmp_con_tag_content.
    """
    selectsql = 'SELECT book_id,tag_id FROM public_db.tmp_con_tag_content where tag_id = %d' % tagid
    rows = formaldb.selectdb(selectsql)
    # Only the first column (book_id) is used; the second column just echoes
    # the tag id that was queried.
    return [int(row[0]) for row in rows]


#相关度公式
def RelatedFormula(tag1, tag2):
示例#5
0
                records[(GNewid, Gid)] = Gvalue
        Gids.append(GNewid)
        # print Gids

    ##剔除Gconts中旧分类及分类规则键值对
    for key, value in Gconts.items():
        if isinstance(key, str):
            for oldG in value:
                del Gconts[oldG]

    return Gconts


if __name__ == '__main__':
    imformaldb = imdb.IMReadDB("123.56.138.94", 3307, "ebook",
                               "4titbrVcvnP6LSFA", "ebook_con")
    imstatisticaldb = imdb.IMReadDB("182.92.184.14", 3306, "cx_fujun",
                                    "fjfjie%mysql3", "ds_read")
    selectsql = 'SELECT book_id,SUM(read_uv) FROM ds_read.prd_bid_d WHERE stat_day>20161219 GROUP BY book_id'
    resluts = imstatisticaldb.selectdb(selectsql)
    resluts = [(int(reslut[0]), int(reslut[1])) for reslut in resluts]

    tagsdict = {}
    for result in resluts:
        selectsql = 'SELECT tag_id FROM ebook_con.con_tag_content WHERE content_id=%d' % result[
            0]
        tagids = imformaldb.selectdb(selectsql)
        for tagid in tagids:
            tagdictkey = int(tagid[0])
            if tagdictkey not in tagsdict.keys():
                tagsdict[tagdictkey] = int(result[1])
示例#6
0
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__author__ = 'lish'
import urllib2
import jieba, json
import imdb
from collections import Counter
import sys
sys.path.append("../")
# Python 2: site.py deletes sys.setdefaultencoding at interpreter start-up, so
# calling it directly raises AttributeError. Reload the module first, but only
# when the attribute is really missing, so an earlier reload is not repeated.
if not hasattr(sys, 'setdefaultencoding'):
    reload(sys)
sys.setdefaultencoding('utf-8')
# NOTE(review): the arguments look like host, port, user, password -- confirm
# against imdb.IMReadDB; if so, the credentials should not be hard-coded here.
IMOfficialDB = imdb.IMReadDB("123.56.138.94", 3307, "ebook",
                             "4titbrVcvnP6LSFA")


##获取图书用于分词的内容
def segerateCont(bookid):
    introduceapi = 'http://readapi.imread.com/api/v1/book/introduce?bid=%s&spm=1.120.0.1&scm=1.320644' % bookid
    introducecont = urllib2.urlopen(introduceapi).read()
    introducecont = json.loads(introducecont)
    alltext = introducecont['book_brief'] + introducecont['tag']
    chapterlistapi = 'http://readapi.imread.com/api/v1/book/chapterlist?bid=%s&page=1&page_size=200000&order_type=asc&vt=9' % bookid
    chapterlistcont = urllib2.urlopen(chapterlistapi).read()
    chapterlistjsoncont = json.loads(chapterlistcont)
    for para in chapterlistjsoncont['chapterList']:
        if int(para['feeType']) == 0:
            chapterid = para['cid']
            # print chapterid
            chaptercontapi = 'http://readapi.imread.com/api/v2/chapter/2/%s/%s/index?auto_pay=0&cm=M3540030' % (
                bookid, chapterid)
            # print chaptercontapi
            chaptercontcont = urllib2.urlopen(chaptercontapi).read()