Пример #1
0
 def __init__(self):
     """Create a Redis connection pool from config and attach a client to it.

     The host and port come from the "redis" config section. Original
     author's note: ``host`` is the redis host, and both the redis server
     and the client need to be started.
     """
     redis_host = mod_config.get_config("redis", "redis_host")
     redis_port = mod_config.get_config("redis", "redis_port")
     self.pool = redis.ConnectionPool(
         host=redis_host, port=redis_port, decode_responses=True)
     # The client shares the pool stored on this instance.
     self.r = redis.Redis(connection_pool=self.pool)
Пример #2
0
 def __init__(self):
     """Load the 'aliyun_oss' settings and build the OSS auth and bucket objects."""
     # Each option is stored on the instance under the same name it has in
     # the config section; the order matches the original reads.
     for option in ('bucket_name', 'access_key', 'access_secret_key',
                    'http_prefix'):
         setattr(self, option, mod_config.get_config('aliyun_oss', option))
     self.auth = oss2.Auth(self.access_key, self.access_secret_key)
     self.bucket = oss2.Bucket(self.auth, self.http_prefix, self.bucket_name)
Пример #3
0
 def __init__(self, table_name):
     """
     Connect through MongoClient and keep handles to the database and table.

     Original author's note: there are 3 ways to choose from, depending on
     the use case.

     :param table_name: key used to index the configured database; the
         result is stored as ``self.table``.
     """
     db_host = mod_config.get_config("database", "dbhost")
     # The port is converted with int() before being handed to MongoClient.
     db_port = int(mod_config.get_config("database", "dbport"))
     self.client = MongoClient(db_host, db_port)
     db_name = mod_config.get_config("database", "dbname")
     self.db = self.client[db_name]
     self.table = self.db[table_name]
Пример #4
0
class JokeNewsEngine(Document):
    """Document model for one joke news item and its counters.

    The collection name is read from the 'mongodb' / 'collection' config
    entry when the class body is evaluated.
    """
    id_str = StringField(required=True, unique=True)
    favorite_count = IntField(required=True)  # number of favorites
    go_detail_count = IntField(required=True)  # click count
    comment_count = IntField(required=True)  # number of comments
    share_count = IntField(required=True)  # number of shares
    bury_count = IntField(required=True)  # number of dislikes
    digg_count = IntField(required=True)  # number of likes
    online_time = IntField(required=True)  # timestamp
    ori_data = DictField(required=True)
    meta = {'collection': mod_config.get_config('mongodb', 'collection')}

    @classmethod
    def create_joke_obj(cls, ori_data):
        """Build a document from ``ori_data`` and save it.

        The counters and ``id_str`` are read from ``ori_data['group']``,
        ``online_time`` from the top level, and the whole ``ori_data`` is
        stored as well.

        Any exception raised while reading the keys or saving is caught,
        printed, and written to the 'mongoDBController.log' logger; nothing
        is re-raised and nothing is returned.

        :param ori_data: mapping with a 'group' sub-mapping and an
            'online_time' entry.
        """
        try:
            # Look the nested mapping up once instead of seven times.
            group = ori_data['group']
            cls(
                id_str=group['id_str'],
                favorite_count=group['favorite_count'],
                comment_count=group['comment_count'],
                go_detail_count=group['go_detail_count'],
                share_count=group['share_count'],
                bury_count=group['bury_count'],
                digg_count=group['digg_count'],
                online_time=ori_data['online_time'],
                ori_data=ori_data,
            ).save()

        except Exception as e:
            s = str(e)
            # Single-argument parenthesized print is valid on both Python 2
            # and Python 3; the two spaces reproduce the separator that the
            # old ``print "...: ", e`` statement emitted.
            print("news object save error:  %s" % s)
            logger = get_logger('mongoDBController.log')
            logger.error('joke news object save error: ' + s)

    # Check whether this news item already exists in the database.
    @classmethod
    def check_joke_obj(cls, ori_data):
        """Return True when a stored document matches ``ori_data``'s id.

        :param ori_data: mapping whose ``['group']['id_str']`` is used as
            the lookup key.
        :return: truth value of the first document whose ``id_str`` matches.
        """
        obj = cls.objects(id_str=ori_data['group']['id_str']).first()
        return bool(obj)
Пример #5
0
class RecommendTypedNewsEngine(Document):
    """Document model for one recommend-typed news item.

    The collection name is read from the 'mongodb' / 'collection' config
    entry when the class body is evaluated.
    """
    group_id = StringField(required=True, unique=True)
    news_tag = StringField(unique=False)    # news type
    comment_count = IntField(unique=False)  # number of comments
    behot_time = IntField(unique=False)     # news time
    ori_data = DictField(required=True)     # remaining fields
    meta = {'collection': mod_config.get_config('mongodb', 'collection')}

    @classmethod
    def create_regular_obj(cls, ori_data):
        """Build a document from ``ori_data`` and save it.

        Reads 'group_id', 'tag', 'comments_count' and 'behot_time' from
        ``ori_data`` and stores the whole mapping as well.

        Any exception raised while reading the keys or saving is caught,
        printed, and written to the 'mongoDBController.log' logger; nothing
        is re-raised and nothing is returned.

        :param ori_data: mapping holding the keys listed above.
        """
        try:
            cls(
                group_id=ori_data['group_id'],
                news_tag=ori_data['tag'],
                comment_count=ori_data['comments_count'],
                behot_time=ori_data['behot_time'],
                ori_data=ori_data,
            ).save()

        except Exception as e:
            s = str(e)
            # Single-argument parenthesized print is valid on both Python 2
            # and Python 3; the two spaces reproduce the separator that the
            # old ``print "...: ", e`` statement emitted.
            print("news object save error:  %s" % s)
            logger = get_logger('mongoDBController.log')
            logger.error('recommend-typed news object save error: ' + s)

    @classmethod
    def check_regular_obj(cls, ori_data):
        """Return True when a stored document matches ``ori_data``'s group id.

        :param ori_data: mapping whose 'group_id' is used as the lookup key.
        :return: truth value of the first document whose ``group_id`` matches.
        """
        obj = cls.objects(group_id=ori_data['group_id']).first()
        return bool(obj)
Пример #6
0
def main():
    """
    Read the crawler and MongoDB settings from config and start crawling.

    The news timestamp and news type to query come from the 'crawler'
    config section.
    Timestamp format: 2017-03-17 20:05:32
    News types: 'recommend','hot','image','joke','society','entertainment','tech','sports','car','finance','funny'

    The 'crawlerMode' option selects the loop: 'timestamp' or 'newest'.
    Any other value starts no loop and prints a notice.
    """
    # Timestamp to query: '0' means the newest news, while a value such as
    # '2017-03-15 18:23:05' means record news from the current time back to
    # that target point in time.
    hot_timeStamp = mod_config.get_config('crawler', 'timeStamp')
    newsType = mod_config.get_config('crawler', 'newsType')  # news type to query
    url = TouTiaoNewsURL.getTargetURL(newsType, '0')  # target url
    # Single-argument parenthesized print works on Python 2 and 3; the two
    # spaces reproduce the separator of the old ``print 'url get: ', url``.
    print('url get:  %s' % (url,))

    # Read the MongoDB database name, host and port, then connect.
    mongoDataBase = mod_config.get_config('mongodb', 'database_name')
    mongoHost = mod_config.get_config('mongodb', 'host')
    mongoPort = mod_config.get_config('mongodb', 'port')
    MongoDBController(mongoDataBase, mongoHost,
                      int(mongoPort)).connectToMongoDB()

    # The crawler API can be called here to control which news type is crawled.
    crawler = CrawlerLoopControl()
    print('crawler control constructed')

    # Read the mode once so both branches compare the same value.
    crawlerMode = mod_config.get_config('crawler', 'crawlerMode')
    if crawlerMode == 'timestamp':
        crawler.getTimeStampNewsLooply(hot_timeStamp, newsType, url)
    elif crawlerMode == 'newest':
        crawler.getRecentNewsLooply(newsType, url)
    else:
        # Previously an unrecognised mode ended silently; say why nothing ran.
        print('unknown crawlerMode: %s' % (crawlerMode,))
Пример #7
0
def start_api_tkdata():
    """Run ``myapp`` under WSGIServer bound to the configured address.

    The host comes from 'server' / 'server_host' and the port from
    'server' / 'tk_data_port' (converted with int()).
    """
    host = mod_config.get_config("server", "server_host")
    port = int(mod_config.get_config("server", "tk_data_port"))
    server = WSGIServer(myapp, bindAddress=(host, port))
    server.run()