def handle_primary_user(self, user):
        start_time = self.env.now
        # Choose a random frequency channel
        channel = random.choice(range(NUMBER_OF_CHANNELS))

        # Get the user currently using this channel
        current_user = self.network.get_current_user(channel)
        if current_user is None:
            # Channel is idle, serve PU
            user.channel_id = channel
            content = Content.get_random(self.contents)
            yield from self.network.serve(user, Cache(content.id, CacheType.BASE, 25e6),
                                          PRIMARY_USER_DISTANCE)
            self.logger.new("SERVED", False, "PU", "BASE", start_time, self.env.now, None, None, channel)
        elif current_user.type == UserType.PRIMARY:
        # Channel is used by another PU; block the arriving PU
            user.print("Block f_" + str(channel + 1), 'red')
            self.logger.new("BLOCKED", False, "PU", "BASE", start_time, self.env.now, None, None, None)
        elif current_user.type == UserType.SECONDARY:
            # Channel is used by SU, drop SU, serve PU
            user.channel_id = channel
            content = Content.get_random(self.contents)
            user.print("Preempt f_" + str(channel + 1), 'blue')
            yield from self.network.serve(user, Cache(content.id, CacheType.BASE, 25e6),
                                          PRIMARY_USER_DISTANCE)
            self.logger.new("SERVED", False, "PU", "BASE", start_time, self.env.now, None, None, channel)
Example #2
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            content = Content(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            chars, locs, orgs = content.char_recognition()
            return render_template('uploading.html', name=filename, chars=chars, locs=locs, orgs=orgs)
        elif file and not allowed_file(file.filename):
            return redirect('/bad_extension')
    return '''
Example #3
 def __get_image_or_video(self, soup): #more like get video
     content = Content()
     video = soup.find('video')
     if video is not None:
         src = soup.findAll('source')[1]
         src = src.get("src")
         content.src = src
         content.type = 'video/mp4'
         thumbnail =  video.get("poster")
         content.thumbnail = self.website + thumbnail
         return content
     else:
         return None
Example #4
 def __get_image_or_video(self, soup):
     content = Content()
     item = soup.find('img', {'class': 'post-image'})
     if item is not None:
         src = item.get('src')
         content.src = src
         if src.endswith(".gif"):
             content.type = "gif"
         else:
             content.type = "image"
         return content
     else:
         return None
Example #5
def stat_user(search_time=None, force_update=False):
    """
    统计用户信息
    """
    if not search_time:
        search_time = datetime.datetime.now()
        search_time = "%s-%s-%s" % (now_time.year, now_time.month,
                                    now_time.day)
    content = Content.get(search_time)
    if not content:
        return
    # Iterate over all users
    for types, ids in content.weibo.iteritems():
        for id, context in ids.iteritems():
            user = Weibo.get(id)
            # Compute today's stats if they do not exist yet (or force_update is set)
            if not user.stat_info.get(search_time) or force_update:
                create_at = []
                for tmp_content in context:
                    if not tmp_content.get('created_at'): continue
                    print types, tmp_content['created_at']
                    create_at.append(
                        tmp_content['created_at'].split(' ')[3].split(':')[0])
                user.stat_info[search_time] = {
                    'send_count': len(context),
                    'create_at': create_at
                }
                # print create_at, user.stat_info[search_time]
                user.put()
Example #6
    def __scrape(self, page_source, minimumUpvotes, minimumComments):

        results = []
        soup = BeautifulSoup(page_source, "html.parser")
        #save source
        #self.gather_web(soup.prettify())
        articles = soup.findAll("article", "badge-entry-container")
        for ele in articles:
            try:
                read_more_link = ele.find("a", {'class': 'post-read-more'})
                if read_more_link is not None:
                    continue
                upvotes = ele.find("span", {'class': 'badge-item-love-count'})
                comments = ele.find("a", {'class': 'comment'})
                if upvotes is not None:
                    likes = int(upvotes.text.replace(",", ""))
                    if likes > minimumUpvotes or \
                            (comments is not None and int(comments.text.replace(" comments", "")) > minimumComments):
                        title = ele.find("h2", {'class': 'badge-item-title'})
                        content = self.__get_image_or_video(ele)
                        if content is not None and title is not None:
                            src = content.src
                            post = PostModel(title.text, src, content.type, src, likes, content.thumbnail)
                            results.append(post)
            except Exception as ex:
                print('Exception occurred while scraping data! ' + str(ex))
        return results
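Each of these __scrape variants builds a PostModel from positional arguments (title, media source, content type, link, like count and sometimes a thumbnail), but the model itself is not part of the listing. A minimal sketch of a container matching those calls, with all field names assumed:

class PostModel:
    """Hypothetical sketch of the post container the scrapers construct."""

    def __init__(self, title, src, content_type, link, likes, thumbnail=''):
        self.title = title                 # post title text
        self.src = src                     # media source url
        self.content_type = content_type   # 'image', 'gif', 'video/mp4', ...
        self.link = link                   # link to the post (same as src in these scrapers)
        self.likes = likes                 # upvote / share count
        self.thumbnail = thumbnail         # optional thumbnail url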
Example #7
def stat_user(search_time=None, force_update=False):
    """
    Collect user statistics.
    """
    if not search_time:
        now_time = datetime.datetime.now()
        search_time = "%s-%s-%s" % (now_time.year, now_time.month, now_time.day)
    content = Content.get(search_time)
    if not content:
        return
    # Iterate over all users
    for types, ids in content.weibo.iteritems():
        for id, context in ids.iteritems():
            user = Weibo.get(id)
            # Compute today's stats if they do not exist yet (or force_update is set)
            if not user.stat_info.get(search_time) or force_update:
                create_at = []
                for tmp_content in context:
                    if not tmp_content.get('created_at'): continue
                    print types, tmp_content['created_at']
                    create_at.append(tmp_content['created_at'].split(' ')[3].split(':')[0])
                user.stat_info[search_time] = {'send_count': len(context), 'create_at': create_at}
                # print create_at, user.stat_info[search_time]
                user.put()
Example #8
    def __scrape(self, posts, minimumUpvotes):
        results = []
        ele = posts
        html = ele.get_attribute('innerHTML')
        soup = BeautifulSoup(html, "html.parser")
        try:
            upvotes = soup.find("div", {'class': 'ratingblock'})
            if upvotes is not None:
                # ugly but easy: pull the numerator out of "Rating: x/5 (...)"
                parseRating = upvotes.text.split("Rating: ")[1].split("(")[0].split("/")[0]
                likes = float(parseRating)
                if likes > minimumUpvotes:
                    title = soup.find("h1")
                    content = self.__get_image_or_video(soup)
                    likes = int(likes * 1000)  # the rating is out of 5, so scale it up
                    if content is not None and title is not None:
                        src = content.src
                        post = PostModel(title.text, src, content.type, src, likes, content.thumbnail)
                        results.append(post)
        except Exception as ex:
            print('Exception occurred while scraping data! ' + str(ex))
        return results
Example #9
 def user_upvote_based_recommend(self, upvotes, target_content):
     """
     @data: lists of recommended content ids that can be retrieved by user id
     """
     engine = UserUpvoteBasedRecommendEngine(self)
     data = engine.run(Upvotes(upvotes), Content(target_content))
     return data
Example #10
 def user_comment_based_recommend(self, comments, target_content):
     """
     @data: lists of recommended content ids that can be retrieved by user id
     """
     engine = UserCommentBasedRecommendEngine(self)
     data = engine.run(Contents(comments), Content(target_content))
     return data
Example #11
 def user_viewed_based_recommend(self, viewHistories, target_content):
     """
     @data: lists of recommended content ids that can be retrieved by user id
     """
     engine = UserViewedBasedRecommendEngine(self)
     data = engine.run(ViewHistories(viewHistories), Content(target_content))
     return data
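The three wrappers above differ only in which engine and wrapper type they feed. A hypothetical call site (the recommender instance, ids and argument values are made up, not taken from the source) might look like:

# Hypothetical usage sketch: Recommender, the ids and the target content are invented.
recommender = Recommender()

upvote_based = recommender.user_upvote_based_recommend(
    upvotes=[101, 102, 103],       # content ids the user upvoted
    target_content=205)            # content currently being viewed

comment_based = recommender.user_comment_based_recommend(
    comments=[101, 150],           # content ids the user commented on
    target_content=205)

view_based = recommender.user_viewed_based_recommend(
    viewHistories=[99, 101, 205],  # content ids the user viewed
    target_content=205)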
Example #12
    def __scrape(self, posts, minimumUpvotes):
        results = []
        for ele in posts:
            html = ele.get_attribute('innerHTML')
            soup = BeautifulSoup(html, "html.parser")
            #self.gather_web(soup.prettify())
            try:
                upvotes = soup.find("div", {'class': 'score unvoted'})
                if upvotes is not None:
                    if upvotes.text == '•':
                        continue
                    likes = int(upvotes.text)

                    if likes > minimumUpvotes:
                        title = soup.find("a", {'class': 'title'})
                        content = self.__get_image_or_video(soup)
                        if content is not None and title is not None:
                            src = content.src
                            post = PostModel(title.text, src, content.type,
                                             src, likes, content.thumbnail)
                            results.append(post)
            except Exception as ex:
                print('Exception occurred while scraping data! ' + str(ex))
        return results
Example #13
 def get_random(contents):
     """ Return random cache for initial filling"""
     content = Content.get_random(contents)
     if random.random() < .5:
         return Cache(content.id, CacheType.ENHANCEMENT,
                      content.enhancement)
     else:
         return Cache(content.id, CacheType.BASE, content.base)
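get_random above (and the simulation code elsewhere in this listing) assumes a Cache value object and a CacheType enum that are not shown. A minimal sketch of what they might look like, with field names and enum values inferred from how they are used rather than taken from the source:

from enum import Enum


class CacheType(Enum):
    BASE = 'base'                # base video layer
    ENHANCEMENT = 'enhancement'  # enhancement (HQ) layer


class Cache:
    """Hypothetical sketch: one cached layer of a content item."""

    def __init__(self, content_id, cache_type, size):
        self.content_id = content_id  # id of the content this layer belongs to
        self.type = cache_type        # CacheType.BASE or CacheType.ENHANCEMENT
        self.size = size              # layer size in bits (e.g. 25e6 for a base layer)

    def __repr__(self):
        return "Cache({}, {}, {:.0f})".format(self.content_id, self.type.name, self.size)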
Example #14
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            content = Content(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            chars, locs, orgs = content.char_recognition()
            return render_template('uploading.html',
                                   name=filename,
                                   chars=chars,
                                   locs=locs,
                                   orgs=orgs)
        elif file and not allowed_file(file.filename):
            return redirect('/bad_extension')
    return '''
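Both upload_file variants call an allowed_file helper that is not included in the listing. The usual Flask-documentation pattern for it, shown here with an assumed extension whitelist, would be:

# Assumed whitelist; the real project may accept a different set of extensions.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'pdf'}


def allowed_file(filename):
    """Return True if the filename has an extension from the whitelist."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS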
Example #15
 def get(self):
     counts = {}
     counts["content"] = Content.all(keys_only=True).count()
     counts["image"] = Image.all(keys_only=True).count()
     counts["image_data"] = ImageData.all(keys_only=True).count()
     counts["link"] = Link.all(keys_only=True).count()
     counts["style"] = Style.all(keys_only=True).count()
     counts["workspace"] = Workspace.all(keys_only=True).count()
     self.response.out.write(str(counts) + "<p><form method='POST' action='/cleanup'><input type=submit value='Clean up'></form>")
Example #16
 def post(self, args):
     ## Check the carousel exists
     carousel = Carousel.query.filter_by(name=args['carousel']).first()
     if carousel is None:
         abort(404, message="Carousel not found")
     ## Parse the parameters into mappable data
     contentData = {}
     contentData['name'] = args['name']
     contentData['content_type'] = args['type']
     contentData['content_location'] = args['location']
     contentData['slide_interval'] = args['interval']
     contentData['is_enabled'] = args['enabled']
     ## Create the content instance
     content = Content(**contentData)
     content.carousel_id = carousel.identifier
     ## Insert the Content instance into the database
     db.session.add(content)
     db.session.commit()
     ## Return that the resource has been created
     return "", 201
Example #17
def when_get_contents_with_one_far_around_keyword_returns_no_result(app):
    # given
    content1 = Content(url="http://content1.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Can hipster-neo-farmers save the world ?")
    content2 = Content(url="http://content2.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Do we have enough quinoa for all the children ?")

    ApiHandler.save(content1, content2)

    # when
    contents = filter_contents_with_keywords('hipsterssss').all()

    # then
    assert len(contents) == 0
Example #18
 def post(self):
     user = users.get_current_user()
     if user and user.nickname() == "coolcucumber":
         # Deletes all Datastore data!!!
         db.delete(Content.all(keys_only=True).fetch(None))
         db.delete(Image.all(keys_only=True).fetch(None))
         db.delete(ImageData.all(keys_only=True).fetch(None))
         db.delete(Link.all(keys_only=True).fetch(None))
         db.delete(Style.all(keys_only=True).fetch(None))
         db.delete(Workspace.all(keys_only=True).fetch(None))
         self.response.out.write("Cleaned up")
     else:
         self.response.out.write("Unauthorized user")
Example #19
def when_get_contents_with_keyword_tag_returns_result(app):
    # given
    content1 = Content(url="http://content1.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Can hipster-neo-farmers save the world ?")
    content2 = Content(url="http://content2.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Do we have enough quinoa for all the children ?")
    tag1 = Tag(label="Climate")
    content_tag1 = ContentTag(content=content1, tag=tag1)

    ApiHandler.save(content1, content2, content_tag1)

    # when
    contents = filter_contents_with_keywords('clim').all()

    # then
    assert len(contents) == 1
    assert content1 in contents
Example #20
 def __get_image_or_video(self, ele):
     content = Content()
     video = ele.find("source")
     if video is not None:
         content.type = 'video/mp4'
         content.src = video.get('src')
         thumbnail = ele.find("img", {'class': 'badge-item-img'})
         if thumbnail is not None:
             thumbnail = thumbnail.get('src')
             content.thumbnail = thumbnail
         else:
             content.thumbnail = ''
         return content
     else:
         image = ele.find("img", {'class': 'badge-item-img'})
         if image is not None:
             content.type = 'image'
             content.src = image.get('src')
             content.thumbnail = ''
             return content
         else:
             return None
Example #21
def content_from_url(url, sync_crowdtangle=False, **kwargs):
    content = Content.create_or_modify({'__SEARCH_BY__': 'url', 'url': url})

    if sync_crowdtangle:
        attach_crowdtangle_entities_from_content(
            content, request_start_date='2019-09-01')

    trending = buzzsumo_trending_from_url(url, **kwargs)
    if trending:
        return content.modify(trending)

    if url:
        newspaper = newspaper_from_url(url, **kwargs)
        if newspaper:
            return content.modify(newspaper)

    content.urlNotFound = True
    return content
Example #22
def attach_crowdtangle_entities_from_content(content, request_start_date):

    # create a "CrowdTangle" user to testify that these Facebook posts are connected to the url
    crowdtangle_user = User.create_or_modify({
        '__SEARCH_BY__': 'email',
        'email': "*****@*****.**",
        'password': "******",
        'firstName': "Crowd",
        'lastName': "Tangle"
    })

    # create the Facebook platform so we can link our Facebook posts media to it:
    facebook_platform = Platform.create_or_modify({
        '__SEARCH_BY__': 'name',
        'name': 'Facebook'
    })

    shares = shares_from_url(content.url, request_start_date)

    for share in shares:
        medium_group = Medium.create_or_modify({
            '__SEARCH_BY__': 'name',
            'platform': facebook_platform,
            **share['account']
        })

        content_post = Content.create_or_modify({
            '__SEARCH_BY__': 'url',
            'medium': medium_group,
            'type': ContentType.POST,
            **share['post']
        })

        crowdtangle_identifier = '{}_{}_{}'.format(
            content.id, content_post.crowdtangleIdentifier,
            crowdtangle_user.id)

        appearance = Appearance.create_or_modify({
            '__SEARCH_BY__': 'crowdtangleIdentifier',
            'crowdtangleIdentifier': crowdtangle_identifier,
            'quotedContent': content,
            'quotingContent': content_post,
            'testifier': crowdtangle_user
        })
Example #23
 def __get_image_or_video(self, soup):
     content = Content()
     link = soup.find("a", {'class': 'title'})
     if link is not None:
         #expand = soup.find("div", {'class': 'expando-button'})  # this means it's a gif or video
         src = link.get('href')
         thumbnail_placeholder = soup.find("a", {'class': 'thumbnail'})
         thumbnail_src = ''
         if thumbnail_placeholder is not None:
             thumbnail_src = thumbnail_placeholder.find("img").get('src')
         if not src.strip().endswith(
             ('.jpg', '.jpeg', '.gif', '.png', '.bmp', '.tiff', '.tif',
              '.jpe', '.jfif')):
             content.src = src
             content.type = 'video/mp4'
             content.thumbnail = thumbnail_src
         else:
             content.src = link.get('href')
             content.type = 'image'
             content.thumbnail = thumbnail_src
     return content
Example #24
 def extract(self):
     fs = glob.glob(file_path)
     content = Content(version_id=OID)
     for f in fs:
         cls = f.split('/')[-1].split('.')[0]
         if cls == 'Navi': continue
         if cls == "Configs":
             data = open(f).readlines()
             content.world = data[0]
             content.configs = data[1]
         elif cls == "CreatureType":
             data = [line.rstrip() for line in open(f).readlines()]
             content.creature_types = data
     content.store()
Example #25
    def start(self):
        """ Runs simulation with given seed"""
        Simulation.semaphore.acquire()
        self.env = simpy.Environment()

        random.seed(self.seed)

        print(change_style("[Simulation #{}]".format(self.id), 'blue') + change_style(" Generating contents", "info"))
        self.contents = Content.generate()

        print(change_style("[Simulation #{}]".format(self.id), 'blue') + change_style(
            " Generating secondary users and fill up their caches",
            "info"))
        self.users = User.generate(self.env, self.contents)

        self.network = Network(self.env)
        # Create PU arrivals
        self.env.process(self.arrival_process(LAMBDA_PRIMARY_USER, UserType.PRIMARY))

        # Create SU arrivals
        self.env.process(self.arrival_process(LAMBDA_SECONDARY_USER, UserType.SECONDARY))

        print(change_style("[Simulation #{}]".format(self.id), 'blue') + change_style(" Starting", "info"))
        self.env.run(until=self.time)
        print(change_style("[Simulation #{}]".format(self.id), 'blue') + change_style(" Ending", "info"))

        self.logger.save()
        Simulation.semaphore.release()

        performance = self.calculate_performance()
        Simulation.lock.acquire()
        Simulation.performances['latency'] += performance['latency']
        Simulation.performances['p']['sq'] += performance['p']['sq']
        Simulation.performances['p']['hq']['base'] += performance['p']['hq']['base']
        Simulation.performances['p']['hq']['enh']['base_local_hit'] += performance['p']['hq']['enh']['base_local_hit']
        Simulation.performances['p']['hq']['enh']['base_d2d'] += performance['p']['hq']['enh']['base_d2d']
        Simulation.lock.release()
Example #26
 def __scrape(self, posts, minimumUpvotes, __blank):
     results = []
     for ele in posts:
         html = ele.get_attribute('innerHTML')
         soup = BeautifulSoup(html, "html.parser")
         try:
             upvotes = soup.find("div", {'class': 'sharecounts'})
             if upvotes is not None:
                 upvotes = upvotes.p
             if upvotes is not None:
                 likes = int(
                     upvotes.text.replace(",", "").replace(" shares", ""))
                 if likes > minimumUpvotes:
                     title = soup.find("h2", {'class': 'post-title'})
                      content = self.__get_image_or_video(soup)
                     if content is not None and title is not None:
                         src = content.src
                         post = PostModel(title.text, src, content.type,
                                          src, likes)
                         results.append(post)
         except Exception as ex:
              print('Exception occurred while scraping data! ' + str(ex))
     return results
Example #27
    def get(self, migration_type=None):
        current_user = users.get_current_user()
        if current_user.email() != '*****@*****.**':
            abort(403)
        else:
            if migration_type == 'clear_content':
                ndb.delete_multi(Curriculum.query().fetch(keys_only=True))

            elif migration_type == 'clear_teacher_courses':
                teachers = User.query()
                for teacher in teachers:
                    logging.info('clearing courses for %s' % teacher.key.id())
                    teacher.courses = []
                    teacher.put()
                    logging.info('Completed clearing courses for %s' % teacher.key.id())

            elif migration_type == 'course':
                courses = Content.query(Content.contentType == 'course')
                for course in courses:
                    if course.listed != 'done_migrating3':
                        try:
                            logging.info("Begin course migration for %s" % course.key.id())
                            app_user = course.key.parent().get()
                            teacher = get_user_by_google_id(app_user.googleID)
                            course_data = {
                                'teacher' : teacher.key.id(),
                                'title' : course.title,
                                'body' : course.body
                            }
                            new_course_id = new_course(course_data)
                            logging.info("Saved data for Curriculum ID: %s" % new_course_id)
                            units = Content.query(Content.contentType == 'unit', ancestor=course.key)
                            for unit in units:
                                logging.info("Begin unit migration for %s" % unit.key.id())
                                unit_data = {
                                    'teacher' : teacher.key.id(),
                                    'course' : new_course_id,
                                    'title' : unit.title,
                                    'body' : unit.body
                                }
                                new_unit_id = new_unit(unit_data)
                                logging.info("Saved data for Unit ID: %s" % new_unit_id)

                                lessons = Content.query(Content.contentType == 'lesson', ancestor=unit.key)
                                for lesson in lessons:
                                    logging.info("Begin lesson migration for %s" % lesson.key.id())
                                    lesson_data = {
                                        'teacher' : teacher.key.id(),
                                        'course' : new_course_id,
                                        'unit' : new_unit_id,
                                        'title' : lesson.title,
                                        'body' : lesson.body
                                    }
                                    lesson_id = new_lesson(lesson_data)
                                    logging.info("Saved data for Lesson ID: %s" % lesson_id)

                            course.listed = 'done_migrating3'
                            course.put()
                            logging.info("Finished course migration for %s" % course.key.id())
                        except Exception as e:
                            logging.info("Error migrating course %s" % course.key.id())
                            logging.info(str(e))


            return render_template(
                'migrate.html',
                status_msg='migration complete'
            )
Example #28
import sys; import os
sys.path.insert(0, os.path.abspath('..'))
from models.content import Content


content = Content("https://en.wikipedia.org/wiki/The_Royal_Opera")
content.push()
Example #29
import sys
import os

sys.path.insert(0, os.path.abspath('..'))
from models.content import Content

content = Content("https://en.wikipedia.org/wiki/The_Royal_Opera")
content.push()
Example #30
def get_content(weibo_type, user_id, debug=False, count=200, force_update=False, content_type=0):
    """
    Fetch Weibo content.
    weibo_type: the Weibo category; note that it must be an existing category
    user_id: the id of the Weibo account (the numeric id, not the screen name)
    debug: debug mode, do not insert into the database
    force_update: force update, delete everything and fetch it again
    content_type: 0 = all, 1 = original, 2 = pictures, 3 = videos, 4 = music
    """
    content_dict = {}
    # ty: categories such as fashion, photos, travel, funny, ...
    # user id
    result = client.statuses.user_timeline.get(uid=user_id, count=count, feature=content_type)
    contents = dict(result)
    # Iterate over all fetched posts (the first 100)
    for s_item in contents['statuses']:
        
        # It may be a repost, so check the retweeted status as well
        if not s_item.get('original_pic'):
            if s_item.get('retweeted_status',{}).get('original_pic'):
                s_item['original_pic'] = s_item['retweeted_status']['original_pic']
            else:
                # Skip it if there is no picture
                continue
            
        # Do not save posts containing these patterns; they are probably ads
        if "http://" in s_item['text'] or "包邮" in s_item['text']\
        or "去评论中找链接哦" in s_item['text']\
         or "www." in s_item['text'] or re.findall('[0-9]元',s_item['text'])\
         or s_item['text'].count(" ★") >= 3 or s_item['text'].count("(") >= 3\
         or s_item['text'].count(":") > 5 or s_item['text'].count("【") > 2\
         or s_item['text'].count("、") > 5 or '@' in s_item['text']\
         or '#' in s_item['text']:
            continue
        
        # GIF pictures are stored separately
        if '.gif' in s_item.get('original_pic',''):
            response = urllib.urlopen(url=s_item['original_pic'])
            response_dict = dict(response.headers)
            file_size = response_dict.get('content-length')
            if file_size:
                # Compute its size in MB
                file_size = float(file_size) / 1000.0 / 1000.0
                file_size = decimal.Decimal(file_size).quantize(decimal.Decimal('0.0'))
                s_item['file_size'] = file_size
                
        # For video posts, check the video length
        if content_type in [3, '3']:
            if 'http://' in s_item['text']:
                video_url = s_item['text']
            elif 'http://' in s_item['retweeted_status']['text']:
                video_url = s_item['retweeted_status']['text']
            video_index = video_url.index('http')
            # video url
            # number of video segments
            s_item['video_url'] = video_url[video_index:].split(' ')[0]
            video_count = utils.get_video_count(s_item['video_url'])
            s_item['video_count'] = video_count
            print s_item['video_url'], video_count
            
        # Filter out posts of 5 characters or fewer
        if len(s_item['text'].decode('utf-8')) <= 5:
            continue
        
#        # Compute the picture size
#        if s_item.get('original_pic'):
#            response = urllib.urlopen(url=s_item['original_pic'])
#            img_data = response.read()
#            io = cStringIO.StringIO(img_data)
#            s_item['width'],s_item['height'] = Image.open(io).size
            
        # Format the date; content is grouped by day
        created_at = s_item['created_at'].split(' ')
        time_str = created_at[len(created_at)-1] + "-" + str(time_dict[created_at[1]]) + '-' + created_at[2]
        if time_str not in content_dict:
            content_dict[time_str] = {}
            
        # [time][category][user id of the Weibo account] -- note: the id, not the name
        if user_id not in content_dict[time_str]:
            content_dict[time_str][user_id] = []
        need_data = {
                     'id':s_item['id'],
                     'screen_name':weibo_user.userids[int(user_id)],
                     'type':weibo_type,
                     'text':s_item['text'],
                     'bmiddle_pic':s_item.get('bmiddle_pic'),
                     'original_pic':s_item.get('original_pic'),
                     'thumbnail_pic':s_item.get('thumbnail_pic'),
                     'reposts_count':s_item.get('reposts_count'),
                     'comments_count':s_item.get('comments_count'),
                     'attitudes_count':s_item.get('attitudes_count'),
                     'mlevel':s_item.get('mlevel'),
                     'width':s_item.get('width'),
                     'height':s_item.get('height'),
                     'text_size':len(s_item['text'].decode('utf-8')),
                     'created_at':s_item['created_at'],
                     'file_size':s_item.get('file_size'),
                     'video_url':s_item.get('video_url'),
                     
                     'avatar_large':s_item.get('user',{}).get('avatar_large'),
                     'profile_image_url':s_item.get('user',{}).get('profile_image_url'),
                     
                     }
        # [time][user id] = [post, post, post]
        content_dict[time_str][user_id].append(need_data)
        
    # Stored separately by date; k: date, v: {user id: [posts]}
    for k, v in content_dict.iteritems():
        cont_obj = Content.get(k)
        if not cont_obj:
            cont_obj = Content._install(k)
        # Newly added category
        if weibo_type not in cont_obj.weibo:
            cont_obj.weibo[weibo_type] = v
        else:
            # The content may already exist; u_id: user id, item_value: list of posts
            for u_id, item_value in v.iteritems():
                # Create the user's entry if there is no info for this user yet
                if u_id not in cont_obj.weibo[weibo_type] or force_update:
                    cont_obj.weibo[weibo_type][u_id] = item_value
                else:
                    # The user already has an entry, so check for duplicate posts
                    now_ids = [va['id'] for va in cont_obj.weibo[weibo_type][u_id]]
                    for cont in item_value:
                        if cont['id'] not in now_ids:
                            cont_obj.weibo[weibo_type][u_id].append(cont)
        if not debug:
            a = time.time()
            cont_obj.put()
            print 'result',time.time()-a
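A hypothetical call matching the docstring above (the category name and the numeric account id are made up for illustration):

# Fetch up to 100 picture posts for a (made-up) account in the "funny" category,
# without writing anything to the database.
get_content('funny', '1642909335', debug=True, count=100, content_type=2)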
Example #31
#-*- coding: utf-8 -*-
Example #32
def add_page():
    form = EditPageForm()

    if form.validate_on_submit():
        page = Content()

        page.title = form.title.data
        page.content_type = 'page'
        page.url = form.url.data.lower()
        page.data_blob = form.content.data
        page.created_by = session['user'].id
        page.created_on = datetime.now()
        page.edited_by = -1
        page.edited_on = datetime.utcfromtimestamp(0)
        page.required_priv_level = form.level.data
        page.show_in_nav = form.navigation.data

        db.session.add(page)
        db.session.commit()

        flash('Page "' + page.title + '" created.')

        return redirect('/admin/content')

    return render_template('admin/content/edit_page.tmpl', action='Creating New', title='Create Page', form=form)
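add_page reads title, url, content, level and navigation fields from an EditPageForm that is not part of the listing. A minimal Flask-WTF sketch of a form exposing those fields (field types, labels and defaults are assumptions):

from flask_wtf import FlaskForm
from wtforms import StringField, TextAreaField, IntegerField, BooleanField
from wtforms.validators import DataRequired


class EditPageForm(FlaskForm):
    """Hypothetical sketch of the form add_page expects."""
    title = StringField('Title', validators=[DataRequired()])
    url = StringField('URL', validators=[DataRequired()])
    content = TextAreaField('Content')
    level = IntegerField('Required privilege level', default=0)
    navigation = BooleanField('Show in navigation', default=False)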
Example #33
def appearance_from_row(row, unused_index=None):
    reviewed_items = row.get('Item reviewed')
    if not reviewed_items:
        return None

    quoting_content = Content.create_or_modify({
        '__SEARCH_BY__': 'url',
        'url': row['url'].strip()
    })
    medium_science_feedback_ids = row.get('Outlet')
    if medium_science_feedback_ids:
        medium = Medium.query.filter_by(
            scienceFeedbackIdentifier=medium_science_feedback_ids[0]).first()
        quoting_content.mediumId = medium.id

    author_science_feedback_ids = row.get('Authors')
    if author_science_feedback_ids:
        for author_science_feedback_id in author_science_feedback_ids:
            author = User.query.filter_by(
                scienceFeedbackIdentifier=author_science_feedback_id).first()
            author_content = AuthorContent.create_or_modify({
                '__SEARCH_BY__': ['authorId', 'contentId'],
                'authorId': humanize(author.id),
                'contentId': humanize(quoting_content.id)
            })
            quoting_content.authorContents = quoting_content.authorContents + [author_content]

    quoted_claim = Claim.query.filter_by(
        scienceFeedbackIdentifier=reviewed_items[0]).first()
    quoted_content = None
    if not quoted_claim:
        quoted_content = Content.query.filter_by(
            scienceFeedbackIdentifier=reviewed_items[0]).first()
    if not quoted_claim and not quoted_content:
        return None

    testifier_science_feedback_ids = row.get('Verified by')
    if not testifier_science_feedback_ids:
        return None
    testifier = User.query.filter_by(
        scienceFeedbackIdentifier=testifier_science_feedback_ids[0]).first()
    if not testifier:
        return None

    if IS_DEVELOPMENT:
        quoting_content.externalThumbUrl = API_URL + '/static/logo.png'
        quoting_content.title = "/".join(quoting_content.url
                                         .replace('http://', '')
                                         .replace('https://', '')
                                         .split('/')[-2:]).replace('-', ' ')

    appearance_dict = {
        '__SEARCH_BY__': 'scienceFeedbackIdentifier',
        'quotedClaim': quoted_claim,
        'quotedContent': quoted_content,
        'quotingContent': quoting_content,
        'scienceFeedbackIdentifier': row['airtableId'],
        'testifier': testifier
    }

    return Appearance.create_or_modify(appearance_dict)
Example #34
    def handle_secondary_user(self, user):
        start_time = self.env.now
        # Check if it's already in system
        if user.serving:
            user.print("Blocked already serving", 'red')
            self.logger.new("BLOCKED", False, "SU", "BASE", start_time, self.env.now, None, None, None)
            return
        # Get idle channel for SU
        idle_channel = self.network.get_idle_channel()

        if idle_channel is not None:
            # We found idle channel
            content = Content.get_random(self.contents)
            user.channel_id = idle_channel

            success = True
            blocked = False
            is_hq = random.random() < PROBABILITY_HQ
            prev_event_id = None

            # Create cache instance from random content
            cache_base = Cache(content.id, CacheType.BASE, content.base)
            if user.is_cached(cache_base):
                # Has base layer of content at our device
                user.print("Local hit " + str(cache_base), 'green')
                user.used_cache(cache_base)  # Change LFU and LRU values
                prev_event_id = self.logger.new("LOCAL HIT", is_hq, "SU", "BASE", start_time, self.env.now, user.id,
                                                user.id,
                                                None)
            else:
                # Look for the base layer at other users
                source = user.look_other_users(cache_base, self.users)
                if source is None:
                    self.logger.new("BLOCKED", is_hq, "SU", "BASE", start_time, self.env.now, None, None, None)
                    user.print("Not find cache " + str(cache_base), 'red')
                    success = False
                    blocked = True
                else:
                    user.print("Found " + str(cache_base) + " at " + str(source), 'blue')
                    success = yield from self.network.serve(user, cache_base,
                                                            user.position.distance(source.position))
                    prev_event_id = self.logger.new("SERVED" if success else "DROPPED", is_hq, "SU", "BASE",
                                                    start_time,
                                                    self.env.now,
                                                    source.id, user.id, user.channel_id)
                    if success:
                        user.store_cache(cache_base, self.users, self.contents)
            if is_hq:
                # Look for enh layer after base is finished
                start_time = self.env.now
                if success:
                    # Download base layer successfully
                    cache_enhancement = Cache(content.id, CacheType.ENHANCEMENT, content.enhancement)
                    if user.is_cached(cache_enhancement):
                        # Has enh layer of content at our device
                        user.print("Local hit " + str(cache_enhancement), 'green')
                        self.logger.new("LOCAL HIT", is_hq, "SU", "ENH", start_time, self.env.now, user.id, user.id,
                                        None,
                                        prev_event_id)
                    else:
                        source = user.look_other_users(cache_enhancement, self.users)
                        if source is None:
                            self.logger.new("BLOCKED", is_hq, "SU", "ENH", start_time, self.env.now, None, None,
                                            None)
                            user.print("Not find cache " + str(cache_enhancement), 'red')
                        else:
                            user.print("Found " + str(cache_enhancement) + " at " + str(source), 'blue')

                            success = yield from self.network.serve(user, cache_enhancement,
                                                                    user.position.distance(source.position))
                            self.logger.new("SERVED" if success else "DROPPED", is_hq, "SU", "ENH", start_time,
                                            self.env.now,
                                            source.id, user.id, user.channel_id, prev_event_id)
                            user.store_cache(cache_enhancement, self.users, self.contents)

                else:
                    # Couldn't download base layer successfully
                    self.logger.new("BLOCKED" if blocked else "DROPPED", is_hq, "SU", "ENH", start_time,
                                    self.env.now,
                                    None, user.id, user.channel_id, prev_event_id)
        else:
            # No idle channel found; block the arriving SU
            user.print("No idle channel", 'red')
            self.logger.new("BLOCKED", False, "SU", "BASE", start_time, self.env.now, None, None, None)
Example #35
from utils.fetchUtil import handleListRequest
from models.content import Content
from models.message import Message
from models.comment import Comment

from requests.auth import HTTPBasicAuth

bauth = HTTPBasicAuth('user', 'pass')
url = "https://organization-name.jiveon.com/api/core/v3/places/72743/contents?count=100"

handleListRequest(url, Content.parse_response, auth=bauth)

apiUrl = "https://organization-name.jiveon.com/api/core/v3"

query = Content.select().where(Content.id >= 0).order_by(Content.id)

print(len(query))

for content in query:
    print(content.id, content.subject)
    if content.type == "discussion":
        handleListRequest(apiUrl + '/messages/contents/' +
                          str(content.content_id),
                          Message.parse_response,
                          auth=bauth)
    elif content.type == "poll" \
        or content.type == "idea" \
        or content.type == "document" \
        or content.type == "update" \
        or content.type == "file" \
        or content.type == "post":
Example #36
def create_contents(with_capture=False):
    logger.info('create_contents')

    contents = []

    contents.append(Content(
        authors='James Delingpole',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        publishedDate=strftime(TODAY),
        summary='Like the thriving polar bear, like the recovering ice caps, like the doing-just-fine Pacific islands, the Great Barrier Reef has become a totem for the liberal-left not because it’s in any kind of danger but because it’s big and famous and photogenic and lots and lots of people would be really sad if it disappeared.',
        tags='great barrier;',
        theme='Climate',
        title='Great Barrier Reef Still Not Dying, Whatever Washington Post Says…',
        url='https://www.breitbart.com/big-government/2017/03/20/delingpole-great-barrier-reef-still-not-dying-whatever-washington-post-says'
    ))

    contents.append(Content(
        authors='David Rose',
        externalThumbUrl=API_URL + '/static/logo.png',
        summary='The Mail on Sunday TODAY reveals astonishing evidence that the organisation that is the world’s leading source of climate data rushed to publish a landmark paper that exaggerated global warming and was timed to influence the historic Paris Agreement on climate change.',
        tags='data',
        title='Daily Mail inflates disagreement between scientists about data handling to make unsupported accusation of data manipulation',
        url='http://www.dailymail.co.uk/sciencetech/article-4192182/World-leaders-duped-manipulated-global-warming-data.html'
    ))

    contents.append(Content(
        authors='Chris Mooney',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        publishedDate=strftime(TODAY + timedelta(days=-1)),
        summary='A large research synthesis, published in one of the world’s most influential scientific journals, has detected a decline in the amount of dissolved oxygen in oceans around the world — a long-predicted result of climate change that could have severe consequences for marine organisms if it continues.',
        tags='ocean;oxygen',
        title='Scientists have just detected a major change to the Earth’s oceans linked to a warming climate',
        url='https://www.washingtonpost.com/news/energy-environment/wp/2017/02/15/its-official-the-oceans-are-losing-oxygen-posing-growing-threats-to-marine-life'
    ))

    contents.append(Content(
        authors='Pascal Santi;Sandrine Cabut',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        summary='Selon plusieurs organisations, les recommandations de prise en charge de cette infection bactérienne sont inadaptées.',
        tags='lyme;',
        theme='Health',
        title='Maladie de Lyme : fronde contre la Haute Autorité de santé',
        url='https://www.lemonde.fr/sciences/content/2018/07/24/maladie-de-lyme-fronde-contre-la-haute-autorite-de-sante_5335369_1650684.html'
    ))

    contents.append(Content(
        authors='C. Le Quéré1, R. Moriarty1, R. M. Andrew2, G. P. Peters2, P. Ciais3, P. Friedlingstein4, S. D. Jones1, S. Sitch5, P. Tans6, A. Arneth7, T. A. Boden8, L. Bopp3, Y. Bozec9,10, J. G. Canadell11, L. P. Chini12, F. Chevallier3, C. E. Cosca13, I. Harris14, M. Hoppema15, R. A. Houghton16, J. I. House17, A. K. Jain18, T. Johannessen19,20, E. Kato21,22, R. F. Keeling23, V. Kitidis24, K. Klein Goldewijk25, C. Koven26, C. S. Landa19,20, P. Landschützer27, A. Lenton28, I. D. Lima29, G. Marland30, J. T. Mathis13, N. Metzl31, Y. Nojiri21, A. Olsen19,20, T. Ono32, S. Peng3, W. Peters33, B. Pfeil19,20, B. Poulter34, M. R. Raupach35,†, P. Regnier36, C. Rödenbeck37, S. Saito38, J. E. Salisbury39, U. Schuster5, J. Schwinger19,20, R. Séférian40, J. Segschneider41, T. Steinhoff42, B. D. Stocker43,44, A. J. Sutton13,45, T. Takahashi46, B. Tilbrook47, G. R. van der Werf48, N. Viovy3, Y.-P. Wang49, R. Wanninkhof50, A. Wiltshire51, and N. Zeng',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        summary='Accurate assessment of anthropogenic carbon dioxide (CO2) emissions and their redistribution among the atmosphere, ocean, and terrestrial biosphere is important to better understand the global carbon cycle, support the development of climate policies, and project future climate change. Here we describe data sets and a methodology to quantify all major components of the global carbon budget, including their uncertainties, based on the combination of a range of data, algorithms, statistics, and model estimates and their interpretation by a broad scientific community. We discuss changes compared to previous estimates, consistency within and among components, alongside methodology and data limitations. CO2 emissions from fossil fuel combustion and cement production (EFF) are based on energy statistics and cement production data, respectively, while emissions from land-use change (ELUC), mainly deforestation, are based on combined evidence from land-cover-change data, fire activity associated with deforestation, and models. The global atmospheric CO2 concentration is measured directly and its rate of growth (GATM) is computed from the annual changes in concentration. The mean ocean CO2 sink (SOCEAN) is based on observations from the 1990s, while the annual anomalies and trends are estimated with ocean models. The variability in SOCEAN is evaluated with data products based on surveys of ocean CO2 measurements. The global residual terrestrial CO2 sink (SLAND) is estimated by the difference of the other terms of the global carbon budget and compared to results of independent dynamic global vegetation models forced by observed climate, CO2, and land-cover-change (some including nitrogen–carbon interactions). We compare the mean land and ocean fluxes and their variability to estimates from three atmospheric inverse methods for three broad latitude bands. All uncertainties are reported as ±1σ, reflecting the current capacity to characterise the annual estimates of each component of the global carbon budget. For the last decade available (2004–2013), EFF was 8.9 ± 0.4 GtC yr−1, ELUC 0.9 ± 0.5 GtC yr−1, GATM 4.3 ± 0.1 GtC yr−1, SOCEAN 2.6 ± 0.5 GtC yr−1, and SLAND 2.9 ± 0.8 GtC yr−1. For year 2013 alone, EFF grew to 9.9 ± 0.5 GtC yr−1, 2.3% above 2012, continuing the growth trend in these emissions, ELUC was 0.9 ± 0.5 GtC yr−1, GATM was 5.4 ± 0.2 GtC yr−1, SOCEAN was 2.9 ± 0.5 GtC yr−1, and SLAND was 2.5 ± 0.9 GtC yr−1. GATM was high in 2013, reflecting a steady increase in EFF and smaller and opposite changes between SOCEAN and SLAND compared to the past decade (2004–2013). The global atmospheric CO2 concentration reached 395.31 ± 0.10 ppm averaged over 2013. We estimate that EFF will increase by 2.5% (1.3–3.5%) to 10.1 ± 0.6 GtC in 2014 (37.0 ± 2.2 GtCO2 yr−1), 65% above emissions in 1990, based on projections of world gross domestic product and recent changes in the carbon intensity of the global economy. From this projection of EFF and assumed constant ELUC for 2014, cumulative emissions of CO2 will reach about 545 ± 55 GtC (2000 ± 200 GtCO2) for 1870–2014, about 75% from EFF and 25% from ELUC. This paper documents changes in the methods and data sets used in this new carbon budget compared with previous publications of this living data set (Le Quéré et al., 2013, 2014). All observations presented here can be downloaded from the Carbon Dioxide Information Analysis Center (doi:10.3334/CDIAC/GCP_2014).',
        tags='carbon;PeerVerified',
        title='Global carbon budget 2014',
        url='https://www.earth-syst-sci-data.net/7/47/2015/essd-7-47-2015.html'
    ))

    contents.append(Content(
        authors='Clarisse Fabre',
        isReviewable=False,
        externalThumbUrl=API_URL + '/static/logo.png',
        publishedDate=strftime(TODAY + timedelta(hours=-2)),
        summary='C’est l’histoire d’un garçon qui voulait être Iggy Pop. A Mulhouse, dans les années 1980, il s’imaginait torse nu, le pantalon taille basse, électrisant les foules et se roulant par terre. Mais le rêve post-punk s’est dissous dans les paillettes des combinaisons disco. Et Fred Poulet s’est mis à écrire des chansons, tout en gagnant sa vie comme peintre sur des tournages de film. « C’est pour continuer à rêver que j’écris depuis une trentaine d’années. C’est un peu l’histoire de ma vie », résume le chanteur, emmitouflé dans son imperméable. A 57 ans,il revendique « la désinvolture » comme attitude, au sens de la liberté et de l’élégance.',
        tags='KFC;OnEstChampion;',
        title='Cocorico, Fred Poulet revient à la chanson',
        url='https://www.lemonde.fr/cinema/content/2019/01/10/cocorico-fred-poulet-revient-a-la-chanson_5407141_3476.html'
    ))

    ApiHandler.save(*contents)

    if with_capture:
        for content in contents:
            if content.url:
                logger.info('capture screenshot for {}...'.format(content.url))
                thumb = capture(content.url)
                save_thumb(content, thumb, 0)
                logger.info('capture screenshot for {}...Done.'.format(content.url))

    logger.info('created {} contents'.format(len(contents)))
Example #37
from models.message import Message
from models.comment import Comment
from models.content import Content

messages = Message.select().order_by(Message.id)

comments = Comment.select().order_by(Comment.id)

contents = Content.select().order_by(Content.id)

for message in messages:
    print(str(message))

for comment in comments:
    print(str(comment))

for content in contents:
    print(str(content))
Example #38
#! /usr/bin/env python

from models.source import Source
from models.content import Content
import random


# Main
if __name__ == "__main__":
    source = Source()
    content = Content(source.url)
    content.save()

    content.push()
Example #39
#-*- coding: utf-8 -*-