def rebuild_love_count():
    utc_dt = datetime.datetime.utcnow() - datetime.timedelta(days=7)  # rebuild last week and this week
    week_start, _ = utc_week_limits(utc_dt)

    set_toggle_state(LOVE_SENDING_ENABLED, False)

    logging.info('Deleting LoveCount table... {}MB'.format(memory_usage().current()))
    ndb.delete_multi(
        LoveCount.query(LoveCount.week_start >= week_start).fetch(keys_only=True))

    employee_dict = {employee.key: employee for employee in Employee.query()}

    logging.info('Rebuilding LoveCount table... {}MB'.format(memory_usage().current()))
    cursor = None
    count = 0
    while True:
        loves, cursor, has_more = Love.query(
            Love.timestamp >= week_start).fetch_page(500, start_cursor=cursor)
        for l in loves:
            LoveCount.update(l, employee_dict=employee_dict)
        count += len(loves)
        logging.info('Processed {} loves, {}MB'.format(count, memory_usage().current()))
        if not has_more:
            break

    logging.info('Done. {}MB'.format(memory_usage().current()))
    set_toggle_state(LOVE_SENDING_ENABLED, True)
def _index_employees(employees):
    logging.info('Indexing employees... {}MB'.format(memory_usage().current()))
    index = search.Index(name=INDEX_NAME)
    # According to appengine, put can handle a maximum of 200 documents,
    # and apparently batching is more efficient
    for chunk_of_200 in chunk(employees, 200):
        documents = []
        for employee in chunk_of_200:
            if employee is not None:
                # Gross hack to support prefix matching, see documentation for
                # _generate_substrings
                substrings = u' '.join([
                    _generate_substrings(employee.first_name),
                    _generate_substrings(employee.last_name),
                    _generate_substrings(employee.username),
                ])
                doc = search.Document(fields=[
                    # Full name is already unicode
                    search.TextField(name='full_name', value=employee.full_name),
                    search.TextField(name='username', value=unicode(employee.username)),
                    search.TextField(name='substrings', value=substrings),
                ])
                documents.append(doc)
        index.put(documents)
    logging.info('Done indexing employees. {}MB'.format(memory_usage().current()))
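# `_index_employees` above relies on a `chunk` helper that is not shown in this
# collection. A minimal sketch, assuming it simply yields successive fixed-size
# slices of a list (the `if employee is not None` check above hints that the
# real helper may instead pad short chunks with None):
def chunk(sequence, size):
    """Yield successive `size`-sized slices of `sequence`."""
    for start in xrange(0, len(sequence), size):
        yield sequence[start:start + size]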
def get(self):
    repo = self.request.get('repo')
    number = self.request.get('number')

    if self.request.get('format') == 'json':
        ancestor = models.GithubResource.make_key(repo, number)
        events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
        self.response.headers['content-type'] = 'application/json'
        self.response.write(json.dumps([e.body for e in events], indent=True))
        return

    self.response.write(
        '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>')
    self.response.write('<p>Memory: %s' % memory_usage().current())
    self.emit_classified(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())
    if self.request.get('classify_only'):
        return
    merged = self.emit_events(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())
    if 'head' in merged:
        sha = merged['head']['sha']
        results = models.GHStatus.query_for_sha(repo, sha)
        self.response.write('</table><table>')
        for res in results:
            self.response.write('<tr><td>%s<td>%s<td><a href="%s">%s</a>\n' %
                                (res.context, res.state, res.target_url, res.description))
    models.shrink(merged)
    self.response.write('</table><pre>%s</pre>' %
                        cgi.escape(json.dumps(merged, indent=2, sort_keys=True)))
    self.response.write('<p>Memory: %s' % memory_usage().current())
def _clear_index():
    logging.info('Clearing index... {}MB'.format(memory_usage().current()))
    index = search.Index(name=INDEX_NAME)
    last_id = None
    while True:
        # We can batch up to 200 doc_ids in the delete call, and
        # batching is better according to the docs. Because we're deleting
        # async, we need to keep track of where we left off each time
        # we do get_range
        use_start_object = False
        if last_id is None:
            use_start_object = True
        doc_ids = [
            doc.doc_id
            for doc in index.get_range(
                ids_only=True,
                limit=200,
                start_id=last_id,
                include_start_object=use_start_object,
            )
        ]
        if not doc_ids:
            break
        last_id = doc_ids[-1]
        index.delete(doc_ids)
    logging.info('Done clearing index. {}MB'.format(memory_usage().current()))
def post(self):
    active_subs = Subscription.get_active_subscriptions()
    items = json.loads(self.request.get('items'))
    logging.debug('before parsing, memory: %s' % runtime.memory_usage().current())
    parser = RentParser()
    parsed_items = []
    for item in items:
        try:
            parsed = parser.parse(item)
            ret = RentRecord.add_record(parsed)
        except Exception as e:
            logging.error(repr(e))
        parsed_items.append(parsed)
    logging.debug('after parsing, memory: %s' % runtime.memory_usage().current())
    user2message = filter_items(parsed_items, active_subs)
    for user, item in user2message.items():
        logging.debug('user: %s has %d messages' % (user, len(item)))
        User.update_user_items(user, item)
        url = get_short_url(user)
        if not url:
            url = site_config.url + '?user=%s' % user
        msg = [u'Found %d new rental listings.' % len(item),
               u'Click the link below to view them:',
               url]
        messenger.send_message(user, '\n'.join(msg))
def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
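# `wrapper` above is only the inner function of a decorator; the enclosing
# decorator is not shown here. A minimal sketch of how such a memory-logging
# decorator is typically assembled (the name `log_memory` is an assumption):
import functools
import logging

from google.appengine.api import runtime


def log_memory(method):
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        logging.info('Memory before method %s is %s.',
                     method.__name__, runtime.memory_usage().current())
        result = method(*args, **kwargs)
        logging.info('Memory after method %s is %s',
                     method.__name__, runtime.memory_usage().current())
        return result
    return wrapper


# Usage: decorate any function whose memory footprint you want logged.
# @log_memory
# def rebuild_index():
#     ...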
def _update_employees(employee_dicts):
    """Given a JSON string in the format "[{employee info 1}, {employee info 2}, ...]",
    create new employee records and update existing records as necessary.

    Then determine whether any employees have been terminated since the last
    update, and mark these employees as such.
    """
    logging.info('Updating employees... {}MB'.format(memory_usage().current()))

    db_employee_dict = {
        employee.username: employee
        for employee in Employee.query()
    }

    all_employees, new_employees = [], []
    current_usernames = set()
    for d in employee_dicts:
        existing_employee = db_employee_dict.get(d['username'])
        if existing_employee is None:
            new_employee = Employee.create_from_dict(d, persist=False)
            all_employees.append(new_employee)
            new_employees.append(new_employee)
        else:
            existing_employee.update_from_dict(d)
            # If the user is in the S3 dump, then the user is no longer
            # terminated.
            existing_employee.terminated = False
            all_employees.append(existing_employee)
        current_usernames.add(d['username'])

        if len(all_employees) % 200 == 0:
            logging.info('Processed {} employees, {}MB'.format(
                len(all_employees), memory_usage().current()))

    ndb.put_multi(all_employees)

    # Figure out if there are any employees in the DB that aren't in the S3
    # dump. These are terminated employees, and we need to mark them as such.
    db_usernames = set(db_employee_dict.keys())
    terminated_usernames = db_usernames - current_usernames
    terminated_employees = []
    for username in terminated_usernames:
        employee = db_employee_dict[username]
        employee.terminated = True
        terminated_employees.append(employee)
    ndb.put_multi(terminated_employees)

    logging.info('Done updating employees. {}MB'.format(memory_usage().current()))
def fashionista_register_shutdown_hook():
    apiproxy_stub_map.apiproxy.CancelApiCalls()
    # We can save state to datastore here or Log some statistics.
    logThis(AEL_LEVEL_WARNING, 'SHUTDOWN IN PROGRESS...')
    logThis(AEL_LEVEL_INFO, 'CPU USAGE: %s' % runtime.cpu_usage())
    logThis(AEL_LEVEL_INFO, 'MEMORY USAGE: %s' % runtime.memory_usage())
def post(self):
    tx = self.request.get_range('x', None)
    ty = self.request.get_range('y', None)
    z = self.request.get_range('z', None)
    limit = self.request.get_range('limit', min_value=1, max_value=1000, default=1000)
    offset = self.request.get_range('offset', min_value=0, default=0)
    name = self.request.get('name', None)
    source_name = self.request.get('source', None)

    if tx is None or ty is None or z is None or name is None or source_name is None:
        self.error(400)
        return

    key = 'tile-%s-%s-%s-%s-%s' % (z, ty, tx, source_name, name)
    png = cache.get(key)
    if png is None:
        png = get_tile_png(tx, ty, z, name, source_name, limit, offset)
        if png is None:
            png = PointTile.blank()
        cache.add(key, png, dumps=False)

    logging.info('TILE BACKEND MEMORY = %s' % runtime.memory_usage().current())

    self.response.set_status(200)
    self.response.headers['Content-Type'] = 'image/png'
    self.response.out.write(png)
def end(self, spacename, key, ok=True, msg=''):
    # <editor-fold desc="Verify that a previous init exists. If not, set self.data['init'][spacename][key]['error'] and return False">
    if spacename not in self.data['init'].keys():
        self.data['init'][spacename] = {
            key: {
                "error": "CorePerformance.end with no previous CorePerformance.init"
            }
        }
        return False
    if key not in self.data['init'][spacename]:
        self.data['init'][spacename][key] = {
            "error": "CorePerformance.end with no previous CorePerformance.init"
        }
        return False
    # </editor-fold>

    # Replace the stored start values with the elapsed memory and time.
    self.data['init'][spacename][key] = {
        "mem": memory_usage().current() - self.data['init'][spacename][key]['mem'],
        "time": time.time() - self.data['init'][spacename][key]['time'],
        "ok": ok
    }
    if not ok:
        self.data['init'][spacename][key]['notes'] = msg
def _get_employee_info_from_s3():
    from boto import connect_s3
    from boto.s3.key import Key

    logging.info('Reading employees file from S3... {}MB'.format(memory_usage().current()))
    key = Key(
        connect_s3(
            aws_access_key_id=get_secret('AWS_ACCESS_KEY_ID'),
            aws_secret_access_key=get_secret('AWS_SECRET_ACCESS_KEY'),
        ).get_bucket(config.S3_BUCKET),
        'employees.json',
    )
    employee_dicts = json.loads(key.get_contents_as_string())
    logging.info('Done reading employees file from S3. {}MB'.format(memory_usage().current()))
    return employee_dicts
def log(s):
    global tmp_count
    tmp_count = tmp_count + 1
    memory_usage = runtime.memory_usage().current()
    q = Log()
    q.count = tmp_count
    q.txt = str(memory_usage)
    q.log = s
    q.put()
def add(self, title, file='', type='all'):
    # Hiding the full path (security)
    file = file.replace(os.path.dirname(os.path.dirname(__file__)), "")

    # Preparing the line to save
    line = ''
    if type == 'note':
        line += "[" + type
    else:
        line += str(self.data['lastIndex']) + " ["
    if len(file):
        file = " (" + file + ")"

    # Calculating memory
    _mem = memory_usage().current() - self.data['lastMemory']
    if type == 'all' or type == 'endnote' or type == 'memory':
        line += str(round(_mem, 3)) + ' Mb'
    self.data['lastMemory'] = memory_usage().current()

    # Calculating time
    _time = time.time() - self.data['lastMicrotime']
    if type == 'all' or type == 'endnote' or type == 'time':
        line += ', ' + str(round(_time, 3)) + ' secs'
    self.data['lastMicrotime'] = time.time()

    # Adding the title
    line += '] ' + str(title)

    # Adding accumulated data
    if type != 'note':
        line = "[ " + str(round(memory_usage().current(), 3)) + " Mb, " + \
               str(round(time.time() - self.data['initMicrotime'], 3)) + " secs] / " + line + file
    if type == 'endnote':
        line = "[" + type + "] " + line

    self.data['info'].append(line)
    self.data['lastIndex'] += 1
def init(self, spacename, key):
    # <editor-fold desc="Init self.data['init'][spacename][key]['mem'],['time'],['ok']">
    if spacename not in self.data['init'].keys():
        self.data['init'][spacename] = {}
    self.data['init'][spacename][key] = {
        "mem": memory_usage().current(),
        "time": time.time(),
        "ok": True
    }
def gas_update_xls(option="1"): xls_result = ResultIter() logging.info("comienzo gas_update_xls: %s" %memory_usage().current()) if option == "0": option = sorted(FUEL_OPTIONS.keys())[1:] logging.info("Buscando datos de todos los tipos") else: option = [option] for o in option: logging.info("Obteniendo %s" %FUEL_OPTIONS[o]["name"]) response = urlfetch.fetch(URL_XLS+o, deadline=55) if response.status_code == 200: page = html.document_fromstring(response.content) tables = page.xpath("body/table") if tables: # si encuentra tablas en el resultado rows = tables[0].findall("tr") if len(rows)<5: return None for tr in rows[3:]: row_data = [td.text for td in tr.getchildren()] if row_data[7] == "P": # guardo sólo gaslineras de venta público date = map(int, row_data[4].split("/")) date.reverse(); xls_result.add_item( province = row_data[0], town = row_data[1], station = row_data[2] + " [" + re.sub("\s+", "", row_data[3]) + "]", date = date, label = row_data[6], hours = row_data[9], option = {o: float(re.sub(",", ".", row_data[5]))}) logging.info("fin procesando %s: %s" %(o, memory_usage().current())) else: logging.info("sin informacion en %s" %o) return None else: logging.info("Error en el geoportal") return None return xls_result
def get(self):
    self.setVisInfo(self.request.remote_addr, self.user.nickname(), self.user.email())
    now = time.ctime(time.time())
    self._print('{} - Hello {} ({}) !'.format(now, self.user.nickname(), self.user.email()))
    self._hr()
    self._print("Your IP: " + self.request.remote_addr)
    self._print("Origin URL: " + self.request.url)
    self._print("uid: " + str(os.getuid()))
    self._print("pid: " + str(os.getpid()))
    self._print("cwd: " + os.getcwd())
    self._print(pl.system() + ", " + pl.architecture()[0])
    self._print(pl.python_implementation() + ", " + pl.python_version())
    self._print("Used mem: " + str(runtime.memory_usage().current()) + " MB")
    self._print("Used mem last min: " + str(runtime.memory_usage().average1m()) + " MB")
    self._print("CPU usage: " + str(runtime.cpu_usage().total()) + " Mcycles")
    self._print("CPU usage last min: " + str(runtime.cpu_usage().rate1m()) + " Mcycles")
    self._hr()
    self._form("/primez", "de_la", "la")
    self._hr()
    self.response.out.write("<a href='" + backends.get_url('primer') + "/backend/primer/mumu'>Primer</a>")
    self._hr()
    self.response.out.write("<a href='/logoff'>LogOut_1</a>")
    self._br()
    self.response.out.write("<a href='" + users.create_logout_url("/") + "'>LogOut_2</a>")
def __init__(self):
    self.data = OrderedDict()
    self.data['initMicrotime'] = time.time()
    self.data['lastMicrotime'] = self.data['initMicrotime']
    self.data['initMemory'] = memory_usage().current()
    self.data['lastMemory'] = self.data['initMemory']
    self.data['lastIndex'] = 1
    self.data['info'] = []
    self.data['info'].append(
        "File :" + __file__.replace(os.path.dirname(os.path.dirname(__file__)), ""))
    self.data['info'].append("Init Memory Usage: " + str(self.data['initMemory']))
    self.data['init'] = OrderedDict()
    self.root_path = os.path.dirname(os.path.dirname(__file__))
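# The constructor above, together with the init()/end()/add() methods shown
# elsewhere in this collection, makes up a small profiling helper. A minimal
# usage sketch, assuming the class is called CorePerformance and that
# load_report() is a hypothetical piece of expensive work:
perf = CorePerformance()

perf.init('reports', 'monthly')     # snapshot current memory and time
report = load_report()              # hypothetical expensive call
perf.end('reports', 'monthly')      # overwrite the snapshot with the deltas

perf.add('monthly report built')    # append a timeline entry with memory/time info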
def post(self):
    # Parameters checked by frontend
    name = self.request.get('name')
    source_name = self.request.get('source')

    # Cache key for the harvest job (needed by both the error and success paths)
    key = get_job_cache_key(name, source_name)

    # Get source
    source = sources.get(source_name)
    if not source:
        logging.error('Cannot harvest without a source')
        self.error(404)
        # Update job status to 'error'
        job = get_job(name, source_name, 'error', msg='Unsupported source')
        cache.add(key, job)
        return

    # Check cache for harvest job
    job = cache.get(key)
    if not job:
        self.error(404)
        self.response.headers['Content-Type'] = "application/json"
        self.response.out.write('{"error":"unknown job %s"}' % key)
        return

    count = 0

    # Update job status to 'working'
    cache.add(key, get_job(name, source_name, 'working', msg=count))

    # Get points from source and put them into datastore in batches
    pcount = 0
    for points in self.get_points(name, source):
        logging.info('HARVEST BACKEND MEMORY = %s after %s points' %
                     (runtime.memory_usage().current(), count))
        entities = []
        for p in points:
            pkey = Key('Point', '%s-%s-%s' % (source_name, name, pcount))
            pcount += 1
            entities.append(Point(key=pkey, lat=p[0], lng=p[1]))
            entities.append(PointIndex.create(pkey, p[0], p[1], name, source_name))
        model.put_multi(entities)
        count += len(points)
        cache.add(key, get_job(name, source_name, 'working', msg=count))

    # Update job status to 'done'
    # TODO: Done now or after backend rendering completes?
    cache.add(key, get_job(name, source_name, 'done', msg=count))
def runtime_stats():
    mem = {}
    mem_use = runtime.memory_usage()
    mem['Memory (current)'] = mem_use.current()
    mem['Memory (average 1m)'] = mem_use.average1m()
    mem['Memory (average 10m)'] = mem_use.average10m()

    cpu = {}
    cpu_use = runtime.cpu_usage()
    cpu['CPU (Total)'] = cpu_use.total()
    cpu['CPU (Average 1m)'] = cpu_use.rate1m()
    cpu['CPU (Average 10m)'] = cpu_use.rate10m()

    return render_template('runtime.html', cpu=cpu, mem=mem)
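# runtime_stats() above returns a rendered template, which suggests a Flask app
# (an assumption). A minimal sketch of exposing the same numbers as a JSON
# route; the URL and the JSON variant are assumptions, not part of the original:
from flask import Flask, jsonify
from google.appengine.api import runtime

app = Flask(__name__)


@app.route('/_stats/runtime.json')
def runtime_stats_json():
    mem_use = runtime.memory_usage()
    cpu_use = runtime.cpu_usage()
    return jsonify({
        'memory_current_mb': mem_use.current(),
        'memory_average_1m_mb': mem_use.average1m(),
        'cpu_total_mcycles': cpu_use.total(),
        'cpu_rate_1m_mcycles': cpu_use.rate1m(),
    })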
def debug(location, message, params=None, force=False):
    if not (settings.REMOTE_DEBUG or settings.LOCALE_DEBUG or force):
        return
    if params is None:
        params = {}
    params["memory"] = runtime.memory_usage().current()
    params["instance_id"] = settings.INSTANCE_ID
    debug_message = "%s/%s?%s" % (
        urllib2.quote(location),
        urllib2.quote(message),
        "&".join(["%s=%s" % (p, urllib2.quote(unicode(params[p]).encode("utf-8")))
                  for p in params]))
    try:
        if settings.REMOTE_DEBUG or force:
            fetch("%s/%s" % (settings.REMOTE_DEBUGGER, debug_message))
    except:
        pass
    if settings.LOCALE_DEBUG or force:
        logging.debug(debug_message)
def deferred(request):
    if 'HTTP_X_APPENGINE_TASKNAME' not in request.META:
        logging.critical('Detected an attempted XSRF attack. The header '
                         '"X-AppEngine-Taskname" was not set.')
        return http.HttpResponse(status=403)

    in_prod = (
        not request.environ.get("SERVER_SOFTWARE").startswith("Devel"))
    if in_prod and request.environ.get("REMOTE_ADDR") != "0.1.0.2":
        logging.critical('Detected an attempted XSRF attack. This request did '
                         'not originate from Task Queue.')
        return http.HttpResponse(status=403)

    headers = ["%s:%s" % (k, v) for k, v in request.META.items()
               if k.lower().startswith("x-appengine-")]
    logging.info(", ".join(headers))

    try:
        raw_post_data = request.read()
        logging.info("memory usage: %s", runtime.memory_usage().current())
        run(raw_post_data)
        return http.HttpResponse(status=200)
    except SingularTaskFailure:
        logging.debug("Failure executing task, task retry forced")
        return http.HttpResponse(status=408)
    except PermanentTaskFailure, e:
        logging.exception("Permanent failure attempting to execute task")
def _ensure_within_memory_limit(self):
    memory_limit = _MEMORY_LIMIT * 0.8
    memory_usage = runtime.memory_usage().current()
    if memory_usage >= memory_limit:
        # Create a list of candidate counters to remove. We remove counters
        # that have not been modified before those that have been modified,
        # then order them by the last time they were accessed.
        counters = self._store.values()
        counters.sort(key=lambda counter: (counter._dirty, counter._last_accessed))
        counters_to_cull = int(math.ceil(len(counters) * _CULL_AMOUNT))
        counters = counters[:counters_to_cull]
        logging.info('Removing %d entries as we are over the memory limit '
                     'by %dMB.', counters_to_cull, memory_usage - memory_limit)
        self._write_in_batches(counters)
        for counter in counters:
            del self._store[counter.key().name()]
def get_memory():
    if util.dev_server:
        try:
            # This will work in a dev shell, but will raise an error on
            # a dev server. We convert to MB for consistency with prod.
            #
            # TODO(benkraft): Hack the dev server to allow the import.
            # It prohibits any import that wouldn't be allowed on prod,
            # but here we would actually like to be able to do the
            # import anyway, since we already do things differently on
            # prod.
            #
            # TODO(benkraft): Craig thinks the live runtime API is
            # actually reporting VSS, not RSS, so maybe we should use
            # that for consistency. Better yet, use both.
            import resource
            return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.
        except:
            return 0
    else:
        # This will work anywhere, but will return 0 on dev. It involves an RPC.
        return runtime.memory_usage().current()
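# A minimal sketch of how a helper like get_memory() is typically used to
# bracket an expensive operation. log_memory_delta and rebuild_index are
# hypothetical names introduced here, not part of the original snippets:
import logging


def log_memory_delta(label, func, *args, **kwargs):
    """Run func and log how much the reported memory grew while it ran."""
    before = get_memory()
    result = func(*args, **kwargs)
    after = get_memory()
    logging.info('%s: memory went from %.1fMB to %.1fMB (+%.1fMB)',
                 label, before, after, after - before)
    return result


# Usage:
# log_memory_delta('rebuild_index', rebuild_index)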
def post(self):
    de_la = cgi.escape(self.request.get('de_la'))
    la = cgi.escape(self.request.get('la'))
    rezults = self.getPrimes(de_la, la)
    dur = (datetime.datetime.fromtimestamp(int(self.f_time)) -
           datetime.datetime.fromtimestamp(int(self.s_time)))
    stats = "Last min.: {} Mcycles {} MB Current: {} Mcycles {} MB".format(
        runtime.cpu_usage().rate1m(), runtime.memory_usage().average1m(),
        runtime.cpu_usage().total(), runtime.memory_usage().current())
    logging.info("{} {} {} {}\nLast min.: {} Mcycles {} MB\nCurrent: {} {}".format(
        de_la, la, ','.join(rezults), str(dur),
        runtime.cpu_usage().rate1m(), runtime.memory_usage().average1m(),
        runtime.cpu_usage().total(), runtime.memory_usage().current()))
    self.setPrimerInfo(de_la, la, ','.join(rezults), str(dur), stats)
def createCsv(cls, helper, login_operator_entry=None):
    with_cursor = True
    csv_records = []

    # Title row
    titles = SearchUtils.getCsvTitles(helper)
    csv_records.append(UcfUtil.createCsvRecordEx(titles))

    # Fetch the data list
    q = sateraito_db.SearchList.query()
    q = q.order(sateraito_db.SearchList.search_name)
    logging.info('with_cursor=' + str(with_cursor))
    max_export_cnt = -1
    cnt = 0
    limit = 1000  # use 1000 so that a normal domain with max_export_cnt == 1000 can be fetched in one go
    start_cursor = None
    while True:
        if with_cursor:
            if start_cursor is not None:
                each_rows, start_cursor, more = q.fetch_page(limit, start_cursor=start_cursor)
            else:
                each_rows, start_cursor, more = q.fetch_page(limit)
        else:
            each_rows = q.iter(limit=limit, offset=cnt)
        each_cnt = 0
        for entry in each_rows:
            vo = entry.exchangeVo(helper._timezone)
            SearchUtils.editVoForCsv(helper, vo)
            data = []
            data.append('IU')  # command
            data.append(UcfUtil.getHashStr(vo, 'search_name'))  # search_name
            data.append(UcfUtil.getHashStr(vo, 'search_config'))  # search_config
            csv_records.append(UcfUtil.createCsvRecordEx(data))
            each_cnt += 1
            vo = None
            entry = None
            if each_cnt % 100 == 0:
                gc.collect()
        cnt += each_cnt
        logging.info(cnt)
        # Row-count upper limit
        if with_cursor:
            if cnt >= max_export_cnt or not more:
                break
        else:
            if (max_export_cnt > 0 and cnt >= max_export_cnt) or each_cnt < limit:
                break

    csv_text = '\r\n'.join(csv_records)
    current_memory_usage = runtime.memory_usage().current()
    gc.collect()
    current_memory_usage_after_collect = runtime.memory_usage().current()
    logging.info('current_memory_usage=' + str(current_memory_usage) +
                 ' after_collect=' + str(current_memory_usage_after_collect))
    return csv_text
def post(self): try: #global global_dict #global_dict = {} starttime = time.time() cutoff_date = datetime.datetime.now() + datetime.timedelta(-365) cutoff_date_string = cutoff_date.strftime("%Y-%m-%d %H:%M:%S") parts = self.request.body.split("&") requests = {} if parts is not None and parts[0] != "": for pair in parts: ab = pair.split('=') requests[ab[0]] = ab[1] force = bool(requests.get("force", False)) write = bool(requests.get("write", False)) minwrite = bool(requests.get("minwrite", False)) rpcList = [] client = memcache.Client() q = structures.Rumble.all() rumbles = [] for r in q.run(): memr = memcache.get(r.Name) if memr is not None: r = memr if r.BatchScoresAccurate and not force: continue rumbles.append(r) for r in rumbles: scoresdicts = pickle.loads( zlib.decompress(r.ParticipantsScores)) entries = len(scoresdicts) r.__dict__["entries"] = entries rumbles.sort(key=lambda r: -r.__dict__["entries"]) first = True for r in rumbles: if not first: time.sleep(5) gc.collect() gc.collect(2) first = False logging.info("mem usage at start of " + r.Name + ": " + str(runtime.memory_usage().current()) + "MB") try: scores = pickle.loads(zlib.decompress( r.ParticipantsScores)) except: scoresdicts = marshal.loads( zlib.decompress(r.ParticipantsScores)) scoreslist = [structures.LiteBot() for _ in scoresdicts] for s, d in zip(scoreslist, scoresdicts): s.__dict__.update(d) scores = {s.Name: s for s in scoreslist} if len(scores) == 0: continue r.ParticipantsScores = None #gc.collect() particHash = [p + "|" + r.Name for p in scores] particSplit = list_split(particHash, 32) ppDict = {} for l in particSplit: ppDict.update(memcache.get_multi(l)) time.sleep(0.1) particSplit = None bots = [ppDict.get(h, None) for h in particHash] botsdict = {} missingHashes = [] missingIndexes = [] for i in xrange(len(bots)): if bots[i] is None: missingHashes.append(particHash[i]) missingIndexes.append(i) elif isinstance(bots[i], structures.BotEntry): bots[i] = structures.CachedBotEntry(bots[i]) if len(missingHashes) > 0: bmis = structures.BotEntry.get_by_key_name(missingHashes) #lost = False lostList = [] for i in xrange(len(missingHashes)): if bmis[i] is not None: cb = structures.CachedBotEntry(bmis[i]) bots[missingIndexes[i]] = cb botsdict[missingHashes[i]] = cb else: bots[missingIndexes[i]] = None lostList.append(missingHashes[i]) #lost = True while len(particHash) > 0: particHash.pop() particHash = None while len(missingHashes) > 0: missingHashes.pop() missingHashes = None while len(missingIndexes) > 0: missingIndexes.pop() missingIndexes = None logging.info("mem usage after loading bots: " + str(runtime.memory_usage().current()) + "MB") bots = filter(lambda b: b is not None, bots) get_key = attrgetter("APS") bots.sort(key=lambda b: get_key(b), reverse=True) gc.collect() botIndexes = {} for i, b in enumerate(bots): b.Name = b.Name.encode('ascii') intern(b.Name) botIndexes[b.Name] = i b.VoteScore = 0. 
botlen = len(bots) APSs = numpy.empty([botlen, botlen]) APSs.fill(numpy.nan) totalAlivePairs = 0 for i, b in enumerate(bots): try: pairings = pickle.loads(zlib.decompress( b.PairingsList)) except: pairsDicts = marshal.loads( zlib.decompress(b.PairingsList)) pairings = [structures.ScoreSet() for _ in pairsDicts] for s, d in zip(pairings, pairsDicts): s.__dict__.update(d) removes = [] alivePairings = 0 for q, p in enumerate(pairings): j = botIndexes.get(p.Name, -1) if j != -1: APSs[j, i] = numpy.float64(p.APS) p.Alive = True alivePairings += 1 else: removes.append(q) b.Pairings = alivePairings totalAlivePairs += alivePairings removes.reverse() removed = False for q in removes: p = pairings[q] if p.LastUpload < cutoff_date_string: removed = True pairings.pop(q) else: if p.Alive: removed = True p.Alive = False if removed: b.PairingsList = zlib.compress( pickle.dumps(pairings, -1), 1) gc.collect() APSs += numpy.float64(100) - APSs.transpose() APSs *= numpy.float64(0.5) numpy.fill_diagonal(APSs, numpy.nan) gc.collect() logging.info( str(len(bots)) + " bots loaded, total of " + str(totalAlivePairs) + " alive pairings") logging.info("mem usage after unzipping pairings: " + str(runtime.memory_usage().current()) + "MB") #Vote mins = numpy.nanmax(APSs, 1) for i, minimum in enumerate(mins): minIndexes = numpy.argwhere(APSs[i, ...] == minimum) ties = len(minIndexes) if ties > 0: increment = 1. / ties for minIndex in minIndexes: bots[minIndex].VoteScore += increment #inv_len = 1.0/botlen for b in bots: if b.Pairings > 0: b.VoteScore = 100.0 * b.VoteScore / float(b.Pairings) else: b.VoteScore = 0 #KNN PBI half_k = int(math.ceil(math.sqrt(botlen) / 2)) KNN_PBI = -numpy.ones((botlen, botlen)) for i in xrange(len(bots)): low_bound = max([0, i - half_k]) high_bound = min([botlen - 1, i + half_k]) low_high_bound = min([i + 1, high_bound]) before = APSs[:, low_bound:i] after = APSs[:, low_high_bound:high_bound] compare = numpy.hstack((before, after)) mm = numpy.mean(numpy.ma.masked_array( compare, numpy.isnan(compare)), axis=1) KNN_PBI[:, i] = APSs[:, i] - mm.filled(numpy.nan) # a[i] = 0 # logging.info("mean error of transpose: " + str(numpy.mean(numpy.square(a)))) #KNN_PBI[KNN_PBI == numpy.nan] = -1 #logging.info("mem usage after KNNPBI: " + str(runtime.memory_usage().current()) + "MB") # Avg Normalised Pairing Percentage mins = numpy.nanmin(APSs, 1) maxs = numpy.nanmax(APSs, 1) inv_ranges = numpy.float64(1.0) / (maxs - mins) NPPs = -numpy.ones((botlen, botlen)) for i in range(botlen): if numpy.isfinite(inv_ranges[i]): NPPs[i, :] = numpy.float64(100) * ( APSs[i, :] - mins[i]) * inv_ranges[i] else: NPPs[i, :] = numpy.float64(100) #NPPs[NPPs] = -1 #logging.info("mem usage after ANPP: " + str(runtime.memory_usage().current()) + "MB") changedBots = [] #bots with new pairings since last run # save to cache botsdict = {} for i, b in enumerate(bots): # try: pairings = pickle.loads(zlib.decompress(b.PairingsList)) # except: # pairsDicts = marshal.loads(zlib.decompress(b.PairingsList)) # # pairings = [structures.ScoreSet() for _ in pairsDicts] # for s,d in zip(pairings,pairsDicts): # s.__dict__.update(d) nppCount = 0 totalNPP = 0.0 apsCount = 0 totalAPS = 0.0 aliveCount = 0 changed = False for p in pairings: j = botIndexes.get(p.Name, -1) if j != -1: p.Alive = True changePotential = (p.KNNPBI == 0.0 and p.NPP == -1) aliveCount += 1 p.KNNPBI = float(KNN_PBI[j, i]) p.NPP = float(NPPs[j, i]) if not numpy.isnan(APSs[j, i]): p.APS = float(APSs[j, i]) totalAPS += p.APS apsCount += 1 if numpy.isnan(p.KNNPBI): p.KNNPBI = 0 if 
numpy.isnan(p.NPP): p.NPP = -1 else: totalNPP += p.NPP nppCount += 1 if changePotential and p.KNNPBI != 0.0 and p.NPP != -1: changed = True else: p.Alive = False p.KNNPBI = 0 p.NPP = -1 if nppCount > 0: b.ANPP = float(totalNPP / nppCount) else: b.ANPP = -1.0 if apsCount > 0: b.APS = float(totalAPS / apsCount) else: b.APS = -1.0 b.PairingsList = zlib.compress(pickle.dumps(pairings, -1), 1) b.Pairings = aliveCount if b.Pairings > 0: botsdict[b.key_name] = b if changed: changedBots.append(b) KNN_PBI = None APSs = None NPPs = None logging.info("mem usage after zipping: " + str(runtime.memory_usage().current()) + "MB") gc.collect() #logging.info("mem usage after gc: " + str(runtime.memory_usage().current()) + "MB") if len(botsdict) > 0: splitlist = dict_split(botsdict, 20) logging.info("split bots into " + str(len(splitlist)) + " sections") for d in splitlist: rpcList.append(client.set_multi_async(d)) time.sleep(.5) #throttle logging.info("wrote " + str(len(botsdict)) + " bots to memcache") botsdict.clear() botsdict = None scores = {b.Name: structures.LiteBot(b) for b in bots} # bots = None r.ParticipantsScores = None gc.collect() r.ParticipantsScores = db.Blob( zlib.compress( pickle.dumps(scores, pickle.HIGHEST_PROTOCOL), 3)) logging.info("mem usage after participants zipping: " + str(runtime.memory_usage().current()) + "MB") #r.ParticipantsScores = zlib.compress(marshal.dumps([scores[s].__dict__ for s in scores]),4) scores = None if write: writebots = [None] * len(bots) for i, b in enumerate(bots): putb = structures.BotEntry(key_name=b.key_name) putb.init_from_cache(b) writebots[i] = putb write_lists = list_split(writebots, 50) for subset in write_lists: db.put(subset) time.sleep(0.1) #throttle logging.info("wrote " + str(len(writebots)) + " bots to database") while len(bots) > 0: bots.pop() bots = None if minwrite: writebots = [None] * len(changedBots) for i, b in enumerate(changedBots): putb = structures.BotEntry(key_name=b.key_name) putb.init_from_cache(b) writebots[i] = putb write_lists = list_split(writebots, 50) for subset in write_lists: db.put(subset) time.sleep(0.1) logging.info("wrote " + str(len(writebots)) + " changed bots to database") while len(changedBots) > 0: changedBots.pop() changedBots = None gc.collect() if write or minwrite: r.BatchScoresAccurate = True rpcList.append(client.set_multi_async({r.Name: r})) db.put([r]) #gc.collect() r = None logging.info("mem usage after write: " + str(runtime.memory_usage().current()) + "MB") for rpc in rpcList: rpc.get_result() elapsed = time.time() - starttime logging.info("Success in " + str(round(1000 * elapsed) / 1000) + "s") self.response.out.write("Success in " + str(round(1000 * elapsed)) + "ms") except: logging.exception('') elapsed = time.time() - starttime logging.info("Error in " + str(round(1000 * elapsed) / 1000) + "s") self.response.out.write("Error in " + str(round(1000 * elapsed)) + "ms")
def processOfRequest(self, tenant, token): self._approot_path = os.path.dirname(__file__) # エラーが1回おきたら処理を終了する if (int(self.request.headers.environ['HTTP_X_APPENGINE_TASKRETRYCOUNT'] ) > 1): logging.error('error over_1_times') return data_key = UcfUtil.nvl(self.getRequest('data_key')) data_kind = UcfUtil.nvl(self.getRequest('data_kind')) login_operator_id = UcfUtil.nvl(self.getRequest('login_operator_id')) login_operator_unique_id = UcfUtil.nvl( self.getRequest('login_operator_unique_id')) login_operator_mail_address = UcfUtil.nvl( self.getRequest('login_operator_mail_address')) login_operator_client_ip = UcfUtil.nvl( self.getRequest('login_operator_client_ip')) # オペレータ情報を取得 login_operator_entry = None if login_operator_unique_id != '': login_operator_entry = OperatorUtils.getData( self, login_operator_unique_id) if login_operator_entry is None: raise Exception('Not found login operator information.') return # preparing blob reader # blobstore 保存しているバイナリデータを取得 blob_key = str(urllib.unquote(self.request.get('key'))) blob_reader = blobstore.BlobReader(blob_key) # ファイルデータを取得(ステータス=CREATINGで作成済) file_entry = FileUtils.getDataEntryByDataKey(self, data_key) if file_entry is None: raise Exception(self.getMsg('MSG_NOTFOUND_TARGET_FILE', (data_key))) return # タスクトークンの取得と更新 last_task_token = file_entry.task_token if file_entry.task_token is not None else '' file_entry.task_token = token file_entry.put() file_vo = file_entry.exchangeVo(self._timezone) FileUtils.editVoForSelect(self, file_vo) file_encoding = UcfUtil.getHashStr(self.getDeptInfo(True), 'file_encoding') if file_encoding == '' or file_encoding == 'SJIS': data_encoding = 'cp932' elif file_encoding == 'JIS': data_encoding = 'jis' elif file_encoding == 'EUC': data_encoding = 'euc-jp' elif file_encoding == 'UTF7': data_encoding = 'utf-7' elif file_encoding == 'UTF8': data_encoding = 'utf-8' elif file_encoding == 'UNICODE': data_encoding = 'utf-16' else: data_encoding = 'cp932' log_msg = [] #is_error = False record_cnt = 0 #insert_cnt = 0 #update_cnt = 0 #delete_cnt = 0 #skip_cnt = 0 #error_cnt = 0 shutdown_record_cnt_str = self.request.get('shutdown_record_cnt') if shutdown_record_cnt_str is not None and shutdown_record_cnt_str != '': shutdown_record_cnt = int(shutdown_record_cnt_str) else: shutdown_record_cnt = 0 logging.info('shutdown_record_cnt=' + str(shutdown_record_cnt)) is_error_str = self.request.get('is_error') if is_error_str is not None and is_error_str.lower() == 'true': is_error = True else: is_error = False logging.info('is_error=' + str(is_error)) insert_cnt_str = self.request.get('insert_cnt') if insert_cnt_str is not None and insert_cnt_str != '': insert_cnt = int(insert_cnt_str) else: insert_cnt = 0 update_cnt_str = self.request.get('update_cnt') if update_cnt_str is not None and update_cnt_str != '': update_cnt = int(update_cnt_str) else: update_cnt = 0 delete_cnt_str = self.request.get('delete_cnt') if delete_cnt_str is not None and delete_cnt_str != '': delete_cnt = int(delete_cnt_str) else: delete_cnt = 0 skip_cnt_str = self.request.get('skip_cnt') if skip_cnt_str is not None and skip_cnt_str != '': skip_cnt = int(skip_cnt_str) else: skip_cnt = 0 error_cnt_str = self.request.get('error_cnt') if error_cnt_str is not None and error_cnt_str != '': error_cnt = int(error_cnt_str) else: error_cnt = 0 try: # 同じトークンで既に処理済みの場合、GAEのタスクが強制終了した後のリトライなのでログを出しておく if last_task_token == token: is_error = True log_msg.append( self._formatLogRecord( UcfMessage.getMessage( self.getMsg('MSG_TASK_FORCE_RETRY')))) 
self.updateTaskStatus(file_vo, file_entry, log_msg, is_error, login_operator_unique_id, login_operator_id) del log_msg[:] logging.info('csv_analysis start...') new_lines = [] str_record = '' quote_num = 0 old_lines = blob_reader.read().splitlines() for line in old_lines: #str_record += lineline + '\n' #if str_record.count('"') % 2 == 0: # new_lines.append(str_record.rstrip('\n')) # str_record = '' quote_num += line.count('"') if quote_num % 2 == 0: new_lines.append(str_record + line) str_record = '' quote_num = 0 else: str_record += line + '\n' logging.info('csv_analysis end. the record count is ' + str(len(new_lines)) + ' with title line.') # 巨大なCSVファイルを扱えるように対応 2015.03.27 csv.field_size_limit(1000000000) # process uploaded csv file # universal-newline mode に対応 #csvfile = csv.reader(blob_reader, dialect=csv.excel) #csvfile = csv.reader(blob_reader.read().splitlines(), dialect=csv.excel) csvfile = csv.reader(new_lines, dialect=csv.excel) col_names = [] for row in csvfile: # タイトル行の処理 if record_cnt == 0: # first row: column list col_index = 0 for col in row: # BOM付CSVに対応 2016.10.13 if data_encoding == 'utf-8' and col_index == 0: col = col.decode('utf-8-sig').encode('utf-8') col_name = col.strip().strip('"') # # 条件を削除し、一列目の情報は全て列を作成 col_names.append(col_name) col_index += 1 # データ行の処理 elif shutdown_record_cnt <= record_cnt - 1: is_runtime_shutdown = False is_force_runtime_shutdown = False # 5レコードに一回チェックしてみる # シャットダウンを検知した場合 if record_cnt % 5 == 0: is_runtime_shutdown = runtime.is_shutting_down() # 強制対応はとりあえずコメントアウト ## シャットダウン検知しない場合も多いので、500レコードに一回ずつ別タスクにする ⇒ 100 に変更 2014.06.12 #if (shutdown_record_cnt < record_cnt - 1) and (record_cnt - 1) % 100 == 0: # is_force_runtime_shutdown = True if is_runtime_shutdown or is_force_runtime_shutdown: is_shutting_down = True current_memory_usage = runtime.memory_usage().current() logging.info('is_shutting_down=' + str(is_runtime_shutdown) + ' current_memory_usage=' + str(current_memory_usage)) # instance will be shut down soon! 
# exit here and kick same batch to start next record logging.info( '***** kicking same batch and stopping: shutdown_record_cnt=' + str(record_cnt - 1)) # サマリをログ出力 log_msg.append( self._formatLogRecord( 'development process [record:' + UcfUtil.nvl(record_cnt - 1) + ' skip:' + UcfUtil.nvl(skip_cnt) + ' insert:' + UcfUtil.nvl(insert_cnt) + ' update:' + UcfUtil.nvl(update_cnt) + ' delete:' + UcfUtil.nvl(delete_cnt) + ' error:' + UcfUtil.nvl(error_cnt) + ' ]')) log_msg.append( self._formatLogRecord( 'kicking same batch and stopping: shutdown_record_cnt=' + str(record_cnt - 1))) self.updateTaskStatus(file_vo, file_entry, log_msg, is_error, login_operator_unique_id, login_operator_id) del log_msg[:] # kick start import import_q = taskqueue.Queue('csv-export-import') params = { 'shutdown_record_cnt': record_cnt - 1, 'insert_cnt': insert_cnt, 'update_cnt': update_cnt, 'delete_cnt': delete_cnt, 'skip_cnt': skip_cnt, 'error_cnt': error_cnt, 'is_error': is_error, 'key': blob_key, 'data_key': data_key, 'data_kind': data_kind, 'login_operator_id': login_operator_id, 'login_operator_unique_id': login_operator_unique_id, 'login_operator_mail_address': login_operator_mail_address, 'login_operator_client_ip': login_operator_client_ip } import_t = taskqueue.Task( url='/a/' + tenant + '/' + token + '/queue_csv_import', params=params, target=sateraito_func.getBackEndsModuleName( tenant), countdown='1') import_q.add(import_t) return col_index = 0 # params に配列を作成する。 csv_record = {} for col_value in row: if col_index < len(col_names): # cut off too much csv data columns # csv_record[col_names[col_index]] = unicode(col_value, UcfConfig.DL_ENCODING).strip().strip('"') # csv_record[col_names[col_index]] = unicode(col_value, data_encoding).strip().strip('"') csv_record[col_names[col_index]] = unicode( col_value, data_encoding) col_index += 1 # 1行処理 deal_type = '' row_log_msg = None code = '' if data_kind == 'importgroupcsv': deal_type, code, row_log_msg = self.importOneRecordGroup( csv_record, record_cnt, blob_key, data_key, data_kind, login_operator_unique_id, login_operator_id, login_operator_mail_address, login_operator_client_ip, login_operator_entry) # elif data_kind == 'importusercsv': # deal_type, code, row_log_msg = self.importOneRecordUser(csv_record, record_cnt, blob_key, data_key, data_kind, login_operator_unique_id, login_operator_id, login_operator_mail_address, login_operator_client_ip, login_operator_entry) # elif data_kind == 'importchangeuseridcsv': # deal_type, code, row_log_msg = self.importOneRecordChangeUserID(csv_record, record_cnt, blob_key, data_key, data_kind, login_operator_unique_id, login_operator_id, login_operator_mail_address, login_operator_client_ip, login_operator_entry) # 件数やエラーメッセージを集計 if row_log_msg is not None: log_msg.extend(row_log_msg) if code != '': error_cnt += 1 is_error = True if deal_type == UcfConfig.EDIT_TYPE_NEW: insert_cnt += 1 elif deal_type == UcfConfig.EDIT_TYPE_RENEW: update_cnt += 1 elif deal_type == UcfConfig.EDIT_TYPE_DELETE: delete_cnt += 1 elif deal_type == UcfConfig.EDIT_TYPE_SKIP: skip_cnt += 1 # ユーザーID変更処理はデリケートなので毎回ログを出す if data_kind == 'importchangeuseridcsv' and log_msg is not None and len( log_msg) > 0: self.updateTaskStatus(file_vo, file_entry, log_msg, is_error, login_operator_unique_id, login_operator_id) del log_msg[:] # ときどきメモリ開放 if record_cnt % 100 == 0: current_memory_usage = runtime.memory_usage().current() gc.collect() current_memory_usage2 = runtime.memory_usage().current() logging.info('[memory_usage]record=' + str(record_cnt) + ' before:' + 
str(current_memory_usage) + ' after:' + str(current_memory_usage2)) record_cnt += 1 except BaseException, e: self.outputErrorLog(e) log_msg.append(self._formatLogRecord('system error.')) is_error = True
def get(self): repeat = self.request.get("repeat", 5) iter = self.request.get("iter", 100) self.response.headers['Content-Type'] = 'text/plain' stmt1 = """\ import pytz from datetime import datetime tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London', 'America/Indiana/Indianapolis') for tz in tz_strings: dt = datetime(2009, 4, 15) pytz.timezone(tz) """ stmt2 = """\ import spytz3 from datetime import datetime tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London', 'America/Indiana/Indianapolis') for tz in tz_strings: dt = datetime(2009, 4, 15) spytz3.timezone(tz) """ stmt3 = """\ import pytz from datetime import datetime tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London', 'America/Indiana/Indianapolis') for tz in tz_strings: dt = datetime(2009, 4, 15) pytz.timezone(tz) pytz.clear_cache() """ stmt4 = """\ import spytz3 from datetime import datetime tz_strings = ('Australia/Perth', 'Australia/Melbourne', 'Europe/London', 'America/Indiana/Indianapolis') for tz in tz_strings: dt = datetime(2009, 4, 15) spytz3.timezone(tz) spytz3.clear_cache() """ gc.collect() self.response.write("-- cache --\n") mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() t1 = timeit.repeat(stmt=stmt1, number=100, repeat=4) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("PYTZ cpu:{}, memory: {}\n".format(cpu, mem)) self.response.write("timeit: {}\n".format(t1)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() gc.collect() time.sleep(1) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("SLEEP cpu:{}, memory: {}\n".format(cpu, mem)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() t2 = timeit.repeat(stmt=stmt2, number=100, repeat=4) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("SPYTZ cpu:{}, memory: {}\n".format(cpu, mem)) self.response.write("timeit: {}\n".format(t2)) self.response.write("\n") self.response.write("-- clear cache --\n") mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() gc.collect() time.sleep(1) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("SLEEP cpu:{}, memory: {}\n".format(cpu, mem)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() t3 = timeit.repeat(stmt=stmt3, number=100, repeat=4) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("PYTZ cpu:{}, memory: {}\n".format(cpu, mem)) self.response.write("timeit: {}\n".format(t3)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() gc.collect() time.sleep(1) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("SLEEP cpu:{}, memory: {}\n".format(cpu, mem)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() t4 = timeit.repeat(stmt=stmt4, number=100, repeat=4) mem = runtime.memory_usage().current() - mem_st cpu = runtime.cpu_usage().total() - cpu_st self.response.write("SPYTZ cpu:{}, memory: {}\n".format(cpu, mem)) self.response.write("timeit: {}\n".format(t4)) mem_st = runtime.memory_usage().current() cpu_st = runtime.cpu_usage().total() gc.collect() time.sleep(1) mem = runtime.memory_usage().current() - mem_st cpu = 
runtime.cpu_usage().total() - cpu_st self.response.write("SLEEP cpu:{}, memory: {}\n".format(cpu, mem))
def data2store(data):
    if not data:
        logging.info("NOTHING TO SAVE")
        return
    cachedata = json.loads(getGasole().decode('zlib'))
    if "_meta" in cachedata:
        # compatibility with the old api
        cachedata = cachedata.get("_data")
    for p in data:  # iterate over the provinces
        _provinces = []  # new provinces
        _towns = []      # new towns
        _stations = []   # new gas stations
        # _prices = []   # new or updated prices
        _history = []    # new history records (as many as _prices)
        _closed = []     # closed stations
        # _del_prices = []  # current prices to delete
        datap = data.get(p)
        cachep = cachedata.get(p)
        if not cachep:  # new province
            cachep = {}
            _provinces.append(Province(key_name=p))
        for t in datap:  # iterate over the towns
            datat = datap[t]
            cachet = cachep.get(t)
            if not cachet:  # new town
                cachet = cachep[t] = {}
                _towns.append(Town(key_name=t, parent=db.Key.from_path('Province', p)))
            for s in datat:  # iterate over the stations
                datas = datat[s]
                caches = cachet.get(s)
                update_price = False
                if not caches:  # new station
                    _stations.append(GasStation(
                        key_name=s,
                        parent=db.Key.from_path('Province', p, 'Town', t),
                        label=datas["l"],
                        hours=datas["h"],
                        closed=False))
                    update_price = True
                else:
                    geopt = caches.get("g")
                    if geopt:
                        datas["g"] = geopt
                    if caches["d"] != datas["d"]:  # different date
                        update_price = True
                    del cachet[s]  # remove it from cachep: closed-station detection
                if update_price:
                    parent_key = db.Key.from_path('Province', p, 'Town', t, 'GasStation', s)
                    date = Date(*datas["d"])
                    props = dict((FUEL_OPTIONS[o]["short"], datas["o"][o]) for o in datas["o"])
                    # _prices.append(PriceData(key_name=s, parent=parent_key, date=date, **props))
                    _history.append(HistoryData(parent=parent_key, date=date, **props))
            if len(cachet) == 0:  # no stations left; speeds up the closed-station search
                del cachep[t]  # remove the town from the cache
        # Closed stations: whatever is left in cachep
        for t in cachep:
            for s in cachep[t]:
                caches = cachep[t][s]
                _closed.append(GasStation(
                    key_name=s,
                    parent=db.Key.from_path('Province', p, 'Town', t),
                    label=caches["l"],
                    hours=caches["h"],
                    closed=True))
                # _del_prices.append(db.Key.from_path('Province', p, 'Town', t, 'GasStation', s, 'PriceData', s))
        newdata = _provinces + _towns + _stations + _history + _closed  # +_prices
        if len(newdata):
            try:
                logging.info("==========Saving data for %s" % p)
                if len(_towns):
                    logging.info("%s new towns" % len(_towns))
                if len(_stations):
                    logging.info("%s new stations" % len(_stations))
                # if len(_prices):
                #     logging.info("%s new prices" % len(_prices))
                if len(_history):
                    logging.info("%s history records" % len(_history))
                if len(_closed):
                    logging.info("%s CLOSED stations" % len(_closed))
                # if len(_del_prices):
                #     logging.info("%s DELETED prices" % len(_del_prices))
                updateDB(dnew=newdata)
                # json_data = json.dumps({"_data": {p: datap}})
                # ApiJson(key_name=p, json=json_data).put()
                logging.info("Memory usage: %s" % memory_usage().current())
            except Exception, e:
                logging.error("*************** Could not save the data for %s" % p)
                logging.error(str(e))
                return
        del newdata