# Imports assumed by the benchmark snippets below (funcy is used under the F alias;
# expr_result is defined elsewhere in the script).
import sys
from pprint import pprint

import funcy as F
import matplotlib.pyplot as plt
import numpy as np
import yaml
from tqdm import tqdm


def push_pop():
    # push, pop: asc / dec / rand
    min_num = 10
    max_num = 200000
    step = 5000
    num_datas = list(range(min_num, max_num, step))
    result_dics = list(
        tqdm(map(lambda n: expr_result(n, 3), num_datas),
             total=len(num_datas)))
    exprs = F.join_with(list, result_dics)
    #return num_datas, exprs

    pprint(exprs)
    print(num_datas)

    y_keys = F.lremove(lambda key: 'merge' in key or 'pop' in key, exprs.keys())
    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas, exprs[key], label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]
    pprint(gradient_dic)

    plt.xlabel('number of keys')
    plt.ylabel('milli seconds')
    plt.legend()
    plt.show()

    y_keys = F.lremove(lambda key: 'merge' in key or 'push' in key, exprs.keys())
    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas, exprs[key], label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]
    pprint(gradient_dic)

    plt.xlabel('number of keys')
    plt.ylabel('milli seconds')
    plt.legend()
    plt.show()
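# The np.polyfit(xs, ys, 1)[0] pattern used above (and in the snippets below) takes the
# slope of a least-squares linear fit as a rough growth-rate estimate for each curve.
# A minimal standalone check with synthetic data (values are made up):
import numpy as np

xs = [10, 20, 30, 40]
ys = [21.0, 40.5, 61.2, 80.9]       # roughly 2*x + 1
slope = np.polyfit(xs, ys, 1)[0]
print(slope)                        # ~2.0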
def merge():
    min_num = 10
    max_num = 5000
    step = 100
    num_datas = list(range(min_num, max_num, step))
    result_dics = list(
        tqdm(map(lambda n: expr_result(3, n), num_datas),
             total=len(num_datas)))

    def avrg_merge_result(dic):
        def avrg(dics):
            return sum(map(lambda d: d['time'], dics)) / len(dics)
        dic['h.merge.2rand'] = avrg(dic['h.merge.2rand'])
        dic['bh.merge.2rand'] = avrg(dic['bh.merge.2rand'])
        return dic

    result_dics = F.lmap(avrg_merge_result, result_dics)
    print(result_dics)
    exprs = F.join_with(list, result_dics)
    #pprint(exprs)

    y_keys = F.lremove(lambda key: 'pop' in key or 'push' in key, exprs.keys())
    #print(y_keys)
    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas, exprs[key], label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]
    pprint(gradient_dic)

    plt.xlabel('max number of keys')
    plt.ylabel('milli seconds')
    plt.legend()
    plt.show()
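# push_pop() and merge() above assume an expr_result(...) helper defined elsewhere that
# benchmarks a plain heap ('h.*' keys) against a binomial heap ('bh.*' keys) and returns
# one dict per run; F.join_with(list, ...) then stacks those dicts into series.
# A hedged sketch of the shape the plotting code expects -- the push/pop key names and
# all numbers are illustrative guesses; only the '*.merge.2rand' keys holding
# [{'time': ...}, ...] entries are implied by avrg_merge_result above:
assumed_expr_result = {
    'h.push.rand': 1.2,     # milliseconds for the push benchmark on the plain heap
    'bh.push.rand': 0.9,    # same workload on the binomial heap
    'h.pop.rand': 2.3,
    'bh.pop.rand': 2.0,
    'h.merge.2rand': [{'time': 0.50}, {'time': 0.61}],
    'bh.merge.2rand': [{'time': 0.40}, {'time': 0.47}],
}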
def dnfs(qs):
    """
    Converts query condition tree into a DNF of eq conds. Separately for each alias.

    Any negations, conditions with lookups other than __exact or __in,
    conditions on joined models and subrequests are ignored.
    __in is converted into = or = or = ...
    """
    SOME = object()
    SOME_TREE = [[(None, None, SOME, True)]]

    def negate(term):
        return (term[0], term[1], term[2], not term[3])

    def _dnf(where):
        """
        Constructs DNF of where tree consisting of terms in form:
            (alias, attribute, value, negation)
        meaning `alias.attribute = value`
         or `not alias.attribute = value` if negation is False

        Any conditions other than eq are dropped.
        """
        if isinstance(where, Lookup):
            # If where.lhs doesn't refer to a field then don't bother
            if not hasattr(where.lhs, 'target'):
                return SOME_TREE
            # Don't bother with complex right hand side either
            if isinstance(where.rhs, (QuerySet, Query, Subquery, RawSQL)):
                return SOME_TREE
            # Skip conditions on non-serialized fields
            if isinstance(where.lhs.target, settings.CACHEOPS_SKIP_FIELDS):
                return SOME_TREE

            attname = where.lhs.target.attname
            if isinstance(where, Exact):
                return [[(where.lhs.alias, attname, where.rhs, True)]]
            elif isinstance(where, IsNull):
                return [[(where.lhs.alias, attname, None, where.rhs)]]
            elif isinstance(where, In) \
                    and len(where.rhs) < settings.CACHEOPS_LONG_DISJUNCTION:
                return [[(where.lhs.alias, attname, v, True)] for v in where.rhs]
            else:
                return SOME_TREE
        elif isinstance(where, NothingNode):
            return []
        elif isinstance(where, (ExtraWhere, SubqueryConstraint)):
            return SOME_TREE
        elif len(where) == 0:
            return [[]]
        else:
            children_dnfs = lmap(_dnf, where.children)

            if len(children_dnfs) == 0:
                return [[]]
            elif len(children_dnfs) == 1:
                result = children_dnfs[0]
            else:
                # Just unite children joined with OR
                if where.connector == OR:
                    result = lcat(children_dnfs)
                # Use Cartesian product to AND children
                else:
                    result = lmap(lcat, product(*children_dnfs))

            # Negating and expanding brackets
            if where.negated:
                result = [lmap(negate, p) for p in product(*result)]

            return result

    def clean_conj(conj, for_alias):
        conds = {}
        for alias, attname, value, negation in conj:
            # "SOME" conds, negated conds and conds for other aliases should be stripped
            if value is not SOME and negation and alias == for_alias:
                # Conjs with fields eq 2 different values will never cause invalidation
                if attname in conds and conds[attname] != value:
                    return None
                conds[attname] = value
        return conds

    def clean_dnf(tree, aliases):
        cleaned = [clean_conj(conj, alias) for conj in tree for alias in aliases]
        # Remove deleted conjunctions
        cleaned = [conj for conj in cleaned if conj is not None]
        # Any empty conjunction eats up the rest
        # NOTE: a more elaborate DNF reduction is not really needed,
        #       just keep your querysets sane.
        if not all(cleaned):
            return [{}]
        return cleaned

    def query_dnf(query):
        def table_for(alias):
            if alias == main_alias:
                return alias
            return query.alias_map[alias].table_name

        dnf = _dnf(query.where)

        # NOTE: we exclude content_type as it never changes and will hold dead invalidation info
        main_alias = query.model._meta.db_table
        aliases = {alias for alias, join in query.alias_map.items()
                   if query.alias_refcount[alias]} \
            | {main_alias} - {'django_content_type'}
        tables = group_by(table_for, aliases)
        return {table: clean_dnf(dnf, table_aliases)
                for table, table_aliases in tables.items()}

    if qs.query.combined_queries:
        return join_with(lcat, (query_dnf(q) for q in qs.query.combined_queries))
    else:
        return query_dnf(qs.query)
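# A small runnable sketch of the AND branch inside _dnf above: child DNFs are combined
# with a Cartesian product, so (a=1 OR a=2) AND b=3 expands to (a=1 AND b=3) OR
# (a=2 AND b=3). Plain tuples stand in for Django where-tree terms:
from itertools import product
from funcy import lmap, lcat

child1 = [[('t', 'a', 1, True)], [('t', 'a', 2, True)]]    # a = 1 OR a = 2
child2 = [[('t', 'b', 3, True)]]                           # b = 3
print(lmap(lcat, product(child1, child2)))
# [[('t', 'a', 1, True), ('t', 'b', 3, True)],
#  [('t', 'a', 2, True), ('t', 'b', 3, True)]]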
def cached_as(*samples, timeout=None, extra=None, lock=None, keep_fresh=False,
              key_func=func_cache_key):
    """
    Caches results of a function and invalidates them same way as given queryset(s).
    NOTE: Ignores queryset cached ops settings, always caches.

    If keep_fresh is True, this will prevent caching if the given querysets are
    invalidated during the function call. This prevents prolonged caching of stale data.
    """
    if not samples:
        raise TypeError('Pass a queryset, a model or an object to cache like')

    # If we unexpectedly get list instead of queryset return identity decorator.
    # Paginator could do this when page.object_list is empty.
    if len(samples) == 1 and isinstance(samples[0], list):
        return lambda func: func

    def _get_queryset(sample):
        if isinstance(sample, models.Model):
            queryset = sample.__class__.objects.filter(pk=sample.pk)
        elif isinstance(sample, type) and issubclass(sample, models.Model):
            queryset = sample.objects.all()
        else:
            queryset = sample

        queryset._require_cacheprofile()

        return queryset

    querysets = lmap(_get_queryset, samples)
    dbs = list({qs.db for qs in querysets})
    cond_dnfs = join_with(lcat, map(dnfs, querysets))
    key_extra = [qs._cache_key(prefix=False) for qs in querysets]
    key_extra.append(extra)
    if timeout is None:
        timeout = min(qs._cacheprofile['timeout'] for qs in querysets)
    if lock is None:
        lock = any(qs._cacheprofile['lock'] for qs in querysets)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if not settings.CACHEOPS_ENABLED or transaction_states.is_dirty(dbs):
                return func(*args, **kwargs)

            prefix = get_prefix(func=func, _cond_dnfs=cond_dnfs, dbs=dbs)
            cache_key = prefix + 'as:' + key_func(func, args, kwargs, key_extra)

            with redis_client.getting(cache_key, lock=lock) as cache_data:
                cache_read.send(sender=None, func=func, hit=cache_data is not None)
                if cache_data is not None:
                    return pickle.loads(cache_data)
                else:
                    if keep_fresh:
                        # We call this "asp" for "as precall" because this key is
                        # cached before the actual function is called. We randomize
                        # the key to prevent falsely thinking the key was not
                        # invalidated when in fact it was invalidated and the
                        # function was called again in another process.
                        suffix = key_func(func, args, kwargs, key_extra + [random()])
                        precall_key = prefix + 'asp:' + suffix
                        # Cache a precall_key to watch for invalidation during
                        # the function call. Its value does not matter. If and
                        # only if it remains valid before, during, and after the
                        # call, the result can be cached and returned.
                        cache_thing(prefix, precall_key, 'PRECALL', cond_dnfs,
                                    timeout, dbs=dbs)
                    else:
                        precall_key = ''

                    result = func(*args, **kwargs)
                    cache_thing(prefix, cache_key, result, cond_dnfs, timeout,
                                dbs=dbs, precall_key=precall_key)
                    return result

        return wrapper
    return decorator
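# Typical use of the decorator above, as in django-cacheops: the wrapped function's
# result is cached and invalidated together with the sample queryset. The Article model
# and its module path are illustrative, not part of the original code.
from cacheops import cached_as
from myapp.models import Article   # hypothetical model

@cached_as(Article.objects.filter(is_public=True), timeout=60)
def public_titles():
    # Recomputed only after the cache entry expires or an Article changes.
    return list(Article.objects.filter(is_public=True).values_list('title', flat=True))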
def handle(self, *args, **options):
    series = {}
    samples = {}
    platform_created_on = join_with(
        min,
        [{p: ceil_attrs_date(s) for p in s.platforms}
         for s in Series.objects.all()])
    platform_qs = Platform.objects.annotate(probes_count=Count('probes'))\
        .values('gpl_name', 'probes_count')
    platforms = {}
    platforms_probes = {}
    series_annotations = {}
    sample_annotations = {}
    concordant_series_annotations = {}
    concordant_sample_annotations = {}
    series_tags = {}
    concordant_series_tags = {}
    sample_tags = {}
    concordant_sample_tags = {}
    series_validations = {}
    sample_validations = {}
    concordant_series_validations = {}
    concordant_sample_validations = {}

    for specie in SPECIES.values():
        series[specie] = accumulate(
            count_by(ceil_attrs_date, Series.objects.filter(specie=specie)))

        qs = Sample.objects.filter(platform__specie=specie)
        iterator = tqdm(queryset_iterator(qs, 30000),
                        total=qs.count(),
                        desc='{0} samples'.format(specie))
        samples[specie] = accumulate(count_by(ceil_attrs_date, iterator))

        platforms_data = [
            [platform_created_on[item['gpl_name']], item['probes_count']]
            for item in platform_qs.filter(specie=specie)]
        platforms[specie] = accumulate(count_by(first, platforms_data))
        group = group_values(platforms_data)
        platforms_probes[specie] = accumulate(walk_values(sum, group))

        qs = SeriesAnnotation.objects.filter(series__specie=specie)
        series_annotations[specie], \
            sample_annotations[specie] = distribute_series_and_sample_annotations(qs)

        concordant_series_annotations[specie], \
            concordant_sample_annotations[specie] = distribute_series_and_sample_annotations(
                qs.filter(best_cohens_kappa=1))

        qs = SeriesTag.objects.filter(platform__specie=specie, is_active=True)
        series_tags[specie] = distribute_by_created_on(qs)
        concordant_series_tags[specie] = distribute_by_created_on(
            qs.exclude(agreed=None))

        qs = SampleTag.objects.filter(sample__platform__specie=specie, is_active=True)
        sample_tags[specie] = distribute_by_created_on(qs)
        concordant_sample_tags[specie] = distribute_by_created_on(
            qs.exclude(series_tag__agreed=None))

        qs = SerieValidation.objects.filter(platform__specie=specie,
                                            ignored=False,
                                            by_incompetent=False)
        series_validations[specie] = distribute_by_created_on(qs)
        concordant_series_validations[specie] = distribute_by_created_on(
            qs.filter(best_kappa=1))

        qs = SampleValidation\
            .objects\
            .filter(sample__platform__specie=specie,
                    serie_validation__ignored=False,
                    serie_validation__by_incompetent=False)
        sample_validations[specie] = distribute_by_created_on(qs)
        concordant_sample_validations[specie] = distribute_by_created_on(
            qs.filter(Q(serie_validation__best_kappa=1) | Q(concordant=True)))

    users = accumulate(
        count_by(ceil_date, User.objects.values_list('date_joined', flat=True)))
    tags = accumulate(
        count_by(ceil_date, Tag.objects.values_list('created_on', flat=True)))

    delta = CURRENT_DATE - START_DATE
    keys = sorted(
        set(ceil_date(START_DATE + timedelta(days=index * 20))
            for index in range(delta.days // 20 + 1)))

    specie_data = {
        'series': series,
        'samples': samples,
        'platforms': platforms,
        'platforms_probes': platforms_probes,
        'series_annotations': series_annotations,
        'sample_annotations': sample_annotations,
        'concordant_series_annotations': concordant_series_annotations,
        'concordant_sample_annotations': concordant_sample_annotations,
        'series_tags': series_tags,
        'sample_tags': sample_tags,
        'concordant_series_tags': concordant_series_tags,
        'concordant_sample_tags': concordant_sample_tags,
        'series_validations': series_validations,
        'sample_validations': sample_validations,
        'concordant_series_validations': concordant_series_validations,
        'concordant_sample_validations': concordant_sample_validations,
        'series_tags_by_users':
            distribute_by_user_id(SeriesTag.objects.filter(is_active=True)),
        'sample_tags_by_users':
            distribute_by_user_id(SampleTag.objects.filter(is_active=True)),
        'series_validations_by_users':
            distribute_by_user_id(
                SerieValidation.objects.filter(ignored=False, by_incompetent=False)),
        'sample_validations_by_users':
            distribute_by_user_id(
                SampleValidation.objects.filter(
                    serie_validation__ignored=False,
                    serie_validation__by_incompetent=False)),
        'series_tag_history': get_series_tag_history(),
    }

    data = {
        'users': users,
        'tags': tags,
    }

    with transaction.atomic():
        HistoricalCounter.objects.filter(created_on__lte=CURRENT_DATE).delete()
        HistoricalCounter.objects.bulk_create([
            HistoricalCounter(
                created_on=key,
                counters=merge(
                    walk_values(get_value(keys, index), data),
                    walk_values(
                        lambda value: walk_values(get_value(keys, index), value),
                        specie_data)))
            for index, key in enumerate(keys)
        ])
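# The bulk_create block above builds each counters dict with funcy's walk_values and
# merge. A tiny runnable illustration of that combination (data values are made up;
# get_value(keys, index) is assumed to pick the cumulative count for one date):
from funcy import merge, walk_values

cumulative = {'users': [1, 4, 9], 'tags': [2, 3, 5]}
pick_second = lambda series: series[1]          # stand-in for get_value(keys, 1)
print(merge({'extra': 0}, walk_values(pick_second, cumulative)))
# {'extra': 0, 'users': 4, 'tags': 3}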
def dnfs(qs):
    """
    Converts query condition tree into a DNF of eq conds. Separately for each alias.

    Any negations, conditions with lookups other than __exact or __in,
    conditions on joined models and subrequests are ignored.
    __in is converted into = or = or = ...
    """
    SOME = object()
    SOME_TREE = [[(None, None, SOME, True)]]

    def negate(term):
        return (term[0], term[1], term[2], not term[3])

    def _dnf(where):
        """
        Constructs DNF of where tree consisting of terms in form:
            (alias, attribute, value, negation)
        meaning `alias.attribute = value`
         or `not alias.attribute = value` if negation is False

        Any conditions other than eq are dropped.
        """
        if isinstance(where, Lookup):
            # If where.lhs doesn't refer to a field then don't bother
            if not hasattr(where.lhs, 'target'):
                return SOME_TREE
            # Don't bother with complex right hand side either
            if isinstance(where.rhs, (QuerySet, Query)):
                return SOME_TREE
            # Skip conditions on non-serialized fields
            if isinstance(where.lhs.target, NOT_SERIALIZED_FIELDS):
                return SOME_TREE

            attname = where.lhs.target.attname
            if isinstance(where, Exact):
                return [[(where.lhs.alias, attname, where.rhs, True)]]
            elif isinstance(where, IsNull):
                return [[(where.lhs.alias, attname, None, where.rhs)]]
            elif isinstance(where, In) and len(where.rhs) < LONG_DISJUNCTION:
                return [[(where.lhs.alias, attname, v, True)] for v in where.rhs]
            else:
                return SOME_TREE
        elif isinstance(where, EverythingNode):
            return [[]]
        elif isinstance(where, NothingNode):
            return []
        elif isinstance(where, (ExtraWhere, SubqueryConstraint)):
            return SOME_TREE
        elif len(where) == 0:
            return [[]]
        else:
            children_dnfs = lmap(_dnf, where.children)

            if len(children_dnfs) == 0:
                return [[]]
            elif len(children_dnfs) == 1:
                result = children_dnfs[0]
            else:
                # Just unite children joined with OR
                if where.connector == OR:
                    result = lcat(children_dnfs)
                # Use Cartesian product to AND children
                else:
                    result = lmap(lcat, product(*children_dnfs))

            # Negating and expanding brackets
            if where.negated:
                result = [lmap(negate, p) for p in product(*result)]

            return result

    def clean_conj(conj, for_alias):
        conds = {}
        for alias, attname, value, negation in conj:
            # "SOME" conds, negated conds and conds for other aliases should be stripped
            if value is not SOME and negation and alias == for_alias:
                # Conjs with fields eq 2 different values will never cause invalidation
                if attname in conds and conds[attname] != value:
                    return None
                conds[attname] = value
        return conds

    def clean_dnf(tree, aliases):
        cleaned = [clean_conj(conj, alias) for conj in tree for alias in aliases]
        # Remove deleted conjunctions
        cleaned = [conj for conj in cleaned if conj is not None]
        # Any empty conjunction eats up the rest
        # NOTE: a more elaborate DNF reduction is not really needed,
        #       just keep your querysets sane.
        if not all(cleaned):
            return [[]]
        return cleaned

    def query_dnf(query):
        def table_for(alias):
            if alias == main_alias:
                return alias
            return query.alias_map[alias].table_name

        dnf = _dnf(query.where)

        # NOTE: we exclude content_type as it never changes and will hold dead invalidation info
        main_alias = query.model._meta.db_table
        aliases = {alias
                   for alias, (join, cnt) in zip_dicts(query.alias_map, query.alias_refcount)
                   if cnt and family_has_profile(table_to_model(join.table_name))} \
            | {main_alias} - {'django_content_type'}
        tables = group_by(table_for, aliases)
        return {table: clean_dnf(dnf, table_aliases)
                for table, table_aliases in tables.items()}

    if django.VERSION >= (1, 11) and qs.query.combined_queries:
        return join_with(lcat, (query_dnf(q) for q in qs.query.combined_queries))
    else:
        return query_dnf(qs.query)
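# The alias set in query_dnf above relies on funcy's zip_dicts, which yields
# (key, (value1, value2, ...)) tuples for keys present in all of the given dicts.
# A tiny runnable demo with made-up aliases and refcounts:
from funcy import zip_dicts

alias_map = {'t1': 'join_t1', 't2': 'join_t2'}
alias_refcount = {'t1': 1, 't2': 0}
print([alias for alias, (join, cnt) in zip_dicts(alias_map, alias_refcount) if cnt])
# ['t1']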
universal_newlines=True))

#min_n = 3 if len(sys.argv) != 3 else int(sys.argv[1])
#max_n = 10 if len(sys.argv) != 3 else int(sys.argv[2])
#num_expr = 3 if len(sys.argv) != 2 else int(sys.argv[1])
num_expr = 10 if len(sys.argv) != 2 else int(sys.argv[1])
#block = 2**29 // num_expr
block = 2**10 // num_expr
exprs = list(tqdm(
    map(expr_result, [n * block for n in range(1, num_expr)]),
    total=num_expr
))

with open('result_%d.yml' % (num_expr,), 'w') as f:
    yaml.dump(F.join_with(list, exprs), f)

with open('result_%d.yml' % (num_expr,)) as f:
    result_dic = yaml.safe_load(f)

y_keys = F.lremove('data.num', result_dic.keys())
#print(F.lmap(result_dic, y_keys))

import numpy as np

xs = result_dic['data.num']
gradient_dic = {}
for key in y_keys:
    plt.plot(xs, result_dic[key], label=key,
             marker='x' if 'q' in key else 'o',
             linestyle='--' if 'q' in key else '-')
    gradient_dic[key] = np.polyfit(xs, result_dic[key], 1)[0]
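# F.join_with(list, dicts) is what turns the per-run result dicts above into plottable
# series: values of keys that repeat across the dicts are collected into lists.
# A minimal runnable example with made-up key names and timings:
import funcy as F

runs = [{'q.push': 1.0, 'b.push': 0.8}, {'q.push': 2.1, 'b.push': 1.7}]
print(F.join_with(list, runs))
# {'q.push': [1.0, 2.1], 'b.push': [0.8, 1.7]}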