def home(request):
    word = request.GET.get('word')
    order = request.GET.get('order') or 'id'
    with open('data.json') as f:
        data = json.load(f)
    if word:
        # Keep only the token entry for the searched word, then pull the
        # sentence ids it points at.
        data['token_data'] = py_.filter(
            data['token_data'], lambda t: t['key'] == word)
        ids = py_(data['token_data']).pluck('ids').flatten().value()
        data['data'] = py_.at(data['data'], *ids)
        for d in data['data']:
            # Wrap every occurrence of the word in a red <font> tag.
            d['de_highlight'] = py_(d['de'].split(' ')).map(
                lambda s: f"<font color='red' class='font-weight-bold'>{s}</font>"
                if s.lower() == word else s).join(' ').value()
    order = "-count" if order == "count" else "id"
    sentences = py_.order_by(data['data'], [order])
    offset = int(request.GET.get('offset') or 0)
    limit = int(request.GET.get('limit') or len(sentences))
    data['sentences'] = sentences[offset:offset + limit]
    return render(request, 'home.html', data)
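# A minimal, runnable sketch of the token lookup used in home() above, with toy
# data standing in for data.json (the 'token_data'/'data' shapes are assumptions
# inferred from how the view reads them). `pluck` is pydash 3.x; on pydash >= 4
# use .map('ids') as below.
from pydash import py_

token_data = [{"key": "haus", "ids": [0, 2]}, {"key": "baum", "ids": [1]}]
rows = [{"de": "Das Haus ist gross"}, {"de": "Der Baum"}, {"de": "Ein Haus"}]

matches = py_.filter_(token_data, lambda t: t["key"] == "haus")
ids = py_(matches).map('ids').flatten().value()  # -> [0, 2]
print(py_.at(rows, *ids))  # the two rows containing "haus"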
def render_column(self, row, column):
    display_text = {YES: PASS, NO: FAIL, NOT_REPORTING: N_A}
    all_checks = FacilityTest.objects.all()
    # Checks without a sample belong to the default columns; the rest are
    # formulation-specific.
    default_columns = py_(all_checks).reject(
        lambda check: check.get_type() == FACILITY_TWO_GROUPS_WITH_SAMPLE).map(
        lambda check: check.name).value()
    formulation_columns = py_(all_checks).filter(
        lambda check: check.get_type() == FACILITY_TWO_GROUPS_WITH_SAMPLE).map(
        lambda check: check.name).value()
    formulation = self.request.POST.get(FORMULATION, F1)
    if column in default_columns:
        value_for_column = self.get_check_result(column, row)
        if isinstance(value_for_column, dict) and DEFAULT in value_for_column:
            actual_result = value_for_column[DEFAULT]
            return display_text[actual_result] if actual_result else actual_result
        else:
            return ""
    elif column in formulation_columns:
        result = self.get_check_result(column, row)
        if isinstance(result, dict) and formulation in result:
            actual_result = result[formulation]
            return display_text[actual_result] if actual_result else actual_result
        else:
            return ""
    else:
        return super(ScoresTableView, self).render_column(row, column)
def split_properties_fields(self, search_keys):
    model_fields = self._model._fields.keys()
    # Keys that are not model fields are treated as (computed) properties.
    property_fields = py_(search_keys).filter_(lambda item: any(
        k not in model_fields for k in item.keys())).value()
    search_keys = py_(search_keys).reject(lambda item: any(
        k not in model_fields for k in item.keys())).value()
    p_fields = {k: d[k] for d in property_fields for k in d.keys()}
    return search_keys, p_fields
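# Sketch of the split above with a plain set of model fields instead of a
# MongoEngine document; the names here are illustrative only.
from pydash import py_

model_fields = {"name", "age"}
search_keys = [{"name": "Ada"}, {"nickname": "al"}, {"age": 36}]

property_fields = py_(search_keys).filter_(
    lambda item: any(k not in model_fields for k in item)).value()
plain_keys = py_(search_keys).reject(
    lambda item: any(k not in model_fields for k in item)).value()
print(property_fields)  # [{'nickname': 'al'}]
print(plain_keys)       # [{'name': 'Ada'}, {'age': 36}]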
def filter_by_properties(p_fields, page):
    p_keys = sorted(p_fields.keys())
    for p in p_keys:
        res = []  # guard against an unbound result when no operator matches
        if '$regex' in p_fields[p].keys():
            res = py_(page).filter_(lambda item: re.compile(
                p_fields[p]['$regex'], re.IGNORECASE).search(
                getattr(item, p))).value()
        elif '$eq' in p_fields[p].keys():
            res = py_(page).filter_(
                lambda item: p_fields[p]['$eq'] == getattr(item, p)).value()
        # Keep narrowing while matches remain; the last property may
        # legitimately empty the page.
        page = res if len(res) > 0 or p == p_keys[-1] else page
    return page
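# Sketch of the `$regex` branch above against attribute-bearing objects; the
# namedtuple stands in for whatever model instances `page` holds.
import re
from collections import namedtuple
from pydash import py_

Item = namedtuple("Item", ["city"])
page = [Item("Berlin"), Item("Bern"), Item("Paris")]
p_fields = {"city": {"$regex": "^ber"}}

res = py_(page).filter_(lambda item: re.compile(
    p_fields["city"]["$regex"], re.IGNORECASE).search(item.city)).value()
print(res)  # [Item(city='Berlin'), Item(city='Bern')]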
def get(self, request):
    featured_tests = FacilityTest.objects.filter(
        featured=True).order_by("order").values(
        "id", "name", "order", "definition")[:2]
    ids_for_featured_tests = [item["id"] for item in featured_tests]
    other_tests = FacilityTest.objects.exclude(
        id__in=ids_for_featured_tests).order_by("order").values(
        "id", "name", "order", "definition")
    regimens = TracingFormulations.objects.values("name", "slug")
    featured = pydash.py_(featured_tests).map(prepare_for_ui(regimens)).value()
    other = pydash.py_(other_tests).map(prepare_for_ui(regimens)).value()
    return Response({"featured": featured, "other": other})
def aggregate_values(self, group, values):
    aggregation = available_aggregations.get(group.aggregation.id)
    if aggregation:
        all_values = py_(values).map(
            lambda x: x.values).flatten_deep().value()
        return aggregation(all_values)
    return None
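# Sketch of aggregate_values(): flatten arbitrarily nested record values and
# reduce them; `sum` stands in for an available_aggregations entry.
from pydash import py_

class Rec:
    def __init__(self, values):
        self.values = values

records = [Rec([1, [2, 3]]), Rec([4])]
all_values = py_(records).map(lambda x: x.values).flatten_deep().value()
print(all_values, sum(all_values))  # [1, 2, 3, 4] 10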
def f(facility_dict):
    name = facility_dict.get("name", "")
    new_location = Location(
        facility=name,
        # Assumes every facility has a level-3 ancestor (its district).
        district=py_(facility_dict.get("ancestors", [])).find(
            {"level": 3}).value().get("name"),
        partner=partner_mapping.get(name, "Unknown"),
        warehouse=facility_dict.get("warehouse"),
    )
    reference_location = locations_that_are_reporting.get(new_location, None)
    location_has_multiple = locations_reporting_multiple_times.get(
        new_location, None)
    if reference_location:
        new_location = attr.evolve(new_location, status="Reporting")
        if location_has_multiple:
            new_location = attr.evolve(new_location, multiple="multiple orders")
    else:
        new_location = attr.evolve(
            new_location, status="Not Reporting", multiple="not")
    return new_location
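# Sketch of the find-by-matcher used for `district` above: py_.find with a
# dict shorthand returns the first ancestor whose "level" equals 3.
from pydash import py_

ancestors = [{"level": 1, "name": "Uganda"}, {"level": 3, "name": "Gulu"}]
district = py_(ancestors).find({"level": 3}).value().get("name")
print(district)  # Gulu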
def filter_collection(
    self,
    collection,
    rtype: Optional[str] = None,
    sort_value: Optional[str] = 'title',
    reverse: bool = False,
    limit: Optional[int] = None,
    parent_name: Optional[str] = None,
    props: Optional[List[CorePropFilterModel]] = None,
):
    # Avoid the mutable-default-argument pitfall.
    props = props or []
    # Set the limit
    if limit is None:
        limit = len(collection.values())
    # Filter those results based on arbitrary key-value pairs
    r1 = collection
    for prop in props:
        r1 = pydash.filter_(r1, Query._prop_lambda(prop.key, prop.value))
    r1 = py_(r1) \
        .filter_(Query._attr_lambda('rtype', rtype)) \
        .filter_(Query._filter_parents(collection, parent_name)) \
        .sort_by(Query._sort_key_lamda(sort_value), reverse=reverse) \
        .slice(0, limit)
    return r1.value()
def fetch(url):
    print('loading: ' + url)
    resp = requests.get(url, auth=HTTPBasicAuth(account, password))
    data = json.loads(resp.text)
    result = py_(data['issues']).map(
        lambda issue: issue['fields']['summary']).value()
    return result
def discover_fonts(path, urlprefix='', picks=[]):
    '''Use fontconfig to scan a directory and list font properties in a dataframe.

    fontconfig's fc-scan accepts an output format string; we use this format
    for each font file found:

        >{family[0]}|{postscriptname}|{file}

    I added the `>` specifier to distinguish the wanted output from warnings
    (subprocess.getoutput captures stdout and stderr together).

    [NOTE] 05-09-2019
    Changed the pattern to family[0], since a font family can include more
    than one name and we only want one.
    '''
    header = ['family', 'postscriptname', 'file']
    fc_format = '%{family[0]}|%{postscriptname}|%{file}'
    # Uses the fc-scan utility from fontconfig.
    cmd = f'fc-scan {path} -b -f ">{fc_format}\\n"'
    df = (py_(subprocess.getoutput(cmd)).lines()
          .filter(lambda s: s.startswith('>'))  # skip warnings and blank lines
          .map(lambda s: s[1:])
          .join('\n')
          .thru(io.StringIO)
          .thru(lambda b: pd.read_csv(b, sep='|', names=header))
          .thru(lambda df: process_fontlist(df, urlprefix, path))
          .value())
    if len(picks) > 0:
        hues.info('Picking:', picks)
        picked_fonts = [name.lower() for name in picks]
        frame_slice = df.family.str.lower().isin(picked_fonts)
        return df[frame_slice]
    return df
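# Sketch of the lines/filter/thru pipeline above without running fc-scan;
# the raw string mimics fc-scan output mixed in with a warning line.
import io
import pandas as pd
from pydash import py_

raw = (">Roboto|Roboto-Regular|/fonts/Roboto.ttf\n"
       "warning: ignored\n"
       ">Lato|Lato-Bold|/fonts/Lato.ttf")
df = (py_(raw).lines()
      .filter(lambda s: s.startswith('>'))
      .map(lambda s: s[1:])
      .join('\n')
      .thru(io.StringIO)
      .thru(lambda b: pd.read_csv(b, sep='|',
                                  names=['family', 'postscriptname', 'file']))
      .value())
print(df.family.tolist())  # ['Roboto', 'Lato']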
def run(self, N, seeds):
    new_seeds = []
    # Extract all the tweets
    for s in seeds:
        print("Starting seed: " + s["handle"])
        tweets_seed = self.crawler.get_users_tweets(s["handle"], N)
        if len(tweets_seed) == 0:
            # Seeds with no tweets are dropped from the experiment.
            self.db_manager.delete_element(
                "seeds",
                {"handle": s["handle"], "id_experiment": self.id_experiment})
            continue
        for item in tweets_seed:
            item.update({"id_experiment": self.id_experiment,
                         "seed": s["_id"]})
        self.db_manager.write_mongo("tweets", tweets_seed)
        handles_new = set(self.crawler.get_all_handles_mentioned(
            tweets_seed, s["handle"]))
        for h in handles_new:
            new_seeds.append({"handle": h, "origin": s["handle"]})
    # Collapse duplicate handles, collecting every origin that mentioned them.
    new_seeds = py_(new_seeds).group_by("handle").to_pairs().map(
        lambda p: {"handle": p[0], "origin": py_.map(p[1], "origin")}).value()
    return new_seeds
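# Sketch of the seed-merging chain at the end of run(): collapse duplicate
# handles while collecting every origin that mentioned them.
from pydash import py_

seeds = [{"handle": "a", "origin": "x"},
         {"handle": "a", "origin": "y"},
         {"handle": "b", "origin": "x"}]
merged = py_(seeds).group_by("handle").to_pairs().map(
    lambda p: {"handle": p[0], "origin": py_.map_(p[1], "origin")}).value()
print(merged)
# [{'handle': 'a', 'origin': ['x', 'y']}, {'handle': 'b', 'origin': ['x']}]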
def groups_have_adequate_data(self, groups):
    valid_groups = py_(groups).reject(lambda x: x is None).value()
    is_two_cycle = "Previous" in py_(valid_groups).map(
        lambda group_result: group_result.group.cycle.id).value()
    # For two-cycle checks every group needs at least one factored record.
    if (is_two_cycle and not py_(valid_groups).every(
            lambda group_result: len(group_result.factored_records) > 0).value()):
        return False
    number_of_records = py_(valid_groups).map(
        lambda group_result: group_result.factored_records).flatten().size().value()
    has_adequate_data = number_of_records > 0
    if has_adequate_data:
        return pydash.every(valid_groups, lambda x: x.is_above_threshold())
    return has_adequate_data
def groups_have_adequate_data(self, groups):
    one_group_has_all_blank = py_(groups).reject(lambda x: x is None).some(
        lambda gr: gr.all_values_blank()).value()
    if one_group_has_all_blank:
        return True
    # Check the length before indexing into groups, to avoid an IndexError
    # when only one group is configured.
    if len(groups) > 1 and groups[0].aggregate + groups[1].aggregate == 0:
        return False
    return super(AtLeastNOfTotal, self).groups_have_adequate_data(groups)
def locations_reporting_multiple(records):
    locations = list(
        pydash.py_(records)
        .group_by(lambda item: item.get_regimen_location())
        .pick_by(lambda v, k: len(v) > 1)
        .keys()
        .reject(lambda x: x is None)
        .value())
    return dict((loc.location, loc.location) for loc in locations)
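# Sketch of the group_by/pick_by combination above: keep only the keys whose
# group holds more than one record.
from pydash import py_

records = ["loc-a", "loc-b", "loc-a"]
dupes = (py_(records).group_by(lambda x: x)
         .pick_by(lambda v, k: len(v) > 1)
         .keys().value())
print(dupes)  # ['loc-a']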
def getWordCount():
    wordCountResult = (py_(list(range(12)))
                       .map(lambda x: x * 50)
                       .map(urlWithStartAt)
                       .map(fetch)
                       .flatten()
                       .compact()
                       .map(seg)
                       .flatten()
                       .filter(lambda x: re.match('[a-zA-Z\u4e00-\u9fa5]', x))
                       .reject(lambda x: len(x) < 2)
                       .group_by()
                       .map(getCount)
                       .order_by(['count'])
                       .filter(lambda x: x['count'] > 1)
                       .value())
    # .value() already produced the result list; calling it would raise.
    return wordCountResult
def from_dict(data):
    return Definition(
        groups=py_(data.get("groups", [])).map(DefinitionGroup.from_dict).value(),
        type=data.get("type"),
        python_class=data.get("python_class"),
        sample=data.get("sample"),
        operator=DefinitionOption.from_dict(data.get("operator")),
        operator_constant=data.get("operatorConstant"),
    )
def get_preview_formulations(self, group):
    if self.definition.type.get("id") == FACILITY_TWO_GROUPS_WITH_SAMPLE:
        models = {
            "Adult": "patient_formulations",
            "Paed": "patient_formulations",
            "Consumption": "consumption_formulations",
        }
        key = models.get(group.model.id)
        return py_(group.model.tracing_formulations).map(
            lambda x: x.get(key)).flatten().value()
    else:
        return group.selected_formulations
def run():
    items = py_(INBOX_IDS).map(getConvos) \
        .flatten() \
        .filter(lambda x: x['last_message']['is_inbound']) \
        .filter(isLatestMsg) \
        .map(stripSearch) \
        .value()
    # Output the list of pending convos to the logs for debugging
    print(json.dumps(items, sort_keys=True, indent=2, separators=(',', ': ')))
    return items
def get(self):
    """Return groups by user.

    :return:
    """
    gs = GroupService(db)
    user = get_user()
    groups = gs.list(user)
    return {'data': py_(groups).map(group_to_dict).value()}
def getTopPredictors(weights, count):
    """Get the best indicator words.

    Args:
        weights: weight vector
        count: how many top entries to take
    Returns:
        predictors: the top (word, weight) pairs
    """
    return py_(vocabList) \
        .map(lambda word, idx: (word, weights[idx])) \
        .sort_by(lambda item: item[1], reverse=True) \
        .take(count) \
        .value()
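# Sketch of the top-k pattern in getTopPredictors() with toy data; the real
# vocabList and weights come from the surrounding training code.
from pydash import py_

vocab = ["spam", "ham", "offer"]
weights = [0.2, 0.9, 0.5]
top2 = py_(vocab).map(lambda word, idx: (word, weights[idx])).sort_by(
    lambda item: item[1], reverse=True).take(2).value()
print(top2)  # [('ham', 0.9), ('offer', 0.5)]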
def random_number_already_exists_in_filename(filename, random_filename_suffix):
    # Array of numbers (as strings) found in the filename.
    randfile_ints_found = re.findall(r'\d+', filename)
    # Assumes random_filename_suffix is a string, like re.findall's output.
    suffix_matches_found = py_(randfile_ints_found).map(
        lambda x: x == random_filename_suffix).value()
    print("Numbers in filename matching proposed new random no: {}".format(
        suffix_matches_found))
    assert len(suffix_matches_found) > 0, \
        'Filename does not contain any numbers!'
    return any(suffix_matches_found)
def get_records_from_data_source(self, data_source, group):
    records = group.model.get_records(data_source)
    if group.has_overrides:
        formulations_to_add = defaultdict(set)
        formulations_to_override = []
        for (sample, override_model) in group.sample_formulation_model_overrides.items():
            cleaned_formulation_names = [
                name.lower() for name in override_model.get("formulations", [])
            ]
            for name in cleaned_formulation_names:
                formulations_to_add[override_model.get("id")].add(name)
                formulations_to_override.append(name)
        # Drop the overridden formulations from the base records ...
        records = py_(records).reject(
            lambda x: x.formulation.lower() in formulations_to_override).value()
        # ... and pull them back in from their override models instead.
        for (model, formulations) in formulations_to_add.items():
            get_records = group.model.get_records(data_source, model)
            records_for_override_model = py_(get_records).reject(
                lambda x: x.formulation.lower() not in formulations).value()
            records.extend(records_for_override_model)
    return records
def proxy_keywords(url):
    r = fetch_data('extract_keywords', jsdata=url)
    dom = pq(r.content)
    kws = dom.find('li')
    labels = [x.text() for x in kws.find('strong').items()]
    kws.remove('strong')
    weights = [parse_number(x.text()) for x in kws.items()]
    return py_(zip(labels, weights)).map(
        lambda x: {'label': x[0], 'weight': x[1]}).value()
def get(self, group_id):
    gs = GroupService(db)
    group = gs.get_one(group_id)
    if group is None:
        return {'data': {}}
    dto = group_to_dict(group)
    dto['cards'] = py_(group.cards).map(card_to_dict_for_learn).value()
    return {'data': dto}
def get_formulations(self, tracer):
    models = {
        "Adult": "patient_formulations",
        "Paed": "patient_formulations",
        "Consumption": "consumption_formulations",
    }
    key = models.get(self.model.id)
    if tracer is not None and isinstance(tracer, Tracer):
        if tracer.key == "DEFAULT":
            return self.selected_formulations
        return py_(self.model.tracing_formulations).find(
            {"slug": tracer.key}).value().get(key)
    else:
        return self.selected_formulations
def get_locations_and_cycles(self):
    raw_locations = []
    for group in self.definition.groups:
        model = group.model.as_model()
        if model:
            field_filters = build_field_filters(group.selected_fields)
            formulations = self.get_preview_formulations(group)
            base_queryset = model.objects.filter(
                formulation__in=formulations, **field_filters)
            raw_locations.extend(
                base_queryset.order_by("name").values(
                    "name", "district", "cycle").distinct())
    locations = py_(raw_locations).uniq().group_by("name").map(
        as_loc).sort_by("name").value()
    return {"locations": locations}
def sticker_nrs_to_objects(listing):
    """Replace sticker numbers in an eligibility listing with real sticker objects."""
    sticker_nrs = py_.py_(
        list(nrs) for application, nrs in listing).flatten().uniq().value()
    stickers_by_nr = {
        s.nr: s for s in models.Sticker.objects.filter(nr__in=sticker_nrs)
    }

    def _replace_with_sticker_objs(listing_item):
        application, sticker_nrs = listing_item
        return (
            application,
            [stickers_by_nr[nr] for nr in sticker_nrs if nr in stickers_by_nr],
        )

    return dict(_replace_with_sticker_objs(item) for item in listing)
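# Sketch of the flatten/uniq chain above on a toy listing; plain ints stand in
# for sticker numbers.
from pydash import py_

listing = [("app1", [3, 5]), ("app2", [5, 8])]
sticker_nrs = py_([list(nrs) for _, nrs in listing]).flatten().uniq().value()
print(sticker_nrs)  # [3, 5, 8]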
def getTopRecommends(Theta, X, i, count, rated, items):
    """Get recommendations.

    Args:
        Theta: Theta
        X: X
        i: user index
        count: number of recommendations to return
        rated: ids of already-rated categories
        items: list of items
    Returns:
        topRecommends: recommended items
    """
    predictions = predict(Theta, X)[:, i]
    return py_(items) \
        .map(lambda item, idx: (item, predictions[idx])) \
        .sort_by(lambda item: item[1], reverse=True) \
        .take(count) \
        .value()
def proxy_concepts(url):
    r = fetch_data('extract_concepts', jsdata=url)
    dom = pq(r.content)
    divs = dom.find('#concepts').find('div')
    hrefs = [i.attr('href') for i in divs.find('a').items()]
    labels = [i.attr('value') for i in divs.find('input').items()]
    divs.remove('input')
    divs.remove('label')
    weights = [parse_number(i.text()) for i in divs.items()]
    return py_(zip(hrefs, labels, weights)).map(
        lambda x: {'href': x[0], 'label': x[1], 'weight': x[2]}).value()
def processEmail(email):
    """Preprocess an email.

    Args:
        email: email body
    Returns:
        indices: positions of the email's words in the vocabulary
    """
    # Lower-case --> normalize URLs --> normalize email addresses
    # --> strip HTML tags --> normalize numbers
    # --> normalize dollar signs --> trim whitespace
    return py_(email) \
        .strip_tags() \
        .reg_exp_replace(r'(http|https)://[^\s]*', 'httpaddr') \
        .reg_exp_replace(r'[^\s]+@[^\s]+', 'emailaddr') \
        .reg_exp_replace(r'\d+', 'number') \
        .reg_exp_replace(r'[$]+', 'dollar') \
        .lower_case() \
        .trim() \
        .words() \
        .map(stem) \
        .map(lambda word: py_.index_of(vocabList, word) + 1) \
        .value()
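# Sketch of the processEmail() pipeline on a tiny input; the vocabulary is a
# toy stand-in and the stemming step is omitted.
from pydash import py_

vocab = ["httpaddr", "number", "visit"]
email = "Visit http://example.com now, only 100 left!"
indices = py_(email) \
    .reg_exp_replace(r'(http|https)://[^\s]*', 'httpaddr') \
    .reg_exp_replace(r'\d+', 'number') \
    .lower_case() \
    .words() \
    .map(lambda word: py_.index_of(vocab, word) + 1) \
    .value()
print(indices)  # [3, 1, 0, 0, 2, 0] -- non-zero entries are vocabulary hits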
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from pydash import py_
from somajo import Tokenizer

scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'easy-deutsch.json', scope)
gc = gspread.authorize(credentials)
sheet = gc.open("Deutsch Wörter").worksheet('Expressions')
tokenizer = Tokenizer(split_camel_case=True, token_classes=False,
                      extra_info=False)

# Rows: drop empties, capitalize the German phrase, tokenize it, and keep the
# remaining columns as `rest`.
data = py_(sheet.get_all_values()).filter(lambda r: r[0]).map(
    lambda r: py_.compact(r)).map(
    lambda r: [py_.capitalize(r[0], strict=False), *r[1:]]).map(
    lambda r, i: dict(id=i, de=r[0], low=r[0].lower(),
                      tokens=tokenizer.tokenize(r[0].lower()), rest=r[1:])
).value()

# Invert the rows into a token index: token -> ids of the rows containing it.
token_index = {}
for tokens in py_.pluck(data, 'tokens'):
    for token in tokens:
        if len(token) <= 1:
            continue
        t = token.lower()
        if t not in token_index:
            token_index[t] = dict(
                key=t,
                ids=py_(data).filter(
                    lambda d: t in d['tokens']).pluck('id').value())
import json

import django.core.serializers.json
from django.template import Library
from django.utils.safestring import mark_safe
from pydash import py_

from browser.utils import get_fixture

register = Library()

# Make a nice-looking identifier for use within js.
make_identifier = lambda id: py_(['np', id]).join('_').camel_case().value()

# Get the list of tissue and experiment ids
experiments = get_fixture('browser.exp--tissues.yaml')


@register.filter
def sample_format(text):
    return py_.human_case(text)


@register.filter
def humanise(text):
    return py_.human_case(text)


@register.filter(is_safe=True)
def jsonify(object, args=None):
    # The original snippet is truncated here; serializing with Django's JSON
    # encoder (the otherwise unused import above) is an assumption.
    return mark_safe(json.dumps(
        object, cls=django.core.serializers.json.DjangoJSONEncoder))
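# A quick, standalone check of the make_identifier chain above: join with '_'
# then camel-case the result.
from pydash import py_

print(py_(['np', 'liver-adult']).join('_').camel_case().value())  # npLiverAdult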