def retrieve_and_process_data(self):
    """
    Query the data, output wikitext
    """
    logging.info("Retrieving grouping information...")

    try:
        (groupings_counts, groupings_groupings) = self.get_grouping_information()
    except QueryException as e:
        logging.error('No groupings found.')
        raise e

    logging.info(f'Groupings retrieved: {len(groupings_counts)}')

    for column_entry in self.columns:
        column_entry_key = column_entry.get_key()
        self.column_data[column_entry_key] = self._get_grouping_counts_from_sparql(
            column_entry.get_info_query(self))

    text = self.get_header()

    for (grouping, item_count) in sorted(groupings_counts.items(),
                                         key=lambda t: t[1], reverse=True):
        higher_grouping = groupings_groupings.get(grouping)
        text += self.make_stats_for_one_grouping(grouping, item_count,
                                                 higher_grouping)

    if self.stats_for_no_group:
        text += self.make_stats_for_no_group()

    text += self.make_footer()
    return text
def get_info_query(self, property_statistics):
    """
    Get the usage counts for a column for the groupings

    :return: (str) SPARQL query
    """
    query = f"""
SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {{
  ?entity {property_statistics.selector_sparql} ."""

    if property_statistics.grouping_type == GroupingType.YEAR:
        query += f"""
  ?entity wdt:{property_statistics.grouping_property} ?date .
  BIND(YEAR(?date) as ?grouping)."""
    else:
        query += f"""
  ?entity wdt:{property_statistics.grouping_property} ?grouping ."""

    query += f"""
  FILTER(EXISTS {{{self.get_filter_for_info()}
  }})
}}
GROUP BY ?grouping
HAVING (?count >= {property_statistics.property_threshold})
ORDER BY DESC(?count)
LIMIT 1000
"""
    return query
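# Illustrative only: the query emitted by get_info_query() for an assumed
# configuration (selector_sparql="wdt:P31 wd:Q5", grouping_property="P106",
# property_threshold=10) and a property column whose get_filter_for_info()
# checks P569 would look roughly like:
#
#   SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {
#     ?entity wdt:P31 wd:Q5 .
#     ?entity wdt:P106 ?grouping .
#     FILTER(EXISTS { ?entity p:P569 [] })
#   }
#   GROUP BY ?grouping
#   HAVING (?count >= 10)
#   ORDER BY DESC(?count)
#   LIMIT 1000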
def res2next(depth, num_classes, width_per_group=4, scale=4, pretrained=False,
             progress=True, **kwargs):
    """Instantiate a Res2NeXt model

    Args:
        depth (int): depth of the model
        num_classes (int): number of output classes
        width_per_group (int): number of channels per convolution group
        scale (int): number of branches for cascade convolutions
        pretrained (bool): whether the model should load pretrained weights (ImageNet training)
        progress (bool): whether a progress bar should be displayed while downloading pretrained weights
        **kwargs: optional arguments of torchvision.models.resnet.ResNet

    Returns:
        model (torch.nn.Module): loaded Pytorch model
    """
    if RESNET_LAYERS.get(depth) is None:
        raise NotImplementedError(f"This specific architecture is not defined for that depth: {depth}")

    block = Res2Block if depth >= 50 else BasicBlock
    kwargs.update(RES2NEXT_PARAMS.get(depth))
    model = Res2Net(block, RESNET_LAYERS.get(depth), num_classes=num_classes,
                    scale=scale, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            URLS.get(f"res2next{depth}_{width_per_group}w_{scale}s_{kwargs['groups']}c"),
            progress=progress)
        # Remove FC params from dict
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        missing, unexpected = model.load_state_dict(state_dict, strict=False)
        if any(unexpected) or any(not elt.startswith('fc.') for elt in missing):
            raise KeyError(f"Weight loading failed.\nMissing parameters: {missing}\n"
                           f"Unexpected parameters: {unexpected}")

    return model
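# Hedged usage sketch: assuming RESNET_LAYERS and RES2NEXT_PARAMS define an
# entry for depth 50 (their exact contents are not shown here), a
# non-pretrained model could be built like this:
#
#   model = res2next(50, num_classes=1000, width_per_group=4, scale=4)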
def registerModule(self, module):
    # This function is used during module instantiation to either reference a
    # previously loaded module, or to instantiate a module for the first time
    if not module['ref'] and not module['modulename']:
        debugLog(
            2,
            f"registerModule called for module {colored(module['name'], 'red')} "
            "without an existing reference or a module to instantiate.")
    elif module['ref']:
        # If the reference is passed, it means this module has already been
        # instantiated and we should just refer to the existing instance.
        # Check this module has not already been instantiated
        if not self.modules.get(module['name'], None):
            self.modules[module['name']] = {
                "ref": module['ref'],
                "type": module['type']
            }
            self.debugLog(
                7, "TWCMaster ",
                f"Registered module {colored(module['name'], 'red')}")
        else:
            self.debugLog(
                7, "TWCMaster ",
                f"Avoided re-registration of module {colored(module['name'], 'red')}, "
                "which has already been loaded")
def test(self, source, dag, name, batch_size=1, max_num=None):
    self.shared.eval()
    self.controller.eval()

    data = source[:max_num * self.max_length]

    total_loss = 0
    hidden = self.shared.init_hidden(batch_size)

    pbar = trange(0, data.size(0) - 1, self.max_length, desc="test")
    for count, idx in enumerate(pbar):
        inputs, targets = self.get_batch(data, idx, evaluation=True)
        output, hidden = self.shared(inputs, dag, hidden=hidden, is_train=False)
        output_flat = output.view(-1, self.dataset.num_tokens)
        total_loss += len(inputs) * self.ce(output_flat, targets).data
        hidden = detach(hidden)

        ppl = math.exp(total_loss[0] / (count + 1) / self.max_length)
        pbar.set_description(f"test| ppl: {ppl:8.2f}")

    test_loss = total_loss[0] / len(data)
    ppl = math.exp(test_loss)

    self.tb.scalar_summary(f"test/{name}_loss", test_loss, self.epoch)
    self.tb.scalar_summary(f"test/{name}_ppl", ppl, self.epoch)

    return test_loss, ppl
def load_model(path, model, optim=None, new_lr_bool=False, new_lr=6e-5):
    # remap everything onto CPU
    state = torch.load(str(path), map_location=lambda storage, location: storage)
    model.load_state_dict(state['model'])
    if optim:
        log('loading optim too')
        if new_lr_bool:
            state['optim']['param_groups'][0]['lr'] = new_lr
            print('Modified learning rate = ',
                  state['optim']['param_groups'][0]['lr'])
            print(f'=============== learning rate updated to {new_lr} ========================')
            log(f'=============== learning rate updated to {new_lr} ========================')
        optim.load_state_dict(state['optim'])
    else:
        log('not loading optim')
    model.cuda()
    detail = state['detail']
    log('loaded model from %s' % path)
    return detail
def make_footer(self):
    total_items = self.get_totals()
    text = u'|- class="sortbottom"\n|'
    if self.higher_grouping:
        text += u"|\n|"

    text += f'\'\'\'Totals\'\'\' <small>(all items)</small>:\n| {total_items}\n'
    for column_entry in self.columns:
        if isinstance(column_entry, PropertyConfig):
            property_name = column_entry.property
            if column_entry.qualifier:
                totalprop = self.get_totals_for_qualifier(
                    property=property_name,
                    qualifier=column_entry.qualifier)
            else:
                totalprop = self.get_totals_for_property(
                    property=property_name)
        elif isinstance(column_entry, TextConfig):
            totalprop = self._get_count_from_sparql(
                column_entry.get_totals_query(self))
        percentage = self._get_percentage(totalprop, total_items)
        text += f'| {{{{{self.cell_template}|{percentage}|{totalprop}|column={column_entry.get_title()}}}}}\n'
    text += u'|}\n'
    return text
def make_stats_for_no_group(self):
    """
    Query the data for no_group, return the wikitext
    """
    text = u'|-\n'

    if self.higher_grouping:
        text += u'|\n'

    total_no_count = self.get_totals_no_grouping()
    text += u'| No grouping \n'
    text += f'| {total_no_count} \n'
    for column_entry in self.columns:
        if isinstance(column_entry, PropertyConfig):
            property_name = column_entry.property
            if column_entry.qualifier:
                value = column_entry.value or '[]'
                column_count = self.get_qualifier_info_no_grouping(
                    property_name, column_entry.qualifier, value)
            else:
                column_count = self.get_property_info_no_grouping(
                    property_name)
        elif isinstance(column_entry, TextConfig):
            column_count = self._get_count_from_sparql(
                column_entry.get_info_no_grouping_query(self))
        percentage = self._get_percentage(column_count, total_no_count)
        text += f'| {{{{{self.cell_template}|{percentage}|{column_count}|column={column_entry.get_title()}|grouping={self.GROUP_MAPPING.NO_GROUPING.value}}}}}\n'  # noqa
    return text
def get_query_for_items_for_property_positive(self, column, grouping):
    query = f"""
SELECT DISTINCT ?entity ?entityLabel ?value ?valueLabel WHERE {{
  ?entity {self.selector_sparql} ."""

    if grouping == self.GROUP_MAPPING.TOTALS:
        pass
    elif grouping == self.GROUP_MAPPING.NO_GROUPING:
        query += f"""
  MINUS {{
    ?entity wdt:{self.grouping_property} [] .
  }}"""
    else:
        query += f"""
  ?entity wdt:{self.grouping_property} wd:{grouping} ."""

    if column.startswith('P'):
        query += f"""
  ?entity p:{column} ?prop . OPTIONAL {{ ?prop ps:{column} ?value }}
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
"""
    elif column.startswith('L') or column.startswith('D'):
        query += f"""
  FILTER(EXISTS {{
    ?entity {self.TEXT_SELECTOR_MAPPING[column[:1]]} ?lang_label.
    FILTER((LANG(?lang_label)) = "{column[1:]}").
  }})
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{column[1:]}". }}
}}
"""
    return query
def get_filter_for_info(self):
    if self.qualifier:
        property_value = f'wd:{self.value}' if self.value else '[]'
        return f"""
    ?entity p:{self.property} [ ps:{self.property} {property_value} ; pq:{self.qualifier} [] ]"""
    else:
        return f"""
    ?entity p:{self.property} []"""
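# Illustrative only: for an assumed column with property="P276",
# qualifier="P518" and no fixed value, get_filter_for_info() returns the
# fragment below (used inside FILTER(EXISTS {...}) by get_info_query()):
#
#   ?entity p:P276 [ ps:P276 [] ; pq:P518 [] ]
#
# Without a qualifier it reduces to:
#
#   ?entity p:P276 []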
def __str__(self):
    # return ("{\"mac_address\":\"{0}\", \"max_time_out\":\"{1}\", \"last_time_seen\":\"{2}\", \"rssid\":\"{3}}").format(self.mac_address, self.max_time_out, self.last_time_seen, self.rssid)
    return (f'mac_address:{self.mac_address}, '
            f'max_time_out:{self.max_time_out_in_seconds}, '
            f'rssid:{self.rssid}')
def test_redirects_to_list_view(self):
    other_list = List.objects.create()
    correct_list = List.objects.create()

    response = self.client.post(
        f'/lists/{correct_list.id}/add_item',
        data={'item_text': 'A new item for an existing list'})

    self.assertRedirects(response, f'/lists/{correct_list.id}/')
def gerar_base_imagens(self):
    linha_texto_gerado = self._gerar_linhas_texto()
    letras = []
    for index, letra in enumerate(linha_texto_gerado):
        nome_fonte = self._retornar_fonte_aleatoria()
        fonte = ImageFont.truetype(f'{caminho_fontes}{nome_fonte}',
                                   self.tamanho_fonte)
        self._criar_imagem(letra, fonte, f'{index}.jpg')
        letras.append(f'{index}.jpg,{letra}')
    self._criar_csv(letras)
def make_column_header(prop_entry):
    if prop_entry.qualifier:
        property_link = prop_entry.qualifier
    else:
        property_link = prop_entry.property

    if prop_entry.title:
        label = f'[[Property:{property_link}|{prop_entry.title}]]'
    else:
        label = f'{{{{Property|{property_link}}}}}'
    return f'! data-sort-type="number"|{label}\n'
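# Hedged usage sketch: assuming a column config object exposing the
# `property`, `qualifier` and `title` attributes read above (the exact
# constructor of PropertyConfig is not shown here), the emitted header cell is:
#
#   make_column_header(config_without_title)   # property='P18', no title
#   # -> '! data-sort-type="number"|{{Property|P18}}\n'
#
#   make_column_header(config_with_title)      # property='P18', title='image'
#   # -> '! data-sort-type="number"|[[Property:P18|image]]\n'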
def get_optim(cfg, parameters):
    if cfg.optim.name == 'Adam':
        optim = getattr(torch.optim, cfg.optim.name)(parameters, **cfg.optim.params)
        log(f'optim: {cfg.optim.name}')
        return optim
    elif cfg.optim.name == 'Ranger':
        optim = Ranger(parameters, **cfg.optim.params)
        log(f'optim: {cfg.optim.name}')
        return optim
    else:
        raise ValueError(f'Unsupported optimizer: {cfg.optim.name}')
def debugLog(self, logdata):
    # debugLog is something of a catch-all if we don't have a specific
    # logging function for the given data. It allows a log entry to be
    # passed to us for storage.
    if logdata["debugLevel"] >= logdata["minLevel"]:
        print(
            colored(logdata["logTime"] + " ", "yellow")
            + colored(f"{logdata['function']}", "green")
            + colored(f" {logdata['minLevel']} ", "cyan")
            + f"{logdata['message']}"
        )
    return
def get_grouping_information(self):
    """
    Get the counts for each grouping (and, if configured, its higher grouping).

    :return: Tuple of two (ordered) dictionaries.
    """
    if self.higher_grouping:
        query = f"""
SELECT ?grouping (SAMPLE(?_higher_grouping) as ?higher_grouping) (COUNT(DISTINCT ?entity) as ?count) WHERE {{
  ?entity {self.selector_sparql} .
  ?entity wdt:{self.grouping_property} ?grouping .
  OPTIONAL {{ ?grouping {self.higher_grouping} ?_higher_grouping }}.
}} GROUP BY ?grouping ?higher_grouping
HAVING (?count > {self.grouping_threshold})
ORDER BY DESC(?count)
LIMIT 1000
"""
    else:
        query = f"""
SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {{
  ?entity {self.selector_sparql} .
  ?entity wdt:{self.grouping_property} ?grouping .
}} GROUP BY ?grouping
HAVING (?count > {self.grouping_threshold})
ORDER BY DESC(?count)
LIMIT 1000
"""
    grouping_counts = collections.OrderedDict()
    grouping_groupings = collections.OrderedDict()

    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)

    if not queryresult:
        raise QueryException("No result when querying groupings.")

    for resultitem in queryresult:
        qid = resultitem.get('grouping').replace(
            u'http://www.wikidata.org/entity/', u'')
        grouping_counts[qid] = int(resultitem.get('count'))

        if self.higher_grouping:
            value = resultitem.get('higher_grouping')
            if value:
                value = value.replace(u'http://www.wikidata.org/entity/', u'')
            grouping_groupings[qid] = value

    return (grouping_counts, grouping_groupings)
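# Illustrative (made-up) return shape of get_grouping_information(): the first
# dict maps grouping QIDs to item counts; the second maps grouping QIDs to
# their higher grouping (or None) and stays empty when higher_grouping is not set:
#
#   grouping_counts    = OrderedDict([('Q123', 4200), ('Q456', 310)])
#   grouping_groupings = OrderedDict([('Q123', 'Q999'), ('Q456', None)])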
def home_view(request):
    next = request.GET.get('next')
    form = HomeForm(request.POST or None)
    if form.is_valid():
        gender = form.cleaned_data.get('gender')
        age = form.cleaned_data.get('age')
        occupation = form.cleaned_data.get('occupation')
        city_category = form.cleaned_data.get('city_category')
        stay_years = form.cleaned_data.get('stay_years')
        # marital = form.cleaned_data.get('marital')
        product_id = form.cleaned_data.get('product_id')
        purchase = form.cleaned_data.get('purchase')
        messages.success(request, 'Valid inputs, good job!')
        h = Person(gender=gender,
                   age=age,
                   occupation=occupation,
                   city_category=city_category,
                   stay_years=stay_years,
                   product_id=product_id,
                   purchase=purchase)

        a = []
        a.append(h.gender)
        a.append(h.age)
        a.append(h.occupation)
        a.append(h.city_category)
        a.append(h.stay_years)
        # a.append(h.marital)
        a.append(h.product_id)
        a.append(h.purchase)
        e = []
        e.append(a)
        print(e)
        d = Data.data_min(e)
        prediction = int(d[0])
        print(type(d))
        h.save()
        if prediction == 1:
            messages.info(request, 'The Prediction is MARRIED')
        elif prediction == 0:
            messages.info(request, 'The Prediction is NOT MARRIED')
        if next:
            return redirect(next)
        return redirect('/')

    context = {
        'form': form,
    }
    return render(request, 'home_form.html', context)
def save_model(self):
    t.save(self.shared.state_dict(), self.shared_path)
    logger.info(f"[*] SAVED: {self.shared_path}")

    t.save(self.controller.state_dict(), self.controller_path)
    logger.info(f"[*] SAVED: {self.controller_path}")

    epochs, shared_steps, controller_steps = self.get_saved_models_info()

    for epoch in epochs[:-self.args.max_save_num]:
        paths = glob(
            os.path.join(self.args.model_dir, f'*_epoch{epoch}_*.pth'))

        for path in paths:
            remove_file(path)
def load_bowl(self, base_path):
    """Load the nuclei images and their mask paths from the bowl dataset.

    base_path: root directory containing one sub-directory per image,
        each holding an `images` folder and a `masks` folder.
    """
    self.add_class("bowl", 1, "nuclei")

    masks = dict()
    id_extractor = re.compile(
        f"{base_path}\\{os.sep}(?P<image_id>.*)\\{os.sep}masks\\{os.sep}(?P<mask_id>.*)\\.png")

    for mask_path in glob(os.path.join(base_path, "**", "masks", "*.png")):
        matches = id_extractor.match(mask_path)

        image_id = matches.group("image_id")
        image_path = os.path.join(base_path, image_id, "images",
                                  image_id + ".png")

        if image_path in masks:
            masks[image_path].append(mask_path)
        else:
            masks[image_path] = [mask_path]

    for i, (image_path, mask_paths) in enumerate(masks.items()):
        self.add_image("bowl", image_id=i, path=image_path,
                       mask_paths=mask_paths)
def main(args):
    prepare_dirs(args)

    torch.manual_seed(args.random_seed)

    if args.num_gpu > 0:
        torch.cuda.manual_seed(args.random_seed)

    if args.network_type == 'rnn':
        dataset = data.text.Corpus(os.path.join(args.data_dir, args.dataset))
    elif args.dataset == 'cifar':
        dataset = data.image.Image(os.path.join(args.data_dir, args.dataset))
    else:
        raise NotImplementedError(f"{args.dataset} is not supported")

    trainer = Trainer(args, dataset)

    if args.mode == 'train':
        save_args(args)
        trainer.train()
    elif args.mode == 'derive':
        assert args.load_path != "", "`--load_path` should be given in `derive` mode"
        trainer.derive()
    else:
        if not args.load_path:
            raise Exception(
                "[!] You should specify `load_path` to load a pretrained model")
        trainer.test()
def __init__(self, args):
    if args.dataset == 'cifar10':
        Dataset = datasets.CIFAR10

        mean = [0.49139968, 0.48215827, 0.44653124]
        std = [0.24703233, 0.24348505, 0.26158768]
        normalize = transforms.Normalize(mean, std)

        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    elif args.dataset == 'MNIST':
        Dataset = datasets.MNIST
    else:
        raise NotImplementedError(f"Unknown dataset: {args.dataset}")

    self.train = t.utils.data.DataLoader(
        Dataset(root='./data',
                train=True,
                transform=transform,
                download=True),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)

    self.valid = t.utils.data.DataLoader(
        Dataset(root='./data',
                train=False,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    normalize,
                ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True)

    self.test = self.valid
def add_node(graph, node_id, label, shape='box', style='filled'):
    if label.startswith('x'):
        color = 'white'
    elif label.startswith('h'):
        color = 'skyblue'
    elif label == 'tanh':
        color = 'yellow'
    elif label == 'ReLU':
        color = 'pink'
    elif label == 'identity':
        color = 'orange'
    elif label == 'sigmoid':
        color = 'greenyellow'
    elif label == 'avg':
        color = 'seagreen3'
    else:
        color = 'white'

    if not any(label.startswith(word) for word in ['x', 'avg', 'h']):
        label = f"{label}\n({node_id})"

    graph.add_node(
        node_id,
        label=label,
        color='black',
        fillcolor=color,
        shape=shape,
        style=style,
    )
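# Hedged usage sketch, assuming `graph` is a pygraphviz.AGraph (whose
# add_node(name, **attrs) signature matches the call used above); node ids,
# labels, and the output filename are made up for illustration.
def _demo_draw_two_node_cell():
    import pygraphviz as pgv

    graph = pgv.AGraph(directed=True)
    add_node(graph, 0, 'x[t]')   # input node: stays white, label unchanged
    add_node(graph, 1, 'tanh')   # activation: yellow, label becomes "tanh\n(1)"
    graph.add_edge(0, 1)
    graph.draw('cell.png', prog='dot')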
def get_reward(self, dag, entropies, valid_idx=None):
    if type(entropies) is not np.ndarray:
        entropies = entropies.data.cpu().numpy()

    if not valid_idx:
        valid_idx = 0
    inputs, targets = self.get_batch(self.valid_data, valid_idx, self.max_length)
    valid_loss = self.get_loss(inputs, targets, None, dag)
    valid_ppl = math.exp(valid_loss.data[0])

    # TODO: we don't know reward_c
    if self.args.ppl_square:
        # TODO: but we do know reward_c=80 in the previous paper
        R = self.args.reward_c / valid_ppl**2
    else:
        R = self.args.reward_c / valid_ppl

    if self.args.entropy_mode == 'reward':
        rewards = R + self.args.entropy_coeff * entropies
    elif self.args.entropy_mode == 'regularizer':
        rewards = R * np.ones_like(entropies)
    else:
        raise NotImplementedError(
            f"Unknown entropy mode: {self.args.entropy_mode}")

    return rewards
def get_num_cell_parameters(self, dag):
    num = 0

    num += size(self.w_xc)
    num += size(self.w_xh)

    q = deque()
    q.append(0)

    while q:
        node_id = q.popleft()
        nodes = dag[node_id]

        for next_node in nodes:
            next_id = next_node.id
            if next_id == self.args.num_blocks:
                assert len(nodes) == 1, "parent of leaf node should have only one child"
                continue

            w_h = self.w_h[node_id][next_id]
            w_c = self.w_c[node_id][next_id]

            num += size(w_h)
            num += size(w_c)

            q.append(next_id)

    logger.debug(f"# of cell parameters: {format(self.num_parameters, ',d')}")
    return num
def replace_in_page(self, output, page_text):
    regex_text = f'({{{{{self.template_name}.*?}}}}).*?({{{{{self.end_template_name}}}}})'
    regex = re.compile(regex_text, re.MULTILINE | re.DOTALL)
    new_text = re.sub(regex, r'\1\n%s\n\2' % output, page_text, count=1)
    return new_text
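# Illustrative only: with assumed template_name="Property dashboard" and
# end_template_name="Property dashboard end", replace_in_page() keeps the
# opening and closing template calls and swaps everything between them for
# `output`:
#
#   {{Property dashboard|...}}     <- kept (group \1)
#   ...old table...                <- replaced by output
#   {{Property dashboard end}}     <- kept (group \2)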
def get_qualifier_info(self, property, qualifier, value="[]"):
    """
    Get the usage counts for a qualifier for the groupings

    :param property: Wikidata Pid of the property
    :param qualifier: Wikidata Pid of the qualifier
    :param value: Value the property should have (as a SPARQL term), any by default ("[]")
    :return: (Ordered) dictionary with the counts per grouping
    """
    query = f"""
SELECT ?grouping (COUNT(DISTINCT ?entity) as ?count) WHERE {{
  ?entity {self.selector_sparql} .
  ?entity wdt:{self.grouping_property} ?grouping .
  FILTER EXISTS {{ ?entity p:{property} [ ps:{property} {value} ; pq:{qualifier} [] ] }} .
}}
GROUP BY ?grouping
HAVING (?count > {self.property_threshold})
ORDER BY DESC(?count)
LIMIT 1000
"""
    result = collections.OrderedDict()
    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)
    if not queryresult:
        return None
    for resultitem in queryresult:
        qid = resultitem.get('grouping').replace(
            u'http://www.wikidata.org/entity/', u'')
        result[qid] = int(resultitem.get('count'))
    return result
def get_totals(self):
    query = f"""
SELECT (COUNT(?item) as ?count) WHERE {{
  ?item {self.selector_sparql}
}}
"""
    return self._get_count_from_sparql(query)
def make_stats_for_one_grouping(self, grouping, item_count, higher_grouping):
    """
    Query the data for one group, return the wikitext.
    """
    text = u'|-\n'

    if self.higher_grouping:
        if higher_grouping:
            text += self.format_higher_grouping_text(higher_grouping)
        else:
            text += u'|\n'

    if grouping in self.GROUP_MAPPING.__members__:
        text += u'| %s\n' % (self.GROUP_MAPPING.__members__.get(grouping).value,)
    elif self.grouping_type == GroupingType.YEAR:
        text += u'| %s\n' % (grouping,)
    else:
        text += u'| {{Q|%s}}\n' % (grouping,)

    if self.grouping_link:
        try:
            group_item = pywikibot.ItemPage(self.repo, grouping)
            group_item.get()
            label = group_item.labels["en"]
        except (pywikibot.exceptions.InvalidTitle, KeyError):
            logging.info(f"Could not retrieve label for {grouping}")
            label = grouping
        text += f'| [[{self.grouping_link}/{label}|{item_count}]] \n'
    else:
        text += f'| {item_count} \n'

    for column_entry in self.columns:
        column_entry_key = column_entry.get_key()
        try:
            column_count = self.column_data.get(column_entry_key).get(grouping)
        except AttributeError:
            column_count = 0
        if not column_count:
            column_count = 0
        percentage = self._get_percentage(column_count, item_count)
        text += f'| {{{{{self.cell_template}|{percentage}|{column_count}|column={column_entry.get_title()}|grouping={grouping}}}}}\n'  # noqa

    return text
def get_totals_no_grouping(self):
    query = f"""
SELECT (COUNT(?item) as ?count) WHERE {{
  ?item {self.selector_sparql}
  MINUS {{ ?item wdt:{self.grouping_property} _:b28. }}
}}
"""
    return self._get_count_from_sparql(query)