def HSS(clim_data, hindcast, n_dry=10, n_wet=10):
    from utils import categorize
    from numpy import argsort

    #_Categorize the data
    clim_cat = categorize(clim_data)
    hind_cat = categorize(hindcast)

    #_Sort the observations
    idx = argsort(clim_data)

    #_Create indices for the all, dry, and wet categories
    index = {
        'all': idx,
        'dry': idx[:n_dry],
        'wet': idx[-n_wet:],
    }

    #_Initialize the skill score dictionary
    hss = {}

    #_Calculate HSS for each category
    for cat in index:
        total = len(index[cat])
        exp_hits = total / 3.
        hits = sum(clim_cat[index[cat]] == hind_cat[index[cat]])
        hss[cat] = float(hits - exp_hits) / (total - exp_hits)

        ### Optional printing ###
        # print('%s years: total is %i, exp_hits is %.2f, hits is %i, hss is %.2f'
        #       % (cat, total, exp_hits, hits, hss[cat]))

    return hss
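# A hedged usage sketch (not from the original source): the exp_hits =
# total / 3. term in HSS implies utils.categorize bins each value into one
# of three equiprobable categories, so _tercile_categorize below is an
# assumed stand-in for it, and all names here are illustrative.
import numpy as np

def _tercile_categorize(data):
    # Rank-based terciles: 0 = lowest third, 1 = middle, 2 = highest third.
    ranks = np.argsort(np.argsort(data))
    return ranks * 3 // len(data)

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    obs = rng.normal(size=30)                    # observed values
    fcst = obs + rng.normal(scale=0.5, size=30)  # noisy hindcast
    obs_cat = _tercile_categorize(obs)
    fcst_cat = _tercile_categorize(fcst)
    total = len(obs)
    exp_hits = total / 3.
    hits = np.sum(obs_cat == fcst_cat)
    # Same Heidke skill score formula as the 'all' branch of HSS above.
    print('HSS (all years): %.2f' % ((hits - exp_hits) / (total - exp_hits)))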
def _get_yacc_variables(self, module):
    frame, variables = get_global_vars(module)
    filter_rules = [lambda x: x == "tokens",
                    lambda x: x.startswith("p_")]
    variables = sorted(filter_variables(filter_rules, variables),
                       key=partial(by_appearance, frame))
    tokens, production_rules = categorize(filter_rules, variables)
    if len(tokens) == 0:
        raise SyntaxError("'tokens' (a tuple of strings) must be defined")
    return tokens[-1][-1], production_rules  # tokens = [('tokens', (...))]
def get_lexer_variables(self, module=None):
    frame, variables = get_global_vars(module)
    filter_rules = [lambda x: x == "tokens",
                    lambda x: x == "states",
                    lambda x: x.startswith("t_")]
    variables = sorted(filter_variables(filter_rules, variables),
                       key=partial(by_appearance, frame))
    token_names, states, t_rules = categorize(filter_rules, variables)
    if len(token_names) == 0:
        raise SyntaxError("'tokens' (a tuple of strings) must be defined")
    token_names = token_names[0] if token_names else []
    states = states[0] if states else []
    return token_names, states, t_rules
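# A hedged example module (not from the original source) showing the layout
# the two extractors above look for, following PLY-style conventions: a
# 'tokens' tuple, t_* lexer rules, and p_* grammar rules. The toy grammar
# is illustrative only.
tokens = ('NUMBER', 'PLUS')

t_PLUS = r'\+'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def p_expr_plus(p):
    "expr : expr PLUS NUMBER"
    p[0] = p[1] + p[3]

def p_expr_number(p):
    "expr : NUMBER"
    p[0] = p[1]

# On this module, get_lexer_variables would return the tokens tuple, an
# empty states list, and the t_* bindings; _get_yacc_variables would return
# the tokens tuple and [p_expr_plus, p_expr_number].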
    # Inside the crawl loop: record the failed article and move on, then
    # download and parse the next candidate.
    insert_item({'id': article.id}, 'errorArticles')
    continue
try:
    article.download()
    article.parse()
    title = article.title
    title_split = article.title.split('|')
    if len(title_split) != 1:
        title = title_split[0].strip()
    pattern = re.compile(source.brand, re.IGNORECASE)
    body = pattern.sub('', article.text)
    categories = categorize(body)
    try:
        # if langdetect.detect(body) != 'en' or langdetect.detect(title) != 'en':
        if langdetect.detect(body) != 'en':
            if PY_ENV == 'development':
                print('\n(NOT ENGLISH) Skipped: ' + str(article.url) + '\n')
            slp_time = insert_log(
                source_id, 'articleCrawl', 'error',
                float(time.perf_counter() - start_time), {
                    'articleUrl': article.url,
                    'articleTitle': title,
                    'errorMessage': 'NOT ENGLISH',
                    'crawlerName': 'credible crawler'
                })
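# A minimal, self-contained sketch (assumed names, not from the original
# source) of the title/body cleanup the fragment above performs: keep only
# the headline before a '|' separator, and strip the source's brand name
# case-insensitively.
import re

def clean_title(raw_title):
    parts = raw_title.split('|')
    return parts[0].strip() if len(parts) != 1 else raw_title

def strip_brand(text, brand):
    return re.compile(brand, re.IGNORECASE).sub('', text)

print(clean_title('Markets rally | Example News'))              # 'Markets rally'
print(strip_brand('Example News reported gains.', 'example news'))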