def HSS(clim_data, hindcast, n_dry=10, n_wet=10):
    """Compute the Heidke Skill Score (HSS) over the full record and
    over the driest / wettest subsets of the observations.

    Parameters
    ----------
    clim_data : array-like
        Observed climate values (one per year).
    hindcast : array-like
        Hindcast values aligned element-wise with ``clim_data``.
    n_dry, n_wet : int, optional
        Number of lowest / highest observed values forming the 'dry'
        and 'wet' subsets (default 10 each).

    Returns
    -------
    dict
        Keys 'all', 'dry', 'wet' mapped to the HSS of that subset.
    """
    from utils import categorize
    from numpy import argsort

    # Categorize observations and hindcasts with the project helper.
    # NOTE(review): exp_hits = total/3 below assumes categorize() yields
    # three equally likely categories (terciles) — confirm in utils.
    clim_cat = categorize(clim_data)
    hind_cat = categorize(hindcast)

    # Indices that sort the observations from driest to wettest.
    idx = argsort(clim_data)

    # Index subsets: every year, the n_dry driest, the n_wet wettest.
    index = {
        'all': idx,
        'dry': idx[:n_dry],
        'wet': idx[-n_wet:],
    }

    hss = {}
    for cat, members in index.items():
        total = len(members)
        # Expected number of correct category hits by chance alone.
        exp_hits = total / 3.
        # Count positions where hindcast category matches observed.
        hits = sum(clim_cat[members] == hind_cat[members])
        hss[cat] = float(hits - exp_hits) / (total - exp_hits)

    return hss
Example #2
0
    def _get_yacc_variables(self, module):
        """Collect the yacc-relevant globals of *module*: the 'tokens'
        variable and every 'p_'-prefixed production rule, ordered by
        their appearance in the module."""
        frame, module_vars = get_global_vars(module)
        predicates = [
            lambda name: name == "tokens",
            lambda name: name.startswith("p_"),
        ]
        matched = filter_variables(predicates, module_vars)
        matched = sorted(matched, key=partial(by_appearance, frame))
        tokens, production_rules = categorize(predicates, matched)

        if len(tokens) == 0:
            raise SyntaxError("'tokens' (a tuple of strings) must be defined")

        # tokens = [('tokens', (...))] — return the value of the last binding.
        return tokens[-1][-1], production_rules
Example #3
0
File: lex.py    Project: dfridman1/lex-yacc
    def get_lexer_variables(self, module=None):
        """Collect the lexer-relevant globals of *module*: 'tokens',
        'states', and every 't_'-prefixed rule, ordered by appearance.

        Returns a 3-tuple ``(token_names, states, t_rules)``.
        Raises SyntaxError when 'tokens' is not defined in the module.
        """
        frame, variables = get_global_vars(module)
        filter_rules = [lambda x: x == "tokens",
                        lambda x: x == "states",
                        lambda x: x.startswith("t_")]
        variables = sorted(filter_variables(filter_rules, variables),
                           key=partial(by_appearance, frame))
        token_names, states, t_rules = categorize(filter_rules, variables)

        if len(token_names) == 0:
            raise SyntaxError("'tokens' (a tuple of strings) must be defined")

        # The raise above guarantees token_names is non-empty, so the
        # former `token_names[0] if token_names else []` guard was dead code.
        token_names = token_names[0]
        # 'states' has no such guarantee; fall back to an empty list.
        states = states[0] if states else []
        return token_names, states, t_rules
Example #4
0
                insert_item({'id': article.id}, 'errorArticles')
                continue

            try:
                article.download()
                article.parse()

                title = article.title
                title_split = article.title.split('|')

                if len(title_split) != 1:
                    title = title_split[0].strip()

                pattern = re.compile(source.brand, re.IGNORECASE)
                body = pattern.sub('', article.text)
                categories = categorize(body)

                try:
                    # if langdetect.detect(body) != 'en' or langdetect.detect(title) != 'en':
                    if langdetect.detect(body) != 'en':
                        if PY_ENV == 'development':
                            print('\n(NOT ENGLISH) Skipped: ' +
                                  str(article.url) + '\n')
                        slp_time = insert_log(
                            source_id, 'articleCrawl', 'error',
                            float(time.clock() - start_time), {
                                'articleUrl': article.url,
                                'articleTitle': title,
                                'errorMessage': 'NOT ENGLISH',
                                'crawlerName': 'credible crawler'
                            })