Exemplo n.º 1
0
def _test():
    """Build and return a transition model from the pseudo test data.

    The data comes from ``testdata.pseudo_shingakunet()`` and the goal is the
    shingakunet ``entry/complete`` URL. The result of ``transition_model`` is
    returned unchanged.
    """
    # Imported lazily so the test data module is only needed when this
    # helper is actually called.
    from testdata import pseudo_shingakunet

    goal_url = 'https://shingakunet.com/net2/shiryoSeikyu/entry/complete'
    return transition_model(data=pseudo_shingakunet(), goal=goal_url)
Exemplo n.º 2
0
                elif netloc_ratio >= upper_criteria_url and path_ratio >= upper_criteria_url: #クエリが長過ぎて一致条件に当てはまらないものを救う
                    if query_ratio >= lower_criteria_url * 0.50:
                        states[state].append(url)
                        adopted_flag = True
                        accumulated += uu
                        break
                        
                        
            #以前に採用したstateとマージされなかった場合は新規stateとして採用する
            if not adopted_flag:
                states.setdefault(url, [])
                states[url].append(url)
                accumulated += uu
        #条件を満たしたらマージをやめる
        if len(states) >= max_state + accumulated_states or accumulated >= minimum_volume * total_uu:
            print 'states:', len(states) - accumulated_states, 'volume:', float(accumulated) / float(total_uu)
            break
            
    for state, url in states.items():
        states[state] = list(set(states[state]))
    #uu_volumeが十分にとれなかった場合には何らかの方法で救済する?
    return states

if __name__ == '__main__':
    from get_model import make_uu_base_data
    from testdata import pseudo_shingakunet
    states = summarize(make_uu_base_data(pseudo_shingakunet()),
                       'https://shingakunet.com/net2/shiryoSeikyu/entry/complete')
    for state, sub_states in states.items():
            print 'state:', state, 'sub-state:', sub_states