def process_models(models):
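    """Compute the difference for every ordered pair of models.

    Each element of the returned list is the dict produced by
    mc_difference_logic.diff_markov_chains.
    """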
    result = []
    for model in models:
        for model_ in models:
            if model != model_:
                result.append(mc_difference_logic.diff_markov_chains(model, model_))
    log.info('total pairwise difference: %s' % sum(el['content'] for el in result))
    return result


def big_differences():
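    """Scrape users that are not loaded yet, fold each one into the main
    Markov chain, then plot every user's difference from the main model
    against that user's tweet count."""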
    log.info('extract messages')

    users = main_db.get_not_loaded_users()

    model_main = markov_chain('main', booster)
    result = []

    log.info('---------users to load: %s-------------------------------' % len(users))
    loaded_users = []
    for user in users:
        log.info('load user %s' % user)
        loaded_user = engine.scrap(user, neighbourhood=0)
        if not loaded_user:
            continue
        main_db.set_user_loaded(user)
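        # fold the user into the aggregate model and also build the user's
        # own model, which is reloaded by name in the diff loop below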
        model_main = create_model(loaded_user, mc=model_main)
        create_model(loaded_user)
        loaded_users.append(loaded_user)

    log.info('---------start process differences of models--------------')
    for user in loaded_users:
        model_current = markov_chain.create(user.name_, booster)
        diff_element = diff_markov_chains(model_main, model_current)
        result.append({'name': user.name_, 'x': diff_element['content'], 'y': user.timeline_count})
        log.info('create difference... %s' % diff_element['content'])

    # diffing the main model with itself gives the baseline reference point
    diff_main = diff_markov_chains(model_main, model_main)
    nodes, edges = model_main.get_unique_nodes_edges()
    model_diffs = [
        {'x': diff_main['content'], 'y': float(edges) / nodes},
    ]
    vis.visualise(result,
                  header='difference vs tweet count',
                  x_title='difference between user model and main model',
                  y_title='tweet count',
                  spec_symbols=model_diffs)

    model_main.visualise(100)


def clust(models):
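    """Merge each model with its closest counterpart and recurse on the
    merged set until a single model remains."""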
    if len(models) <= 1:
        return models

    out = []
    merged_ids = set()
    for mc in models:
        if mc.model_id_ in merged_ids:
            continue
        t1 = time.time()
        # models still available for merging in this pass
        candidates = [el for el in models if el != mc and el.model_id_ not in merged_ids]
        if not candidates:
            out.append(mc)
            continue

        # the counterpart with the highest diff_markov_chains score is treated as nearest
        nearest, nearest_diff = max(
            [(el, diff_markov_chains(mc.model_id_, el.model_id_, booster)) for el in candidates],
            key=lambda pair: pair[1])
        nearest.print_me()
        print nearest_diff
        # merge the pair into a new model and carry it into the next pass
        new_mc_id = booster.sum_models(mc.model_id_, nearest.model_id_)
        out.append(markov_chain(new_mc_id, booster))
        merged_ids.add(mc.model_id_)
        merged_ids.add(nearest.model_id_)

        t2 = time.time()
        print 'time: ', t2 - t1
    return clust(out)


def get_users_data(user_name1, user_name2):
    # assumption: both users are fetched from the local db by name,
    # mirroring the __main__ block below
    user1 = db.get_user({'name_': user_name1})
    user2 = db.get_user({'name_': user_name2})
    timeline1 = tools.flush(user1.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10]
    timeline2 = tools.flush(user2.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10]
    print len(timeline1)
    print len(timeline2)
    mc1 = markov_chain_machine.create_model(timeline1, user_name1, boost)
    mc2 = markov_chain_machine.create_model(timeline2, user_name2, boost)

    return mc1, mc2


def form_timeline(user_timeline):
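    """Turn a raw user timeline into normalised word data for model building."""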
    true_timeline = tools.flush(user_timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))
    return true_timeline

if __name__ == '__main__':
#    models = get_users_data('navalny', 'MedvedevRussia')
#    print diff_markov_chains(models[0], models[1])
#    engine.get_relations_of_user('navalny')

#    user = engine.get_user_info('GoogleRussia')
#    db.save_user(user.serialise())
#    user = db.get_user({'name_':'@GoogleRussia'})
#
#    print len(user.timeline)
#    print user.timeline_count

    # smoke test: build a model for one cached user and diff it with itself
    user = db.get_user({'name_': '@GoogleRussia'})
    time_line = form_timeline(user.timeline)
    mc = markov_chain_machine.create_model(time_line, user.name_, boost)
    mc.print_me()
    diff_markov_chains(mc, mc)


def test_difference_logic(markov_chain_l, markov_chain_r):
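    """Log the difference element between two Markov chain models."""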
    difference_element = diff_markov_chains(markov_chain_l, markov_chain_r)
    log.info('difference element is: %s' % difference_element)
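

# usage sketch (assumes the navalny / MedvedevRussia users are already cached
# in the db, as in the commented-out lines above):
#   mc1, mc2 = get_users_data('navalny', 'MedvedevRussia')
#   test_difference_logic(mc1, mc2)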