Exemplo n.º 1
0
def main():
    """Run the geographic search pipeline for every person in the input file.

    ``sys.argv[1]`` is handed to ``PersonasInput.open_csv`` and
    ``sys.argv[2]`` to the ``DiasporaOutput`` constructor.  For each person
    the geographic queries are generated from ``p.lugares``, passed through
    ``filtra_nominal`` and written out together with the pipeline statistics.
    """
    logging.basicConfig(level=logging.DEBUG)
    personas_file = PersonasInput()
    personas_file.open_csv(sys.argv[1])
    personas = personas_file.read()
    buscador = BuscadorDiasporas()
    diaspora_output = DiasporaOutput(sys.argv[2])
    for p in personas:
        diaspora_output.open_person(p)
        # close_person() must run even when the search or the write fails,
        # otherwise the person entry is left open in the output.
        try:
            logging.info('batch_diaspora_search::processing ' + p.nombre)
            # NOTE(review): r.start() is never called in this function;
            # presumably reloj() starts timing on construction -- confirm.
            r = reloj()
            buscador.inicia(p.nombre, p.vinculo)

            #********
            # PIPELINE geo
            #********
            resultado_busqueda = buscador.genera_busquedas_geograficas(p.lugares)
            resultado_busqueda.filtra_nominal(p.nombre)
            r.stop()

            ps = PipelineStats()
            ps.type = 'geo'
            ps.total_queries = resultado_busqueda.total_queries
            ps.total_snippets = len(resultado_busqueda.snippets)
            ps.tiempo_proceso = r.tiempo()[0]
            ps.encontro_vinculo = resultado_busqueda.vinculo_encontrado
            diaspora_output.write_pipeline(ps, list(resultado_busqueda.snippets))
        finally:
            # This is the only pipeline in this function, so the person is
            # closed in the same way whether or not a link was found.
            diaspora_output.close_person()
Exemplo n.º 2
0
def main():
    """Run the organizational search pipeline for every person in the input.

    ``sys.argv[1]`` is handed to ``PersonasInput.open_csv`` and
    ``sys.argv[2]`` to the ``DiasporaOutput`` constructor.  For each person
    the organizational queries are generated from ``p.orgs``, passed through
    ``filtra_nominal`` and written out together with the pipeline statistics.
    """
    logging.basicConfig(level=logging.DEBUG)
    personas_file = PersonasInput()
    personas_file.open_csv(sys.argv[1])
    personas = personas_file.read()
    buscador = BuscadorDiasporas()
    diaspora_output = DiasporaOutput(sys.argv[2])
    for p in personas:
        diaspora_output.open_person(p)
        # Guarantee the matching close_person() even if a step below raises.
        try:
            logging.info('batch_diaspora_search::processing ' + p.nombre)
            # NOTE(review): r.start() is never called in this function;
            # presumably reloj() starts timing on construction -- confirm.
            r = reloj()
            buscador.inicia(p.nombre, p.vinculo)

            #********
            # PIPELINE orgs
            #********
            resultado_busqueda = buscador.genera_busquedas_organizacionales(p.orgs)
            resultado_busqueda.filtra_nominal(p.nombre)
            r.stop()

            ps = PipelineStats()
            ps.type = 'orgs'
            ps.total_queries = resultado_busqueda.total_queries
            ps.total_snippets = len(resultado_busqueda.snippets)
            ps.tiempo_proceso = r.tiempo()[0]
            ps.encontro_vinculo = resultado_busqueda.vinculo_encontrado
            diaspora_output.write_pipeline(ps, list(resultado_busqueda.snippets))
        finally:
            # Single pipeline: closing does not depend on whether a link
            # was found, so no separate early-exit branch is needed.
            diaspora_output.close_person()
Exemplo n.º 3
0
def main():
    """Run one hard-coded diaspora search and print the resulting snippets.

    The search is generated from a fixed name, topics, organizations and
    places, then passed through ``filtra_nominal`` and ``filtra_tematico``.
    If ``conjunto_resultante`` is non-empty, it is ordered with
    ``ordena_conjunto_resultante('query')`` and each snippet is printed.
    """
    buscador = BuscadorDiasporas()
    # TODO: read the 23 according to the new batch format
    name = 'Maria Villamil'
    topics = 'Potential Miscanthus Adoption in Illinois: Information Needs and Preferred Information Channels; Crop Sciences'
    # organizations are separated with ;
    orgs = 'College of Agricultural, Consumer, and Environmental Sciences; University of Illinois at Urbana-Champaign'
    places = 'Illinois, Urbana-Champaign'
    resultado = buscador.genera_busquedas(name, topics, orgs, places)

    # Every print below is a single-argument call so that it produces the
    # same text under Python 2 (where print is a statement) and Python 3.
    print('total de snippets encontrados= %s' % (str(resultado.total),))
    resultado.filtra_nominal(name)
    resultado.filtra_tematico(topics)

    if len(resultado.conjunto_resultante)>0:
        ordenado = resultado.ordena_conjunto_resultante('query')
        for s in ordenado:
            print('--------------------------------')
            print('|SNIPPET_QUERY:: %s' % (s.query,))
            print('|SNIPPET_TITLE:: %s' % (s.title,))
            print('|SNIPPET_DESCRIPTION:: %s' % (s.description,))
            print('|SNIPPET_LINK:: %s' % (s.link,))
            print('|SNIPPET:ESA SCORE:: %s' % (str(s.filter_status.semantic),))
            print('---------------------------------')
        print('>>>>> ' + str(len(ordenado)) + '<<<<< RESULTING SNIPPETS')
Exemplo n.º 4
0
def _run_pipeline(p, r, folder, kind, search, log_label):
    """Run one search pipeline for person ``p`` and write its results.

    A ``DiasporaOutput`` is created for ``folder`` and the person is opened
    on it.  ``search`` is a zero-argument callable returning the search
    result; the result is passed through ``filtra_nominal(p.nombre)`` and the
    whole step is timed with ``r``.  The statistics and the result's
    snippets are written with ``write_pipeline``.

    Returns a ``(filtered, found)`` pair: the value returned by
    ``filtra_nominal`` and the result's ``vinculo_encontrado`` flag.
    """
    diaspora_output = DiasporaOutput(folder)
    diaspora_output.open_person(p)
    # The person is closed even when the search or the write raises.
    try:
        r.start()
        resultado = search()
        filtered = resultado.filtra_nominal(p.nombre)
        r.stop()
        logging.debug(log_label + str(len(filtered)))

        ps = PipelineStats()
        ps.type = kind
        ps.total_queries = resultado.total_queries
        ps.total_snippets = len(resultado.snippets)
        ps.tiempo_proceso = r.tiempo()[0]
        ps.encontro_vinculo = resultado.vinculo_encontrado
        diaspora_output.write_pipeline(ps, list(resultado.snippets))
    finally:
        diaspora_output.close_person()
    return filtered, ps.encontro_vinculo


def _group_by_convergence(snippets, pipeline_lists):
    """Bucket snippets by how many pipelines returned their link.

    Returns a dict mapping each count from 1 to ``len(pipeline_lists)`` to
    the snippets whose link occurs in that many of the lists.  Only the
    first snippet seen for a link is kept: once classified, the link is
    removed from every link set, so later snippets with the same link are
    skipped and each link ends up in at most one bucket.
    """
    link_sets = [set(s.link for s in lst) for lst in pipeline_lists]
    groups = dict((n, []) for n in range(1, len(link_sets) + 1))
    for s in snippets:
        logging.debug('for s in unique_snippets_list.query=' + s.query)
        hits = [links for links in link_sets if s.link in links]
        if not hits:
            # Link already consumed by an earlier snippet.
            continue
        groups[len(hits)].append(s)
        for links in hits:
            links.remove(s.link)
    return groups


def main():
    """Run the name, geo, topics and orgs pipelines, then the converging one.

    ``sys.argv[1]`` is handed to ``PersonasInput.open_csv``; ``sys.argv[2]``
    is the output folder, under which each pipeline writes to its own
    ``results_*`` sub-folder.  For each person the four pipelines run in
    order and processing of that person stops as soon as one of them reports
    ``vinculo_encontrado``.  Otherwise the filtered snippets of all four are
    grouped by the number of pipelines that returned the same link and
    written with ``write_converging_pipeline`` for levels 4, 3, 2 and 1.
    """
    logging.basicConfig(level=logging.DEBUG)
    personas_file = PersonasInput()
    personas_file.open_csv(sys.argv[1])
    personas = personas_file.read()
    buscador = BuscadorDiasporas()

    output_folder = sys.argv[2]
    for p in personas:
        logging.info('batch_diaspora_search::processing ' + p.nombre)
        r = reloj()
        buscador.inicia(p.nombre, p.vinculo)

        # (type, sub-folder, debug label, search callable) per pipeline,
        # in execution order.
        stages = (
            ('name', '/results_name', 'converging::name_list.snippets = ',
             lambda: buscador.genera_busquedas_nominales()),
            ('geo', '/results_geo', 'converging::resultado_geo.snippets = ',
             lambda: buscador.genera_busquedas_geograficas(p.lugares)),
            ('topics', '/results_topics', 'converging::topics_list = ',
             lambda: buscador.genera_busquedas_tematicas(p.temas)),
            ('orgs', '/results_orgs', 'converging::resultado_orgs.snippets = ',
             lambda: buscador.genera_busquedas_organizacionales(p.orgs)),
        )
        filtered = {}
        found = False
        for kind, subfolder, log_label, search in stages:
            filtered[kind], found = _run_pipeline(
                p, r, output_folder + subfolder, kind, search, log_label)
            if found:
                break
        if found:
            # A link was found: skip the remaining pipelines for this person.
            continue

        name_list = filtered['name']
        geo_list = filtered['geo']
        topics_list = filtered['topics']
        orgs_list = filtered['orgs']

        #********
        # NEW PIPELINE convergent
        #********
        diaspora_output = DiasporaOutput(output_folder + '/results_converging')
        diaspora_output.open_person(p)
        try:
            # NOTE(review): this timing is never stored in any PipelineStats.
            r.start()
            all_snippets = name_list + geo_list + orgs_list + topics_list
            logging.debug('converging::len(name_snippets_set)= ' + str(len(name_list)))
            logging.debug('converging::len(geo_snippets_set)= ' + str(len(geo_list)))
            logging.debug('converging::len(topics_snippets_set)= ' + str(len(topics_list)))
            logging.debug('converging::len(orgs_snippets_set)= ' + str(len(orgs_list)))
            logging.debug('converging::len(unique_snippets_set)= ' + str(len(all_snippets)))
            groups = _group_by_convergence(
                all_snippets, (name_list, geo_list, orgs_list, topics_list))
            r.stop()

            for level in (4, 3, 2, 1):
                for s in groups[level]:
                    logging.debug('convergent_' + str(level) + ' snippet=' + s.query +
                                  ' title= ' + s.title + ' ==> ' + s.link)
                ps = PipelineStats()
                ps.type = 'converging pipelines ' + str(level)
                # NOTE(review): the set() conversion is kept from the
                # original code, so the written order is arbitrary; pass
                # groups[level] directly if a stable order is wanted.
                diaspora_output.write_converging_pipeline(
                    ps, list(set(groups[level])), level)
        finally:
            diaspora_output.close_person()
Exemplo n.º 5
0
def _run_pipeline(p, r, folder, kind, search, log_label):
    """Run one search pipeline for person ``p`` and write its results.

    A ``DiasporaOutput`` is created for ``folder`` and the person is opened
    on it.  ``search`` is a zero-argument callable returning the search
    result; the result is passed through ``filtra_nominal(p.nombre)`` and the
    whole step is timed with ``r``.  The statistics and the result's
    snippets are written with ``write_pipeline``.

    Returns a ``(filtered, found)`` pair: the value returned by
    ``filtra_nominal`` and the result's ``vinculo_encontrado`` flag.
    """
    diaspora_output = DiasporaOutput(folder)
    diaspora_output.open_person(p)
    # The person is closed even when the search or the write raises.
    try:
        r.start()
        resultado = search()
        filtered = resultado.filtra_nominal(p.nombre)
        r.stop()
        logging.debug(log_label + str(len(filtered)))

        ps = PipelineStats()
        ps.type = kind
        ps.total_queries = resultado.total_queries
        ps.total_snippets = len(resultado.snippets)
        ps.tiempo_proceso = r.tiempo()[0]
        ps.encontro_vinculo = resultado.vinculo_encontrado
        diaspora_output.write_pipeline(ps, list(resultado.snippets))
    finally:
        diaspora_output.close_person()
    return filtered, ps.encontro_vinculo


def _group_by_convergence(snippets, pipeline_lists):
    """Bucket snippets by how many pipelines returned their link.

    Returns a dict mapping each count from 1 to ``len(pipeline_lists)`` to
    the snippets whose link occurs in that many of the lists.  Only the
    first snippet seen for a link is kept: once classified, the link is
    removed from every link set, so later snippets with the same link are
    skipped and each link ends up in at most one bucket.
    """
    link_sets = [set(s.link for s in lst) for lst in pipeline_lists]
    groups = dict((n, []) for n in range(1, len(link_sets) + 1))
    for s in snippets:
        logging.debug('for s in unique_snippets_list.query=' + s.query)
        hits = [links for links in link_sets if s.link in links]
        if not hits:
            # Link already consumed by an earlier snippet.
            continue
        groups[len(hits)].append(s)
        for links in hits:
            links.remove(s.link)
    return groups


def main():
    """Run the name, geo, topics and orgs pipelines, then the converging one.

    ``sys.argv[1]`` is handed to ``PersonasInput.open_csv``; ``sys.argv[2]``
    is the output folder, under which each pipeline writes to its own
    ``results_*`` sub-folder.  For each person the four pipelines run in
    order and processing of that person stops as soon as one of them reports
    ``vinculo_encontrado``.  Otherwise the filtered snippets of all four are
    grouped by the number of pipelines that returned the same link and
    written with ``write_converging_pipeline`` for levels 4, 3, 2 and 1.
    """
    logging.basicConfig(level=logging.DEBUG)
    personas_file = PersonasInput()
    personas_file.open_csv(sys.argv[1])
    personas = personas_file.read()
    buscador = BuscadorDiasporas()

    output_folder = sys.argv[2]
    for p in personas:
        logging.info('batch_diaspora_search::processing ' + p.nombre)
        r = reloj()
        buscador.inicia(p.nombre, p.vinculo)

        # (type, sub-folder, debug label, search callable) per pipeline,
        # in execution order.
        stages = (
            ('name', '/results_name', 'converging::name_list.snippets = ',
             lambda: buscador.genera_busquedas_nominales()),
            ('geo', '/results_geo', 'converging::resultado_geo.snippets = ',
             lambda: buscador.genera_busquedas_geograficas(p.lugares)),
            ('topics', '/results_topics', 'converging::topics_list = ',
             lambda: buscador.genera_busquedas_tematicas(p.temas)),
            ('orgs', '/results_orgs', 'converging::resultado_orgs.snippets = ',
             lambda: buscador.genera_busquedas_organizacionales(p.orgs)),
        )
        filtered = {}
        found = False
        for kind, subfolder, log_label, search in stages:
            filtered[kind], found = _run_pipeline(
                p, r, output_folder + subfolder, kind, search, log_label)
            if found:
                break
        if found:
            # A link was found: skip the remaining pipelines for this person.
            continue

        name_list = filtered['name']
        geo_list = filtered['geo']
        topics_list = filtered['topics']
        orgs_list = filtered['orgs']

        #********
        # NEW PIPELINE convergent
        #********
        diaspora_output = DiasporaOutput(output_folder + '/results_converging')
        diaspora_output.open_person(p)
        try:
            # NOTE(review): this timing is never stored in any PipelineStats.
            r.start()
            all_snippets = name_list + geo_list + orgs_list + topics_list
            logging.debug('converging::len(name_snippets_set)= ' +
                          str(len(name_list)))
            logging.debug('converging::len(geo_snippets_set)= ' +
                          str(len(geo_list)))
            logging.debug('converging::len(topics_snippets_set)= ' +
                          str(len(topics_list)))
            logging.debug('converging::len(orgs_snippets_set)= ' +
                          str(len(orgs_list)))
            logging.debug('converging::len(unique_snippets_set)= ' +
                          str(len(all_snippets)))
            groups = _group_by_convergence(
                all_snippets, (name_list, geo_list, orgs_list, topics_list))
            r.stop()

            for level in (4, 3, 2, 1):
                for s in groups[level]:
                    logging.debug('convergent_' + str(level) + ' snippet=' +
                                  s.query + ' title= ' + s.title + ' ==> ' +
                                  s.link)
                ps = PipelineStats()
                ps.type = 'converging pipelines ' + str(level)
                # NOTE(review): the set() conversion is kept from the
                # original code, so the written order is arbitrary; pass
                # groups[level] directly if a stable order is wanted.
                diaspora_output.write_converging_pipeline(
                    ps, list(set(groups[level])), level)
        finally:
            diaspora_output.close_person()