Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Print high-scoring recommendations for articles with a "saibamais" box.

        For up to 50 ``Materia`` rows whose ``corpo`` contains "saibamais",
        the title is printed and ``relacionadas(documento, 4)`` is asked for
        recommendations. For each recommended permalink a small labelled
        corpus is assembled:

        * "dentro": articles whose permalink appears as ``origem`` of an
          ``Analytics`` row pointing at the recommendation;
        * "fora": the same number of articles taken from the remainder.

        ``classifica`` is then called with the article under evaluation and
        that corpus; results above 0.7 are printed next to the permalink.

        ``args`` and ``options`` are accepted for the management-command
        interface but are not read here.
        """
        total = 50
        materias = Materia.objects.filter(corpo__icontains="saibamais")[:total]

        for m in materias:
            # Single-argument print produces the same output on Python 2 and 3.
            print("\n%s" % (m.titulo,))
            documento = get_document(m)

            recomendadas = [str(rec.permalink) for rec in relacionadas(documento, 4)]
            for r in recomendadas:
                urls = [u.origem for u in Analytics.objects.filter(destino=r).order_by("pageviews")]

                # The comprehension variables must NOT be called ``m``: on
                # Python 2 they leak into this scope and would replace the
                # article being evaluated before ``classifica`` is called.
                documents = [
                    (tf(dentro.corpo).keys(), "dentro")
                    for dentro in Materia.objects.filter(permalink__in=urls)
                ]
                if not documents:
                    # No known source article for this recommendation.
                    continue

                # Balance the corpus with as many "fora" samples as "dentro".
                documents += [
                    (tf(fora.corpo).keys(), "fora")
                    for fora in Materia.objects.exclude(permalink__in=urls)[: len(documents)]
                ]

                # NOTE(review): presumably a probability in [0, 1] - confirm
                # against ``classifica``.
                prob = classifica(m, documents)
                if prob > 0.7:
                    print("%s %s" % (round(prob, 2), r))
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Count, per folder, how often each field combination yields a hit.

        Options read:

        * ``sequential``: sequence whose non-empty combinations (joined into
          strings) are passed as ``comb`` to ``relacionadas``;
        * ``recomendadas``: converted with ``int`` and passed as ``total``;
        * ``similaridade``: evaluated with ``eval`` and passed as
          ``similaridade``.

        All three are read once, before any article is processed.

        For up to 100 ``Materia`` rows whose ``corpo`` contains "saibamais",
        every combination is tried; a hit is counted when at least one
        recommended URL is also in ``documento['relacionadas']``. Counts are
        printed per primary folder name (together with ``_TotalMaterias``,
        the number of articles seen in that folder) and then summed under
        "Total Geral".

        Side effect: sets ``settings.CACHE = True``.
        """
        settings.CACHE = True

        materias = Materia.objects.filter(corpo__icontains='saibamais')[:100]

        # Every non-empty combination of the elements of ``seq``, shortest
        # first, each joined into a single string.
        seq = options['sequential']
        combinacoes = [
            "".join(comb)
            for tamanho in range(1, len(seq) + 1)
            for comb in itertools.combinations(seq, tamanho)
        ]

        total_recomendadas = int(options['recomendadas'])
        # SECURITY: ``eval`` executes arbitrary code taken from a command
        # option. Kept because the accepted values are not visible here;
        # TODO(review): replace with an explicit lookup of allowed values.
        similaridade = eval(options['similaridade'])

        # folder name -> {'_TotalMaterias': n, <combination>: hits, ...}
        dict_combinacoes = {}
        for m in materias:
            mfolder = m.primary_folder().name
            acertos = dict_combinacoes.setdefault(mfolder, {})
            acertos['_TotalMaterias'] = acertos.get('_TotalMaterias', 0) + 1

            for comb in combinacoes:
                # NOTE(review): the document is rebuilt for every combination;
                # hoisting it is only safe if ``relacionadas`` does not mutate
                # it - confirm before moving.
                documento = monta_doc(m)
                materias_solr = relacionadas(
                    documento,
                    comb=comb,
                    total=total_recomendadas,
                    similaridade=similaridade,
                )
                recomendadas = [str(rec.url) for (rec, _score) in materias_solr]
                encontradas = documento['relacionadas'].intersection(recomendadas)

                # Non-empty intersection means a hit. ``any(encontradas)``
                # would test the truthiness of the URLs themselves instead.
                if encontradas:
                    acertos[comb] = acertos.get(comb, 0) + 1

        # Per-folder report, accumulating the overall totals on the way.
        dict_final = {}
        for dc, por_editoria in dict_combinacoes.items():
            # Single-argument print produces the same output on Python 2 and 3.
            print("\n%s" % (dc,))
            for d in sorted(por_editoria):
                print("%s %s" % (d, por_editoria[d]))
                dict_final[d] = dict_final.get(d, 0) + por_editoria[d]

        print("\nTotal Geral")
        for f in sorted(dict_final):
            print("%s %s" % (f, dict_final[f]))