Exemplo n.º 1
0
        weeks = list()
        dist = list()
        lamb = list()
        inicial = 1
        final = 603
        for line in data_file:
            tweet = json.loads(line)
            created = int(tweet['created_at'])
            if(days2time(inicial) <= created < days2time(final)):
                doc_tw.add(tweet['text'])
                doc_set.append(tweet)
        texts = tp.text_process(doc_tw)        
        corpus, dic = tp.create_corpus(texts)
        ldamodel = tp.generate_lda(corpus, dic, 5)
        #ldamodel = tp.generate_hdp(corpus, dic)
        print(tp.print_topics(ldamodel))

        with open(lamb_dir) as l_file:
            for line in l_file:
                i = int(line.split('|')[2])
                w = int(line.split('|')[0])
                lamb.append(w)
                for s in range(i-1): 
                    lamb.append(w)

        for k in range(inicial, final, 7):
            doc = set()
            for tw in doc_set:
                if(days2time(k) <= tw['created_at'] < days2time(k+7)):
                    doc.add(tw['text'])
            documents = tp.text_process(doc)