def update_association(self, postfreq_sum, commentfreq_sum, vocab_pairsum,
                       vocab_ids, batch_size):
    """Recompute and store the NPMI and confidence of stored associations.

    Associations are fetched with ``self.query_association_by_vocabt_id``
    for the given ids, the frequencies of every vocabulary entry they
    reference are fetched with ``self.query_vocab_by_id_sql``, and for each
    association the following are computed::

        px  = postfreq(vocabt)    / postfreq_sum
        py  = commentfreq(vocabc) / commentfreq_sum
        pxy = stored pxy value    / vocab_pairsum
        pmi        = self.normalized_pmi(px, py, pxy)
        confidence = log(pxy / px)

    The results are split with ``self.batch_list`` and written batch by
    batch through ``PsqlQuery().update(self.update_association_sql, ...)``.

    Args:
        postfreq_sum: Divisor applied to a title vocab's ``postfreq``.
        commentfreq_sum: Divisor applied to a comment vocab's
            ``commentfreq``.
        vocab_pairsum: Divisor applied to the stored ``pxy`` value
            (presumably a raw pair count -- confirm against the schema).
        vocab_ids: Iterable of ids handed to the association query.
        batch_size: Forwarded to ``self.batch_list``.

    Returns:
        None. Nothing is queried or written when ``vocab_ids`` is empty,
        and nothing is written when no association is found.

    Raises:
        KeyError: If an association references a vocabulary id that the
            vocabulary query did not return.
        ZeroDivisionError, ValueError: If a divisor or ``px`` is zero, or
            ``pxy / px`` is not positive (``math.log`` domain error).
    """
    # Materialise once: works for any iterable and lets us test emptiness.
    vocab_ids = tuple(vocab_ids)
    if not vocab_ids:
        # Same precaution the vocabulary query below already takes: an
        # empty id tuple gives the query nothing to match (and presumably
        # renders as an invalid empty SQL list -- confirm against the SQL).
        return

    assoc_rows, assoc_schema = self._query_all(
        self.query_association_by_vocabt_id, (vocab_ids, ))

    # (vocabt_id, vocabc_id, tokenizer) -> stored pxy value
    association_dict = {
        (row[assoc_schema['vocabt_id']],
         row[assoc_schema['vocabc_id']],
         row[assoc_schema['tokenizer']]): row[assoc_schema['pxy']]
        for row in assoc_rows
    }

    # Every distinct vocabulary id used on either side of an association.
    total_vocab_id = list(
        {vid for key in association_dict for vid in key[:2]})
    if not total_vocab_id:
        return

    vocab_rows, vocab_schema = self._query_all(
        self.query_vocab_by_id_sql, (tuple(total_vocab_id), ))

    # vocab id -> (postfreq, commentfreq)
    qvocab_dict = {
        row[vocab_schema['id']]: (row[vocab_schema['postfreq']],
                                  row[vocab_schema['commentfreq']])
        for row in vocab_rows
    }

    # Parallel columns; kept as plain lists because they are handed to
    # self.batch_list and then to the SQL layer unchanged.
    vocabt_all = []
    vocabc_all = []
    npmi_all = []
    confidence_all = []
    tokenizer_all = []

    for (vocabt, vocabc, tok), pair_value in association_dict.items():
        px = qvocab_dict[vocabt][0] / postfreq_sum
        py = qvocab_dict[vocabc][1] / commentfreq_sum
        pxy = pair_value / vocab_pairsum

        vocabt_all.append(vocabt)
        vocabc_all.append(vocabc)
        npmi_all.append(self.normalized_pmi(px, py, pxy))
        confidence_all.append(math.log(pxy / px))
        tokenizer_all.append(tok)

    batch_vocabt = self.batch_list(vocabt_all, batch_size)
    batch_vocabc = self.batch_list(vocabc_all, batch_size)
    batch_tokenizer = self.batch_list(tokenizer_all, batch_size)
    batch_npmi = self.batch_list(npmi_all, batch_size)
    batch_confidence = self.batch_list(confidence_all, batch_size)

    # NOTE(review): relies on batch_list cutting all five columns at the
    # same positions so that zipped batches stay row-aligned.
    batches = zip(batch_vocabt, batch_vocabc, batch_tokenizer,
                  batch_confidence, batch_npmi)
    for vocabt_id, vocabc_id, tokenizer, confidence, pmi in batches:
        psql = PsqlQuery()
        psql.update(
            self.update_association_sql, {
                'vocabt_id': vocabt_id,
                'vocabc_id': vocabc_id,
                'tokenizer': tokenizer,
                'confidence': confidence,
                'pmi': pmi
            })
def update_vocab_commentfreq(self, vocab_id):
    """Recount and store ``commentfreq`` for the given vocabulary ids.

    The ids are de-duplicated, the rows returned by
    ``self.query_vocab2comment_by_vid_sql`` are counted per
    ``vocabulary_id``, and the counts are written with
    ``PsqlQuery().update(self.update_vocab_commentfreq_sql, ...)``. Ids
    with no returned row are written with a frequency of 0.

    Args:
        vocab_id: Iterable of vocabulary ids (duplicates are ignored).

    Returns:
        None. Nothing is queried or written when ``vocab_id`` is empty.
    """
    vocab_id = list(set(vocab_id))
    if not vocab_id:
        # Nothing to count or update; also avoids handing an empty id
        # tuple to the query.
        return

    rows, schema = self._query_all(
        self.query_vocab2comment_by_vid_sql, (tuple(vocab_id), ))

    # One returned row per (vocabulary, comment) link -- presumably; the
    # count of rows per vocabulary id is what gets stored.
    vocab_cnt = collections.Counter(
        row[schema['vocabulary_id']] for row in rows)

    # Counter yields 0 for ids that never appeared, so no membership test
    # is needed.
    freq = [vocab_cnt[id_] for id_ in vocab_id]

    psql = PsqlQuery()
    psql.update(self.update_vocab_commentfreq_sql, {
        'id_': vocab_id,
        'commentfreq': freq
    })