def get_movie_info(movie_id):
    """Fetch one Douban movie page, persist it, then fetch its top comment.

    Uses the Process record for ``movie_id`` as an idempotency marker:
    returns immediately when it is already marked successful, and marks it
    succeeded once this fetch completes.
    """
    create_app()
    # Fix: the original called Process.get_or_create twice; one call suffices.
    process = Process.get_or_create(id=movie_id)
    if process.is_success:
        return
    # Fix: corrected "Strting" typo in the log message.
    print('Starting fetch movie: {}'.format(movie_id))
    start = time.time()
    movie = Movie.objects.filter(id=movie_id)
    if not movie:
        html = get_tree(MOVIE_URL.format(movie_id))
        name = html.xpath("//div[@id='content']//h1/span/text()")[0]
        mark = html.xpath(
            "//div[@class='rating_wrap clearbox']//strong/text()")[0]
        picture = html.xpath(
            "//div[@id='content']//div[@id='mainpic']//img/@src")[0]
        movie = Movie(id=movie_id, name=name, mark=mark, picture=picture)
        movie.save()
        # Pull the top comment (and its author) for the freshly saved movie.
        get_top_comment_and_user_info(movie_id, movie)
    process.make_succeed()
    print('Finished fetch movie: {} Cost: {}'.format(
        movie_id, time.time() - start))
def get_top_comment_and_user_info(comment_id, movie):
    """Scrape one Douban comment page and persist the comment plus its author.

    No-op when a Comment with ``comment_id`` already exists.
    """
    create_app()
    # Guard clause: skip the scrape entirely if the comment is already stored.
    if Comment.objects.filter(id=comment_id):
        return
    page = get_tree(COMMENT_URL.format(comment_id))
    content = page.xpath(
        "//div[@id='content']//div[@class='comment-item']//div[@class='comment']//p/text()"
    )[0]
    author_name = page.xpath(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a/@title"
    )[0]
    author_url = page.xpath(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a/@href"
    )[0]
    author_picture = page.xpath(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a//img/@src"
    )[0]
    like_count = page.xpath(
        "//div[@id='content']//div[@class='comment-item']"
        "//div[@class='comment']//span[@class='votes pr5']/text()")[0]
    # The author's numeric id is the second-to-last path segment of the URL.
    author_id = author_url.split('/')[-2]
    author = User.get_or_create(id=author_id, name=author_name,
                                picture=author_picture)
    author.save()
    record = Comment.get_or_create(id=comment_id, content=content,
                                   like_count=like_count, user=author,
                                   movie=movie)
    record.save()
def predict():
    """Flask endpoint: run the decision tree over the posted JSON payload.

    Expects the keys ``tree``, ``toy_id``, ``user_truth`` and ``category``;
    responds with {"prediction": [...]} on success, or ("Failed", 500) on
    any error.
    """
    try:
        payload = request.json
        targets = payload["category"]
        tree = utilities.get_tree(payload["tree"])
        y_pred = decisiontree.predict(tree, payload["toy_id"],
                                      payload["user_truth"], targets[0])
        print('Prediction: ', targets[y_pred[0]])
        return {"prediction": y_pred.tolist()}
    except Exception as err:
        # Best-effort endpoint: report the error and return a 500.
        print(err)
        return "Failed", 500
def _user_score(item):
    """Map a "1"-"5" vote string onto [0, 1] (vote "1" -> 1.0, "5" -> 0.0)."""
    return 1 - (float(item['votevalue']) - 1) / 4


def analyse_features():
    """Correlate each image-feature distance with the users' vote scores.

    Reads data/results.json, drops a leading warm-up prefix of votes, keeps
    only votes from sessions with at least 30 entries, then prints a linear
    regression of user score vs. computed distance for every feature
    (HsvHist, ColorBitmap, BIC, EHD, GF, GLCM).
    """
    result_path = os.path.join(os.getcwd(),
                               os.path.join('data', 'results.json'))
    # Fix: the original opened the file and never closed it.
    with open(result_path, 'r') as result_file:
        result_array = json.load(result_file)

    t = utils.get_tree()
    # Per-feature distance lists and the matching user-score lists (…u).
    hsvHist, bitmap, bic = [], [], []
    hsvHistu, bitmapu, bicu = [], [], []
    EHD, GF, GLCM = [], [], []
    EHDu, GFu, GLCMu = [], [], []

    # Skip the leading run of results until 30 consecutive "0" votes from a
    # single session have been seen (treated as a warm-up/garbage prefix).
    i = 0
    c = 0
    prev_hash = 0  # renamed from `hash`, which shadowed the builtin
    for item in result_array:
        i += 1
        c *= prev_hash == item['sessionhash']  # streak resets on session change
        prev_hash = item['sessionhash']
        c += item['votevalue'] == "0"
        if c == 30:
            break
    result_array = result_array[i:]

    # Vote count per session; sessions with < 30 votes are ignored below.
    session_counts = {}
    for item in result_array:
        session_counts[item['sessionhash']] = \
            session_counts.get(item['sessionhash'], 0) + 1
    print(session_counts)
    print(len(session_counts))

    for item in result_array:
        if item['votevalue'] == "0" or \
                session_counts[item['sessionhash']] < 30:
            continue
        img1 = int(item['mainimg']['index'])
        img2 = int(item['similarimg']['index'])
        score = _user_score(item)
        if item['similarimg']['random']:
            # Random pairing: record the same score against every feature of
            # the compared family.
            if item['mainimg']['compare_by'] == 'color':
                hsvHistu.append(score)
                bitmapu.append(score)
                bicu.append(score)
                comp = color.ColorFeatureExtracter.CompareFeatures(
                    t[img1]['features'], t[img2]['features'])
                hsvHist.append(comp['HsvHist'])
                bitmap.append(comp['ColorBitmap'])
                bic.append(comp['BIC'])
            else:
                EHDu.append(score)
                EHD.append(cv2.compareHist(t[img1]['features']['EHD'],
                                           t[img2]['features']['EHD'], 3))
                GFu.append(score)
                GF.append(cv2.compareHist(t[img1]['features']['GF'],
                                          t[img2]['features']['GF'], 3))
                GLCMu.append(score)
                GLCM.append(cv2.compareHist(t[img1]['features']['GLCM'],
                                            t[img2]['features']['GLCM'], 3))
        else:
            # Targeted pairing: only the feature that produced the match.
            feature = item['mainimg']['feature']
            if feature == 'HsvHist':
                comp = color.ColorFeatureExtracter.CompareFeatures(
                    t[img1]['features'], t[img2]['features'])
                hsvHist.append(comp['HsvHist'])
                hsvHistu.append(score)
            if feature == 'ColorBitmap':
                comp = color.ColorFeatureExtracter.CompareFeatures(
                    t[img1]['features'], t[img2]['features'])
                bitmap.append(comp['ColorBitmap'])
                bitmapu.append(score)
            if feature == 'BIC':
                comp = color.ColorFeatureExtracter.CompareFeatures(
                    t[img1]['features'], t[img2]['features'])
                bic.append(comp['BIC'])
                bicu.append(score)
            if feature == 'EHD':
                EHD.append(cv2.compareHist(t[img1]['features']['EHD'],
                                           t[img2]['features']['EHD'], 3))
                EHDu.append(score)
            if feature == 'GF':
                GF.append(cv2.compareHist(t[img1]['features']['GF'],
                                          t[img2]['features']['GF'], 3))
                GFu.append(score)
            if feature == 'GLCM':
                GLCM.append(cv2.compareHist(t[img1]['features']['GLCM'],
                                            t[img2]['features']['GLCM'], 3))
                GLCMu.append(score)

    plt.plot(hsvHistu, hsvHist, 'or')
    #plt.show()
    # Same labels/output as before, folded into one data-driven loop.
    for label, scores, dists in (('HsvHist', hsvHistu, hsvHist),
                                 ('Bitmap', bitmapu, bitmap),
                                 ('BIC', bicu, bic),
                                 ('EHD', EHDu, EHD),
                                 ('GF', GFu, GF),
                                 ('GLCM', GLCMu, GLCM)):
        print(label)
        print(linear_regression(scores, dists))
    return
import cv2
from features import color
from utils import get_tree, save_tree
import os
import time

c = time.clock

from features import EHD, GF, GLCM
from concurrent.futures import ThreadPoolExecutor

# Settings
save_every_n = 100
multithreaded = True

t = get_tree()


def main():
    """Compute features for every painting, on a thread pool when enabled."""
    if not multithreaded:
        # Sequential fallback.
        for idx, painting in enumerate(t):
            calcfeats(idx, painting)
        print("\n\nDone with all paintings\n\n")
        return
    with ThreadPoolExecutor(max_workers=4) as executor:
        for idx, painting in enumerate(t):
            future = executor.submit(calcfeats, idx, painting)
        print("\n\nSubmitted all tasks\n\n")


def calcfeats(i, painting):
    painting['features'] = {}
    fn = painting['afbeelding']
def __init__(self, article_id, session):
    """Remember the article id and fetch/parse its page.

    ``self.tree`` holds the parsed document returned by ``get_tree`` for
    the URL built from ``Article_url`` — presumably an lxml tree fetched
    with the given requests session; confirm against ``get_tree``.
    """
    self._article_id = article_id
    self.tree = get_tree(Article_url.format(article_id), session)
import os

import requests
from lxml import html

from data_access import save_article
from utils import extract_articles, extract_article, get_tree

os.environ["debug"] = "n"
os.environ["print"] = "y"

if __name__ == "__main__":
    # Crawl the FAZ front page and store every freely readable article.
    tree = get_tree("https://www.faz.net")
    articles = extract_articles(tree)
    for article in articles:
        # Premium articles are paywalled; skip them.
        if article["is_premium"]:
            continue
        article = extract_article(article["url"])
        # Fix: compare against None by identity (was `article != None`).
        if article is not None:
            save_article(article)
def forward(self, images, captions, lengths, img_lengths, img_txts,
            img_spans, txt_spans, labels, ids=None, epoch=None, *args):
    """Run one training step of the joint image/text parser.

    Parses both modalities, combines their reconstruction/KL losses with a
    cross-modal contrastive loss, backpropagates, updates span-prediction
    statistics per example, and returns a formatted progress string.

    NOTE(review): assumes ``images``/``captions`` are batch-first tensors and
    ``img_spans``/``txt_spans`` index gold spans per example — confirm
    against the data loader.
    """
    self.niter += 1
    self.logger.update('Eit', self.niter)
    self.logger.update('lr', self.optimizer.param_groups[0]['lr'])
    # Length arguments may arrive as plain lists; normalize to long tensors.
    img_lengths = torch.tensor(img_lengths).long() if isinstance(
        img_lengths, list) else img_lengths
    lengths = torch.tensor(lengths).long() if isinstance(lengths,
                                                         list) else lengths
    if torch.cuda.is_available():
        images = images.cuda()
        captions = captions.cuda()
        lengths = lengths.cuda()
        img_lengths = img_lengths.cuda()
    bsize = captions.size(0)
    # Image-side parse: embeddings, NLL/KL, span marginals, argmax spans,
    # predicted trees and log-probs.
    img_emb, nll_img, kl_img, span_margs_img, argmax_spans_img, trees_img, lprobs_img = self.forward_img_parser(
        images, img_lengths)
    ll_loss_img = nll_img.sum()
    kl_loss_img = kl_img.sum()
    # Text-side parse, same outputs.
    txt_emb, nll_txt, kl_txt, span_margs_txt, argmax_spans_txt, trees_txt, lprobs_txt = self.forward_txt_parser(
        captions, lengths)
    ll_loss_txt = nll_txt.sum()
    kl_loss_txt = kl_txt.sum()
    # Cross-modal matching loss between the two embedding spaces.
    contrastive_loss = self.forward_loss(img_emb, txt_emb, img_lengths,
                                         lengths, argmax_spans_img,
                                         argmax_spans_txt, span_margs_img,
                                         span_margs_txt)
    mt_loss = contrastive_loss.sum()
    # Per-sentence averaged losses, weighted by the vse_* alphas.
    loss_img = self.vse_lm_alpha * (ll_loss_img + kl_loss_img) / bsize
    loss_txt = self.vse_lm_alpha * (ll_loss_txt + kl_loss_txt) / bsize
    loss_mt = self.vse_mt_alpha * mt_loss / bsize
    loss = loss_img + loss_txt + loss_mt
    self.optimizer.zero_grad()
    loss.backward()
    if self.grad_clip > 0:
        clip_grad_norm_(self.all_params, self.grad_clip)
    self.optimizer.step()
    self.logger.update('Loss_img', loss_img.item(), bsize)
    self.logger.update('Loss_txt', loss_txt.item(), bsize)
    self.logger.update('KL-Loss_img', kl_loss_img.item() / bsize, bsize)
    self.logger.update('KL-Loss_txt', kl_loss_txt.item() / bsize, bsize)
    self.logger.update('LL-Loss_img', ll_loss_img.item() / bsize, bsize)
    self.logger.update('LL-Loss_txt', ll_loss_txt.item() / bsize, bsize)
    # Word counters include one extra token per example (+1); presumably an
    # EOS/stop symbol — confirm against the parser's vocabulary handling.
    self.n_word_img += (img_lengths + 1).sum().item()
    self.n_word_txt += (lengths + 1).sum().item()
    self.n_sent += bsize
    # Accumulate span-prediction stats per example; trivial single-token
    # spans (a[0] == a[1]) and the full-sentence span ([:-1]) are excluded.
    for b in range(bsize):
        max_img_len = img_lengths[b].item()
        pred_img = [(a[0], a[1]) for a in argmax_spans_img[b]
                    if a[0] != a[1]]
        pred_set_img = set(pred_img[:-1])
        gold_img = [(img_spans[b][i][0].item(), img_spans[b][i][1].item())
                    for i in range(max_img_len - 1)]
        gold_set_img = set(gold_img[:-1])
        utils.update_stats(pred_set_img, [gold_set_img], self.all_stats_img)
        max_txt_len = lengths[b].item()
        pred_txt = [(a[0], a[1]) for a in argmax_spans_txt[b]
                    if a[0] != a[1]]
        pred_set_txt = set(pred_txt[:-1])
        gold_txt = [(txt_spans[b][i][0].item(), txt_spans[b][i][1].item())
                    for i in range(max_txt_len - 1)]
        gold_set_txt = set(gold_txt[:-1])
        utils.update_stats(pred_set_txt, [gold_set_txt], self.all_stats_txt)
    # if self.niter % self.log_step == 0:
    p_norm, g_norm = self.norms()
    all_f1_img = utils.get_f1(self.all_stats_img)
    all_f1_txt = utils.get_f1(self.all_stats_txt)
    train_kl_img = self.logger.meters["KL-Loss_img"].sum
    train_ll_img = self.logger.meters["LL-Loss_img"].sum
    train_kl_txt = self.logger.meters["KL-Loss_txt"].sum
    train_ll_txt = self.logger.meters["LL-Loss_txt"].sum
    # Progress line: parameter/gradient norms, reconstruction perplexities,
    # KL terms, PPL bounds, corpus F1 for both modalities, and throughput.
    info = '|Pnorm|: {:.6f}, |Gnorm|: {:.2f}, ReconPPL-Img: {:.2f}, KL-Img: {:.2f}, ' + \
        'PPLBound-Img: {:.2f}, CorpusF1-Img: {:.2f}, ' + \
        'ReconPPL-Txt: {:.2f}, KL-Txt: {:.2f}, ' + \
        'PPLBound-Txt: {:.2f}, CorpusF1-Txt: {:.2f}, ' + \
        'Speed: {:.2f} sents/sec'
    info = info.format(
        p_norm, g_norm, np.exp(train_ll_img / self.n_word_img),
        train_kl_img / self.n_sent,
        np.exp((train_ll_img + train_kl_img) / self.n_word_img),
        all_f1_img[0], np.exp(train_ll_txt / self.n_word_txt),
        train_kl_txt / self.n_sent,
        np.exp((train_ll_txt + train_kl_txt) / self.n_word_txt),
        all_f1_txt[0], self.n_sent / (time.time() - self.s_time))
    # Append a rendered predicted vs. gold tree for the first image example.
    pred_action_img = utils.get_actions(trees_img[0])
    sent_s_img = img_txts[0]
    pred_t_img = utils.get_tree(pred_action_img, sent_s_img)
    gold_t_img = utils.span_to_tree(img_spans[0].tolist(),
                                    img_lengths[0].item())
    gold_action_img = utils.get_actions(gold_t_img)
    gold_t_img = utils.get_tree(gold_action_img, sent_s_img)
    info += "\nPred T Image: {}\nGold T Image: {}".format(
        pred_t_img, gold_t_img)
    # Same for the first text example, decoding word ids via the vocab.
    pred_action_txt = utils.get_actions(trees_txt[0])
    sent_s_txt = [
        self.vocab.idx2word[wid] for wid in captions[0].cpu().tolist()
    ]
    pred_t_txt = utils.get_tree(pred_action_txt, sent_s_txt)
    gold_t_txt = utils.span_to_tree(txt_spans[0].tolist(),
                                    lengths[0].item())
    gold_action_txt = utils.get_actions(gold_t_txt)
    gold_t_txt = utils.get_tree(gold_action_txt, sent_s_txt)
    info += "\nPred T Text: {}\nGold T Text: {}".format(
        pred_t_txt, gold_t_txt)
    return info