示例#1
0
def get_movie_info(movie_id):
    """Fetch one movie page, store the movie and flag its process as done.

    Nothing happens when the ``Process`` record for ``movie_id`` already
    reports success.  Otherwise, if no ``Movie`` with that id is stored yet,
    the page built from ``MOVIE_URL`` is downloaded, the name, rating and
    poster URL are scraped from it, the movie is saved, its top comment is
    fetched and the process is marked as succeeded.

    Args:
        movie_id: Used as the ``Process`` and ``Movie`` id and to build the
            page URL.

    Raises:
        IndexError: If one of the XPath queries matches nothing.
    """
    create_app()
    # One lookup is enough: the original fetched the same record twice.
    process = Process.get_or_create(id=movie_id)
    if process.is_success:
        return

    print('Starting fetch movie: {}'.format(movie_id))
    start = time.time()

    if Movie.objects.filter(id=movie_id):
        # NOTE(review): an already stored movie leaves the process unflagged,
        # exactly as before -- confirm whether that is intended.
        return

    html = get_tree(MOVIE_URL.format(movie_id))
    name = html.xpath("//div[@id='content']//h1/span/text()")[0]
    mark = html.xpath(
        "//div[@class='rating_wrap clearbox']//strong/text()")[0]
    picture = html.xpath(
        "//div[@id='content']//div[@id='mainpic']//img/@src")[0]
    movie = Movie(id=movie_id, name=name, mark=mark, picture=picture)
    movie.save()
    get_top_comment_and_user_info(movie_id, movie)
    process.make_succeed()
    # A single parenthesised argument prints the same text on Python 2 and 3.
    print('Finished fetch movie: {} Cost: {}'.format(
        movie_id,
        time.time() - start))
示例#2
0
def get_top_comment_and_user_info(comment_id, movie):
    """Scrape the first comment on a comment page together with its author.

    When no ``Comment`` with ``comment_id`` is stored yet, the page built
    from ``COMMENT_URL`` is downloaded, the first match of each XPath query
    is taken, and a ``User`` and a ``Comment`` (linked to ``movie``) are
    created and saved.

    Args:
        comment_id: Id of the comment record; also formatted into
            ``COMMENT_URL``.
        movie: Movie object the stored comment is attached to.

    Raises:
        IndexError: If one of the XPath queries matches nothing.
    """
    create_app()
    if Comment.objects.filter(id=comment_id):
        # Already stored: nothing to fetch.
        return

    page = get_tree(COMMENT_URL.format(comment_id))

    def first(query):
        # Every field uses only the first node the query matches.
        return page.xpath(query)[0]

    content = first(
        "//div[@id='content']//div[@class='comment-item']//div[@class='comment']//p/text()"
    )
    user_name = first(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a/@title"
    )
    user_url = first(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a/@href"
    )
    user_picture = first(
        "//div[@id='content']//div[@class='comment-item']//div[@class='avatar']//a//img/@src"
    )
    like_count = first(
        "//div[@id='content']//div[@class='comment-item']"
        "//div[@class='comment']//span[@class='votes pr5']/text()")

    # The user id is the second-to-last '/'-separated piece of the profile URL.
    url_parts = user_url.split('/')
    author = User.get_or_create(id=url_parts[-2],
                                name=user_name,
                                picture=user_picture)
    author.save()

    Comment.get_or_create(id=comment_id,
                          content=content,
                          like_count=like_count,
                          user=author,
                          movie=movie).save()
示例#3
0
def predict():
    """Run the decision tree described in the request body.

    Reads ``tree``, ``toy_id``, ``user_truth`` and ``category`` from the
    JSON body, rebuilds the tree with ``utilities.get_tree`` and asks
    ``decisiontree.predict`` for a prediction.

    Returns:
        ``{"prediction": [...]}`` on success, or ``("Failed", 500)`` when
        anything in the handler raises (the error is printed).
    """
    try:
        body = request.json
        # Keys are read in this order before any further work is done.
        tree_json, toy_id, user_truth, targets = (
            body[key]
            for key in ("tree", "toy_id", "user_truth", "category"))
        tree = utilities.get_tree(tree_json)

        y_pred = decisiontree.predict(tree, toy_id, user_truth, targets[0])
        predicted_label = targets[y_pred[0]]
        print('Prediction: ', predicted_label)
        response = {"prediction": y_pred.tolist()}
    except Exception as err:
        print(err)
        return "Failed", 500
    return response
示例#4
0
def _vote_to_rating(vote):
    """Rescale a vote string linearly so that "1" gives 1.0 and "5" gives 0.0."""
    return 1 - (float(vote) - 1) / 4


def analyse_features():
    """Relate user votes to computed feature comparison scores.

    Loads ``data/results.json`` from the current working directory, drops
    the leading part of the vote log, and then, for every remaining vote
    that is not "0" and belongs to a session with at least 30 entries,
    records the rescaled vote next to the comparison score of the two
    images for the relevant feature(s).  Finally plots the HsvHist series
    and prints ``linear_regression`` of each feature's series.

    Compared with the previous version the results file is now closed
    after loading, the unused ``cUserRating`` list is gone, the builtin
    ``hash`` is no longer shadowed, and the per-feature branches are
    table-driven.
    """
    result_path = os.path.join(os.getcwd(), 'data', 'results.json')
    with open(result_path, 'r') as result_file:
        results = json.load(result_file)

    t = utils.get_tree()

    color_features = ('HsvHist', 'ColorBitmap', 'BIC')
    texture_features = ('EHD', 'GF', 'GLCM')
    all_features = color_features + texture_features
    # Per feature: the computed comparison scores and the matching user ratings.
    computed = {name: [] for name in all_features}
    user_rated = {name: [] for name in all_features}

    # Skip everything up to and including the entry at which one run of
    # consecutive same-session entries has collected 30 "0" votes.  If that
    # never happens the whole log is skipped.
    # NOTE(review): presumably this discards a warm-up/test phase -- confirm.
    skip = 0
    zero_votes = 0
    previous_session = 0
    for item in results:
        skip += 1
        if item['sessionhash'] != previous_session:
            zero_votes = 0
        previous_session = item['sessionhash']
        if item['votevalue'] == "0":
            zero_votes += 1
        if zero_votes == 30:
            break
    results = results[skip:]

    # Number of remaining entries per session.
    session_counts = {}
    for item in results:
        session = item['sessionhash']
        session_counts[session] = session_counts.get(session, 0) + 1

    print(session_counts)
    print(len(session_counts))

    for item in results:
        if (item['votevalue'] == "0"
                or session_counts[item['sessionhash']] < 30):
            continue
        img1 = int(item['mainimg']['index'])
        img2 = int(item['similarimg']['index'])

        if item['similarimg']['random']:
            # A random pair counts for every feature of its comparison family.
            if item['mainimg']['compare_by'] == 'color':
                names = color_features
            else:
                names = texture_features
        else:
            # A non-random pair counts only for the feature that selected it.
            feature = item['mainimg']['feature']
            if feature not in all_features:
                continue
            names = (feature,)

        rating = _vote_to_rating(item['votevalue'])
        features1 = t[img1]['features']
        features2 = t[img2]['features']
        if names[0] in color_features:
            comp = color.ColorFeatureExtracter.CompareFeatures(
                features1, features2)
            scores = [comp[name] for name in names]
        else:
            # Method 3 is the Bhattacharyya distance in OpenCV's histogram
            # comparison numbering -- confirm against the installed version.
            scores = [
                cv2.compareHist(features1[name], features2[name], 3)
                for name in names
            ]
        for name, score in zip(names, scores):
            computed[name].append(score)
            user_rated[name].append(rating)

    plt.plot(user_rated['HsvHist'], computed['HsvHist'], 'or')
    # plt.show() is intentionally left out, as in the original.
    report = (
        ('HsvHist', 'HsvHist'),
        ('Bitmap', 'ColorBitmap'),
        ('BIC', 'BIC'),
        ('EHD', 'EHD'),
        ('GF', 'GF'),
        ('GLCM', 'GLCM'),
    )
    for label, name in report:
        # Single-argument print calls behave the same on Python 2 and 3.
        print(label)
        print(linear_regression(user_rated[name], computed[name]))
示例#5
0
import cv2
from features import color
from utils import get_tree, save_tree
import os
import time
# ``time.clock`` was removed in Python 3.8; keep using it where it still
# exists and fall back to ``time.perf_counter`` otherwise.
c = time.clock if hasattr(time, 'clock') else time.perf_counter

from features import EHD, GF, GLCM

from concurrent.futures import ThreadPoolExecutor

# Settings
# NOTE(review): save_every_n is not read in the visible part of this file;
# presumably calcfeats saves the tree every n paintings -- confirm.
save_every_n = 100
# When True, main() fans the work out over a ThreadPoolExecutor;
# otherwise paintings are processed one after another.
multithreaded = True

# Collection of paintings loaded once at import time; main() iterates it.
t = get_tree()

def main():
    """Compute features for every painting in ``t``.

    With ``multithreaded`` set, each painting is submitted to a pool of
    four worker threads; otherwise ``calcfeats`` is called sequentially.

    Raises:
        Exception: Whatever ``calcfeats`` raises.  In threaded mode the
            first failure (in submission order) is re-raised after all
            tasks have finished; previously such errors were silently
            discarded together with the unused future objects.
    """
    if multithreaded:
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [
                executor.submit(calcfeats, i, painting)
                for i, painting in enumerate(t)
            ]
            print("\n\nSubmitted all tasks\n\n")
        # Leaving the ``with`` block waits for all tasks, so ``result()``
        # does not block here; it only surfaces a stored exception.
        for future in futures:
            future.result()
    else:
        for i, painting in enumerate(t):
            calcfeats(i, painting)
    print("\n\nDone with all paintings\n\n")

def calcfeats(i, painting):
    # NOTE(review): only the first statements of this function are visible
    # in this excerpt; `i` is not used in the visible part.
    # Replace any existing feature dictionary with an empty one.
    painting['features'] = {}
    # 'afbeelding' is Dutch for "image"; presumably this is the image file
    # name or path of the painting -- confirm against the data.
    fn = painting['afbeelding']
示例#6
0
 def __init__(self, article_id, session):
     """Store the article id and load the article's tree.

     The tree comes from ``get_tree``, called with ``Article_url``
     formatted with ``article_id`` and the given ``session``.
     """
     self._article_id = article_id
     article_url = Article_url.format(article_id)
     self.tree = get_tree(article_url, session)
示例#7
0
import os
import requests
from lxml import html

from utils import extract_articles, extract_article, get_tree
from data_access import save_article

# Flags exported through the process environment.
# NOTE(review): presumably read by the utils/data_access helpers -- confirm.
os.environ["debug"] = "n"
os.environ["print"] = "y"

if __name__ == "__main__":
    # Load the front page, then extract and store every non-premium article.
    tree = get_tree("https://www.faz.net")
    articles = extract_articles(tree)

    for teaser in articles:
        if teaser["is_premium"]:
            continue
        # Separate name so the loop item is not rebound to the extracted article.
        article = extract_article(teaser["url"])
        # Identity test is the idiomatic (PEP 8) comparison against None.
        if article is not None:
            save_article(article)
示例#8
0
    def forward(self,
                images,
                captions,
                lengths,
                img_lengths,
                img_txts,
                img_spans,
                txt_spans,
                labels,
                ids=None,
                epoch=None,
                *args):
        """Run one optimisation step and return a textual progress summary.

        Parses the image and the caption batch, combines both language-model
        losses (NLL + KL) with the contrastive matching loss, back-propagates,
        optionally clips gradients, steps the optimizer, updates the logger
        and the running span statistics, and formats a summary string that
        also shows the predicted and gold trees of the first sample.

        Args:
            images: Image batch, passed to ``self.forward_img_parser``.
            captions: Caption batch, passed to ``self.forward_txt_parser``;
                ``captions.size(0)`` is used as the batch size.
            lengths: Caption lengths; a list is converted to a long tensor.
            img_lengths: Image sequence lengths; a list is converted to a
                long tensor.
            img_txts: Per-sample token sequences used to render the image
                trees (only element 0 is read).
            img_spans: Gold spans for the image side.
            txt_spans: Gold spans for the text side.
            labels: Not read in this method.
            ids: Not read in this method.
            epoch: Not read in this method.
            *args: Not read in this method.

        Returns:
            str: The formatted summary line(s).
        """
        # Bookkeeping: iteration counter and current learning rate.
        self.niter += 1
        self.logger.update('Eit', self.niter)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # Accept plain Python lists for both length arguments.
        img_lengths = torch.tensor(img_lengths).long() if isinstance(
            img_lengths, list) else img_lengths
        lengths = torch.tensor(lengths).long() if isinstance(lengths,
                                                             list) else lengths

        if torch.cuda.is_available():
            images = images.cuda()
            captions = captions.cuda()
            lengths = lengths.cuda()
            img_lengths = img_lengths.cuda()
        bsize = captions.size(0)

        # Image-side parser; lprobs_img is not used further in this method.
        img_emb, nll_img, kl_img, span_margs_img, argmax_spans_img, trees_img, lprobs_img = self.forward_img_parser(
            images, img_lengths)

        ll_loss_img = nll_img.sum()
        kl_loss_img = kl_img.sum()

        # Text-side parser; lprobs_txt is not used further in this method.
        txt_emb, nll_txt, kl_txt, span_margs_txt, argmax_spans_txt, trees_txt, lprobs_txt = self.forward_txt_parser(
            captions, lengths)

        ll_loss_txt = nll_txt.sum()
        kl_loss_txt = kl_txt.sum()

        contrastive_loss = self.forward_loss(img_emb, txt_emb, img_lengths,
                                             lengths, argmax_spans_img,
                                             argmax_spans_txt, span_margs_img,
                                             span_margs_txt)
        mt_loss = contrastive_loss.sum()

        # Each term is weighted and divided by the batch size.
        loss_img = self.vse_lm_alpha * (ll_loss_img + kl_loss_img) / bsize
        loss_txt = self.vse_lm_alpha * (ll_loss_txt + kl_loss_txt) / bsize
        loss_mt = self.vse_mt_alpha * mt_loss / bsize

        loss = loss_img + loss_txt + loss_mt

        self.optimizer.zero_grad()
        loss.backward()

        # Gradient clipping is applied only for a positive grad_clip.
        if self.grad_clip > 0:
            clip_grad_norm_(self.all_params, self.grad_clip)
        self.optimizer.step()

        # NOTE(review): loss_mt is part of the optimised loss but is not logged here.
        self.logger.update('Loss_img', loss_img.item(), bsize)
        self.logger.update('Loss_txt', loss_txt.item(), bsize)
        self.logger.update('KL-Loss_img', kl_loss_img.item() / bsize, bsize)
        self.logger.update('KL-Loss_txt', kl_loss_txt.item() / bsize, bsize)
        self.logger.update('LL-Loss_img', ll_loss_img.item() / bsize, bsize)
        self.logger.update('LL-Loss_txt', ll_loss_txt.item() / bsize, bsize)

        # One extra position is counted per sequence (presumably an
        # end-of-sequence symbol -- confirm).
        self.n_word_img += (img_lengths + 1).sum().item()
        self.n_word_txt += (lengths + 1).sum().item()
        self.n_sent += bsize

        # Accumulate span statistics per sample for the running F1.
        for b in range(bsize):
            max_img_len = img_lengths[b].item()
            # Spans whose start equals their end are ignored.
            pred_img = [(a[0], a[1]) for a in argmax_spans_img[b]
                        if a[0] != a[1]]
            # The last span is dropped on both the predicted and the gold side
            # (presumably the span covering the whole sequence -- confirm).
            pred_set_img = set(pred_img[:-1])
            gold_img = [(img_spans[b][i][0].item(), img_spans[b][i][1].item())
                        for i in range(max_img_len - 1)]
            gold_set_img = set(gold_img[:-1])
            utils.update_stats(pred_set_img, [gold_set_img],
                               self.all_stats_img)

            # Same procedure for the text side.
            max_txt_len = lengths[b].item()
            pred_txt = [(a[0], a[1]) for a in argmax_spans_txt[b]
                        if a[0] != a[1]]
            pred_set_txt = set(pred_txt[:-1])
            gold_txt = [(txt_spans[b][i][0].item(), txt_spans[b][i][1].item())
                        for i in range(max_txt_len - 1)]
            gold_set_txt = set(gold_txt[:-1])
            utils.update_stats(pred_set_txt, [gold_set_txt],
                               self.all_stats_txt)

        # The log-step guard below is disabled, so the summary is built on every call.
        # if self.niter % self.log_step == 0:
        p_norm, g_norm = self.norms()
        all_f1_img = utils.get_f1(self.all_stats_img)
        all_f1_txt = utils.get_f1(self.all_stats_txt)
        # Running sums taken from the logger meters updated above.
        train_kl_img = self.logger.meters["KL-Loss_img"].sum
        train_ll_img = self.logger.meters["LL-Loss_img"].sum
        train_kl_txt = self.logger.meters["KL-Loss_txt"].sum
        train_ll_txt = self.logger.meters["LL-Loss_txt"].sum

        info = '|Pnorm|: {:.6f}, |Gnorm|: {:.2f}, ReconPPL-Img: {:.2f}, KL-Img: {:.2f}, ' + \
                'PPLBound-Img: {:.2f}, CorpusF1-Img: {:.2f}, ' + \
                'ReconPPL-Txt: {:.2f}, KL-Txt: {:.2f}, ' + \
                'PPLBound-Txt: {:.2f}, CorpusF1-Txt: {:.2f}, ' + \
                'Speed: {:.2f} sents/sec'

        # Perplexities are exp(sum / word count); speed is measured against self.s_time.
        info = info.format(
            p_norm, g_norm, np.exp(train_ll_img / self.n_word_img),
            train_kl_img / self.n_sent,
            np.exp((train_ll_img + train_kl_img) / self.n_word_img),
            all_f1_img[0], np.exp(train_ll_txt / self.n_word_txt),
            train_kl_txt / self.n_sent,
            np.exp((train_ll_txt + train_kl_txt) / self.n_word_txt),
            all_f1_txt[0], self.n_sent / (time.time() - self.s_time))

        # Render predicted and gold trees of the first sample (image side).
        pred_action_img = utils.get_actions(trees_img[0])
        sent_s_img = img_txts[0]
        pred_t_img = utils.get_tree(pred_action_img, sent_s_img)
        gold_t_img = utils.span_to_tree(img_spans[0].tolist(),
                                        img_lengths[0].item())
        gold_action_img = utils.get_actions(gold_t_img)
        gold_t_img = utils.get_tree(gold_action_img, sent_s_img)
        info += "\nPred T Image: {}\nGold T Image: {}".format(
            pred_t_img, gold_t_img)

        # Same for the text side; token ids are mapped back to words first.
        pred_action_txt = utils.get_actions(trees_txt[0])
        sent_s_txt = [
            self.vocab.idx2word[wid] for wid in captions[0].cpu().tolist()
        ]
        pred_t_txt = utils.get_tree(pred_action_txt, sent_s_txt)
        gold_t_txt = utils.span_to_tree(txt_spans[0].tolist(),
                                        lengths[0].item())
        gold_action_txt = utils.get_actions(gold_t_txt)
        gold_t_txt = utils.get_tree(gold_action_txt, sent_s_txt)
        info += "\nPred T Text: {}\nGold T Text: {}".format(
            pred_t_txt, gold_t_txt)
        return info