示例#1
0
def test_display_search():
    """Print the formatted and the raw search output for one fixed document id.

    The index is loaded before the ``Advanced_operations`` instance is
    created; the ``Index`` object itself is not used afterwards, so
    ``load_index()`` is called purely for its effect.
    """
    loadedIndex = Index()
    loadedIndex.load_index()

    targetDocId = 11
    operations = Advanced_operations()

    # Human-readable table first, a blank separator line, then the raw result.
    formatted = operations.display_search(targetDocId)
    print(formatted)
    print()
    rawResult = operations.search(targetDocId)
    print(rawResult)
def test_scanner():
    """Scan two fixed tags with ``Search_tag`` and print the resulting counter.

    The index is loaded first; the ``Index`` object is not referenced again,
    so ``load_index()`` is called purely for its effect.
    """
    tagsToScan = ['dark_hero', 'noir_thriller']

    loadedIndex = Index()
    loadedIndex.load_index()
    scanner = Scanner()
    # Earlier variant, kept for reference only:
    #     scanner.scan(tagsToScan, Test, 'Test')

    scores = Counter()
    scanner.scan(tagsToScan, Search_tag, scores)
    print(scores)
示例#3
0
'''
Created on 30 Nov 2018

@author: wyan2
'''
'''
Refer to my previous work
'''

from flask import Flask, render_template, request
from inv_index import Index, Advanced_operations
from logger import Logger
import global_settings as gs
import json

# One logger instance is created here and handed to both the index and the
# advanced-operations object below.
# NOTE(review): the meaning of the second argument (True) is not visible from
# this file -- confirm against Logger.get_logger.
logger = Logger.get_logger('movie_recommendation', True)
idx = Index(logger)
adOps = Advanced_operations(logger)

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)


@app.route('/search', methods=['POST', 'GET'])
def search():
    """Return ``adOps.search(movieId)`` serialised as JSON.

    The movie id is read from the ``movieId`` query-string parameter for both
    GET and POST requests.

    :return: the JSON-encoded search result, or a ``(body, 400)`` tuple with a
        JSON error object when ``movieId`` is missing or is not an integer
        (previously a non-integer value surfaced as an unhandled
        ``ValueError``, i.e. an HTTP 500).
    """
    rawMovieId = request.args.get('movieId')
    try:
        movieId = int(rawMovieId)
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: not an integer.
        return json.dumps({'error': 'movieId must be an integer'}), 400
    return json.dumps(adOps.search(movieId))


@app.route('/display_search', methods=['POST', 'GET'])
def display_search():
    templatePath = gs.templatePath
    movieId = int(request.args['movieId'])
示例#4
0
 def __init__(self):
     """Create the instance's index handle, stored as ``self.idx``."""
     self.idx = Index()
示例#5
0
# -*- coding:utf-8 -*-
'''
Created on 30 Nov 2018

@author: wyan2
'''
import os
os.sys.path.append('..')

from inv_index import Index
from data_structure import Post_unit, Query
from probes import Index_probe

# Module-level index instance; prepare_index() adds its posting units to it.
index = Index()


def prepare_index():
    """Add four fixed posting units to the module-level ``index``.

    Every unit is first rebuilt from its flattened text form with
    ``Post_unit.deflatten``; only then are the units added, in the order they
    are listed.  Finally ``Index_probe().display()`` is called.
    """
    flattenedUnits = (
        'a -1 -1 -1 {"gen_score":1} 1 1',
        'b -1 -1 -1 {"gen_score":1} 2 1',
        'a -1 -1 -1 {"gen_score":1} 5 1',
        'a -1 -1 -1 {"gen_score":1} 6 1',
    )
    postUnits = [Post_unit.deflatten(text) for text in flattenedUnits]

    # test adding posting units
    for postUnit in postUnits:
        index.add_post_unit(postUnit)

    Index_probe().display()


def test_persisting():
示例#6
0
def test__search():
    """Print the ten best-scored entries of ``_search`` for document id 541.

    ``Advanced_operations.search`` returns a plain list of document ids, which
    has no ``most_common``; the score counter is what ``_search`` returns, so
    that is the method exercised here.
    """
    idx = Index()
    idx.load_index()
    adOps = Advanced_operations()
    print(adOps._search(541).most_common(10))
 def __init__(self, logger=None):
     """Store a logger and build the index and scanner collaborators.

     :param logger: optional logger.  When it is falsy, a logger obtained
         from ``Logger.get_logger('Advanced_operations')`` is stored instead.
     """
     if logger:
         self.lg = logger
     else:
         self.lg = Logger.get_logger('Advanced_operations')
     # Index and Scanner receive the caller's argument unchanged (possibly
     # None), not the fallback logger stored on self.lg.
     self.idx = Index(logger)
     self.snr = Scanner(logger)
class Advanced_operations():
    """Search operations built on top of the index and the scanner.

    A search takes a document id, turns the tags attached to that document
    into a temporary query, and lets the scanner fill a score counter for
    that query.
    """

    def __init__(self, logger=None):
        """Store a logger and build the index and scanner collaborators.

        :param logger: optional logger.  When it is falsy, a logger obtained
            from ``Logger.get_logger('Advanced_operations')`` is stored on
            ``self.lg``.  ``Index`` and ``Scanner`` receive the argument
            unchanged (possibly ``None``).
        """
        self.lg = logger or Logger.get_logger('Advanced_operations')
        self.idx = Index(logger)
        self.snr = Scanner(logger)

    # docId -> tagList -> search
    def _search(self, docId):
        """Run a tag-based search for ``docId`` and return the score counter.

        :param docId: id looked up in ``self.idx.docInfo``.
        :return: a ``Counter`` that was passed to the scanner; it is empty
            when ``docId`` is unknown (and stays as the scanner left it if
            the scan raised).
        """
        scoreCounter = Counter()

        doc = self.idx.docInfo.get(docId)
        if doc is None:  # the searched movie does not exist
            return scoreCounter

        # create and add information into the query
        query = Query()
        for pUnitId in doc.pUnitIds:
            pUnit = self.idx.posting[pUnitId]
            query.tagTf[pUnit.tagText] = pUnit.uProp['tf']

        # add the query to index, for the usage during scanning
        qId = self.idx.add_query(query)
        try:
            tagList = list(query.tagTf.keys())
            self.snr.scan(tagList, Search_tag, scoreCounter, query)
        except Exception:
            # Best effort: a failing scan is logged, not propagated.
            self.lg.warn(traceback.format_exc())
        finally:
            # Always remove the temporary query, even when the scan is
            # interrupted by something that is not an ``Exception``.
            self.idx.remove_query(qId)

        return scoreCounter

    def search(self, docId):
        """Return the ids of the ``gs.topK`` best-scored entries for ``docId``.

        :param docId: id of the document to search for.
        :return: list of counter keys, best score first; empty when nothing
            was scored.
        """
        scoreCounter = self._search(docId)
        return [matchedId for matchedId, _ in scoreCounter.most_common(gs.topK)]

    # transfer the searching result into readable information
    def display_search(self, docId):
        """Return the top ``gs.topK`` search results as a tab-separated table.

        The searched document itself is not removed from the results.

        :param docId: id of the document to search for.
        :return: a header line followed by one line per result (doc id,
            score, title, genre, tag count), or the fixed message
            ``'Searched movie is not existing.'`` when the score counter is
            empty.  NOTE(review): an existing movie whose scan produced no
            scores gets the same message.
        """
        scoreCounter = self._search(docId)
        if not scoreCounter:
            return 'Searched movie is not existing.'

        topKRecords = scoreCounter.most_common(gs.topK)

        # head line
        displayMsgList = [
            '%s\t%-20s\t%-90s\t%-40s\t%s\n' %
            ('docId', 'rankingScore', 'title', 'genre', 'tagNum')
        ]

        # info of the recommendations; a separate name keeps the ``docId``
        # parameter from being overwritten inside the loop
        for matchedDocId, score in topKRecords:
            basicInfo = self.idx.docInfo.get(matchedDocId).basicInfo

            # docId, ranking_score, title, genre, tagNum
            displayMsgList.append(
                '%d\t%-20f\t%-90s\t%-40s\t%d\n' %
                (matchedDocId, score, basicInfo['title'], basicInfo['genre'],
                 basicInfo['tagNum']))

        return ''.join(displayMsgList)