Exemplo n.º 1
0
def compute_similarities(text, models, count=None):
    """Finds items that are similar to the specified text

    The simhash of ``text`` is compared against the stored simhash of each
    entry in ``models``. Entries whose hash is identical to that of ``text``
    are skipped, and only entries whose similarity is at least
    ``similarity_threshold`` are kept.

    :param text: The text to be used for comparison
    :param models: The list of models to be compared against text
                   Each of the entries should have ``id`` and ``simhash``
                   properties and an ``as_dict()`` method
    :param count: The maximum no. of similar items to return. When ``None``,
                  ``max_similar_messages`` is used instead
    :returns: A list of dicts, one per similar item, ordered from the highest
              to the lowest score. Each dict is the item's ``as_dict()``
              without the ``simhash`` key and with an added ``score`` key.
              The list is empty when no item is similar enough
    """
    # Get the simhash of the submitted message
    _hash = simhash(util.unicodeToAscii(text))
    candidates, scores = {}, []

    # TODO: Investigate ways of speeding this up - complexity is O(n)
    for model in models:
        target = simhash(hash=long(model.simhash))
        # Identical hashes are skipped (presumably the submitted text itself)
        if long(target) == long(_hash):
            continue
        similarity = _hash.similarity(target)
        if similarity >= similarity_threshold:
            scores.append((model.id, similarity))
            candidates[model.id] = model
    if not scores:
        return []

    # Highest score first
    scores.sort(key=lambda x: x[1], reverse=True)
    result_size = max_similar_messages if count is None else count

    # Take a slice instead of indexing by position: there may be fewer
    # matches than the requested size, and indexing past the end of
    # ``scores`` raised an IndexError. A negative size is clamped to zero so
    # that it still yields no results, as it did with range()
    retval = []
    for model_id, score in scores[:max(result_size, 0)]:
        message_dict = candidates[model_id].as_dict()
        del message_dict['simhash']
        message_dict['score'] = score
        retval.append(message_dict)
    return retval
Exemplo n.º 2
0
def add_message(deployment_id):
    """Adds a new message for the deployment in :deployment_id

    The input parameters, read from the JSON body of the request, are:
        origin_message_id: stored as the message's ``origin_message_id``
        content: string - the text of the message; its simhash is computed
                 and stored with the message

    The request is aborted with a 400 when the JSON body is missing or empty
    or when either parameter is absent, and with a 404 when the deployment
    does not exist.

    :param deployment_id: the id of the deployment
    :returns: the JSON representation of the newly created message
    """
    if not request.json:
        abort(400)
    _post = request.json
    # Both fields are read below, so the request is invalid if *either* one
    # is missing (the previous ``and`` only rejected it when both were)
    if 'origin_message_id' not in _post or 'content' not in _post:
        abort(400)

    # Does the deployment exist
    deployment = Deployment.by_id(deployment_id)
    if deployment is None:
        abort(404)

    # Compute the simhash on the message content
    _hash = simhash(util.unicodeToAscii(_post['content']))
    message = Message(deployment_id=deployment_id,
                      origin_message_id=_post['origin_message_id'],
                      content=_post['content'],
                      simhash=str(_hash))
    message.create()
    return jsonify(message.as_dict())
Exemplo n.º 3
0
def compute_similarities(text, models, count=None):
    """Finds items that are similar to the specified text

    The simhash of ``text`` is compared against the stored simhash of each
    entry in ``models``. Entries whose hash is identical to that of ``text``
    are skipped, and only entries whose similarity is at least
    ``similarity_threshold`` are kept.

    :param text: The text to be used for comparison
    :param models: The list of models to be compared against text
                   Each of the entries should have ``id`` and ``simhash``
                   properties and an ``as_dict()`` method
    :param count: The maximum no. of similar items to return. When ``None``,
                  ``max_similar_messages`` is used instead
    :returns: A list of dicts, one per similar item, ordered from the highest
              to the lowest score. Each dict is the item's ``as_dict()``
              without the ``simhash`` key and with an added ``score`` key.
              The list is empty when no item is similar enough
    """
    # Get the simhash of the submitted message
    _hash = simhash(util.unicodeToAscii(text))
    candidates, scores = {}, []

    # TODO: Investigate ways of speeding this up - complexity is O(n)
    for model in models:
        target = simhash(hash=long(model.simhash))
        # Identical hashes are skipped (presumably the submitted text itself)
        if long(target) == long(_hash):
            continue
        similarity = _hash.similarity(target)
        if similarity >= similarity_threshold:
            scores.append((model.id, similarity))
            candidates[model.id] = model
    if not scores:
        return []

    # Highest score first
    scores.sort(key=lambda x: x[1], reverse=True)
    result_size = max_similar_messages if count is None else count

    # Take a slice instead of indexing by position: there may be fewer
    # matches than the requested size, and indexing past the end of
    # ``scores`` raised an IndexError. A negative size is clamped to zero so
    # that it still yields no results, as it did with range()
    retval = []
    for model_id, score in scores[:max(result_size, 0)]:
        message_dict = candidates[model_id].as_dict()
        del message_dict['simhash']
        message_dict['score'] = score
        retval.append(message_dict)
    return retval
Exemplo n.º 4
0
def add_message(deployment_id):
    """Adds a new message for the deployment in :deployment_id

    The input parameters, read from the JSON body of the request, are:
        origin_message_id: stored as the message's ``origin_message_id``
        content: string - the text of the message; its simhash is computed
                 and stored with the message

    The request is aborted with a 400 when the JSON body is missing or empty
    or when either parameter is absent, and with a 404 when the deployment
    does not exist.

    :param deployment_id: the id of the deployment
    :returns: the JSON representation of the newly created message
    """
    if not request.json:
        abort(400)
    _post = request.json
    # Both fields are read below, so the request is invalid if *either* one
    # is missing (the previous ``and`` only rejected it when both were)
    if 'origin_message_id' not in _post or 'content' not in _post:
        abort(400)

    # Does the deployment exist
    deployment = Deployment.by_id(deployment_id)
    if deployment is None:
        abort(404)

    # Compute the simhash on the message content
    _hash = simhash(util.unicodeToAscii(_post['content']))
    message = Message(deployment_id=deployment_id,
                      origin_message_id=_post['origin_message_id'],
                      content=_post['content'],
                      simhash=str(_hash))
    message.create()
    return jsonify(message.as_dict())
Exemplo n.º 5
0
def add_report(deployment_id):
    """Adds a new report to the deployment specified by the ``deployment_id``
    parameter

    Input parameters, read from the JSON body of the request:
        origin_report_id: stored as the report's ``origin_report_id``; must
                          not already be registered for the deployment
        title: string - Title of the report
        description: string - Description of the report
        categories: array of integers - category ids, matched against
                    ``Category.origin_category_id`` for the deployment

    The request is aborted with a 400 when the JSON body is missing, when
    any of the fields is missing, when the report has already been
    registered or when none of the specified categories exists.

    :param deployment_id: the id of the deployment
    :returns: the JSON representation of the newly created report
    """
    # NOTE(review): presumably aborts the request when the deployment does
    # not exist - confirm against verify_deployment
    verify_deployment(deployment_id)
    _post = request.json
    # A missing (or non-object) JSON body cannot hold any of the required
    # fields; reject it here instead of failing on the membership tests below
    if not isinstance(_post, dict):
        app.logger.error("The request body is not a JSON object")
        abort(400)

    errors = {}
    # Check for fields
    if 'origin_report_id' not in _post:
        errors['origin_report_id'] = 'The report id is missing'
    if 'title' not in _post:
        errors['title'] = 'The report title is missing'
    if 'description' not in _post:
        errors['description'] = 'The report description is missing'
    if 'categories' not in _post or len(_post['categories']) == 0:
        errors['categories'] = 'The report categories must be specified'

    # Did we encounter any errors?
    if errors:
        app.logger.error("There are some errors in the request %r" % errors)
        abort(400)

    # Does the specified report already exist?
    _report = db.session.query(Report).\
        filter(Report.origin_report_id == _post['origin_report_id'],
               Report.deployment_id == deployment_id).first()

    if _report is not None:
        app.logger.error("The report %s has already been registered" %
                         _post['origin_report_id'])
        abort(400)

    # Get the categories
    categories = db.session.query(Category).\
        filter(Category.deployment_id == deployment_id,
               Category.origin_category_id.in_(_post['categories'])).all()

    # Have the specified category ids been registered?
    # Only a complete miss is rejected; ids that are not found are ignored
    # as long as at least one category matches
    if not categories:
        app.logger.error("The specified categories are invalid")
        abort(400)

    # Compute the simhash on the report description
    _hash = simhash(util.unicodeToAscii(_post['description']))
    report = Report(deployment_id=deployment_id,
                    origin_report_id=_post['origin_report_id'],
                    title=_post['title'],
                    description=_post['description'],
                    simhash=str(_hash))
    # Create the report
    report.create()

    # Save the report categories
    report_categories = [
        ReportCategory(report_id=report.id, category_id=category.id)
        for category in categories
    ]
    ReportCategory.create_all(report_categories)

    return jsonify(report.as_dict())
Exemplo n.º 6
0
def add_report(deployment_id):
    """Adds a new report to the deployment specified by the ``deployment_id``
    parameter

    Input parameters, read from the JSON body of the request:
        origin_report_id: stored as the report's ``origin_report_id``; must
                          not already be registered for the deployment
        title: string - Title of the report
        description: string - Description of the report
        categories: array of integers - category ids, matched against
                    ``Category.origin_category_id`` for the deployment

    The request is aborted with a 400 when the JSON body is missing, when
    any of the fields is missing, when the report has already been
    registered or when none of the specified categories exists.

    :param deployment_id: the id of the deployment
    :returns: the JSON representation of the newly created report
    """
    # NOTE(review): presumably aborts the request when the deployment does
    # not exist - confirm against verify_deployment
    verify_deployment(deployment_id)
    _post = request.json
    # A missing (or non-object) JSON body cannot hold any of the required
    # fields; reject it here instead of failing on the membership tests below
    if not isinstance(_post, dict):
        app.logger.error("The request body is not a JSON object")
        abort(400)

    errors = {}
    # Check for fields
    if 'origin_report_id' not in _post:
        errors['origin_report_id'] = 'The report id is missing'
    if 'title' not in _post:
        errors['title'] = 'The report title is missing'
    if 'description' not in _post:
        errors['description'] = 'The report description is missing'
    if 'categories' not in _post or len(_post['categories']) == 0:
        errors['categories'] = 'The report categories must be specified'

    # Did we encounter any errors?
    if errors:
        app.logger.error("There are some errors in the request %r" % errors)
        abort(400)

    # Does the specified report already exist?
    _report = db.session.query(Report).\
        filter(Report.origin_report_id == _post['origin_report_id'],
               Report.deployment_id == deployment_id).first()

    if _report is not None:
        app.logger.error("The report %s has already been registered" %
                         _post['origin_report_id'])
        abort(400)

    # Get the categories
    categories = db.session.query(Category).\
        filter(Category.deployment_id == deployment_id,
               Category.origin_category_id.in_(_post['categories'])).all()

    # Have the specified category ids been registered?
    # Only a complete miss is rejected; ids that are not found are ignored
    # as long as at least one category matches
    if not categories:
        app.logger.error("The specified categories are invalid")
        abort(400)

    # Compute the simhash on the report description
    _hash = simhash(util.unicodeToAscii(_post['description']))
    report = Report(deployment_id=deployment_id,
                    origin_report_id=_post['origin_report_id'],
                    title=_post['title'],
                    description=_post['description'],
                    simhash=str(_hash))
    # Create the report
    report.create()

    # Save the report categories
    report_categories = [
        ReportCategory(report_id=report.id, category_id=category.id)
        for category in categories
    ]
    ReportCategory.create_all(report_categories)

    return jsonify(report.as_dict())