示例#1
0
def resolve_conflicts(pid):
    """Render the conflict-resolution page for project ``pid``.

    Loads the pairs reported by ``storage_model.detect_result_conflicts`` from
    the current user's pair data file, formats them in ``'full'`` display mode
    and passes them to the ``resolve_conflicts.html`` template.

    Args:
        pid: Project id. It is concatenated into the page URLs, so it is
            expected to be a ``str``.

    Returns:
        The rendered template, or the ``page_not_found`` response when no
        project is stored under ``pid``.
    """
    user = current_user

    # The page title is read from the project document, so the project has to
    # be looked up here; bail out with the 404 page when it does not exist.
    project = storage_model.get_project_by_pid(mongo, pid)
    if not project:
        return page_not_found('page_not_found')

    # Restrict the working data to the pairs that are in conflict.
    indices = storage_model.detect_result_conflicts(mongo, pid)
    pair_datafile = storage_model.get_pair_datafile(mongo=mongo,
                                                    user=user,
                                                    pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=indices)

    icons = working_data.get_icons()

    # Ids and formatted rows come back as flat lists; consecutive entries
    # (0, 1), (2, 3), ... are zipped into two-element tuples.
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))

    pairs_formatted = working_data.get_data_display('full')
    data = list(zip(pairs_formatted[0::2], pairs_formatted[1::2]))

    ret_data = {
        'data': data,
        'icons': icons,
        'ids': ids,
        'title': project['project_name'],
        # NOTE(review): this points at /record_linkage/ rather than a
        # conflict-resolution URL -- confirm this is intended.
        'this_url': '/record_linkage/' + pid,
        'next_url': '/project/' + pid,
        'pid': pid,
        'data_size': len(data),
    }
    return render_template('resolve_conflicts.html', data=ret_data)
示例#2
0
def open_big_cell():
    """Open the two cells addressed by ``id1`` and ``id3`` and return them as JSON.

    Query parameters read from ``request.args``:
        id1, id3: Cell ids. Each is split on ``'-'``; part 0 is passed to
            ``dm.open_cell`` as the pair number and part 2 as the attribute
            number.
        mode: Forwarded unchanged to ``dm.open_cell``.

    The working project id is taken from the redis key
    ``<username>_working_pid``.

    Returns:
        A JSON response. If either ``dm.open_cell`` call reports
        ``result == 'fail'``, that result dict is returned as-is (the first
        cell is checked first). Otherwise the response combines the values of
        both cells with the mode/KAPR/delta fields of the second call, and
        the same payload is written to the log via ``storage_model.mlog``.
    """
    user = current_user
    # NOTE(review): redis-py returns bytes unless the client was created with
    # decode_responses=True -- confirm pid is a str at this point.
    pid = r.get(user.username + '_working_pid')
    assignment_id = pid + '-' + user.username

    pair_datafile = storage_model.get_pair_datafile(mongo=mongo,
                                                    user=user,
                                                    pid=pid)
    # The file is loaded twice so that full_data and the DataPairList do not
    # share one object.
    full_data = dl.load_data_from_csv(pair_datafile)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile))
    assignment_status = storage_model.get_assignment_status(
        mongo=mongo, username=user.username, pid=pid)
    kapr_limit = float(assignment_status['kapr_limit'])

    id1 = request.args.get('id1')
    id3 = request.args.get('id3')
    mode = request.args.get('mode')

    # Both cells are always opened, in this order.
    id1_parts = id1.split('-')
    ret1 = dm.open_cell(assignment_id, full_data, working_data, id1_parts[0],
                        id1_parts[2], mode, r, kapr_limit)
    id3_parts = id3.split('-')
    ret2 = dm.open_cell(assignment_id, full_data, working_data, id3_parts[0],
                        id3_parts[2], mode, r, kapr_limit)

    # Report a failure of either cell instead of reading value fields from a
    # failed result.
    for outcome in (ret1, ret2):
        if outcome['result'] == 'fail':
            return jsonify(outcome)

    ret = {
        'value1': ret1['value1'],
        'value2': ret1['value2'],
        'value3': ret2['value1'],
        'value4': ret2['value2'],
        'id': ret1['id'],
        'mode': ret2['mode'],
        'KAPR': ret2['KAPR'],
        'result': ret2['result'],
        'new_delta': ret2['new_delta']
    }

    log_data = {
        'username': user.username,
        'timestamp': time.time(),
        'url': '/get_big_cell',
        'pid': str(pid),
        'assignment_id': str(assignment_id),
        'log': json.dumps(ret)
    }
    storage_model.mlog(mongo=mongo, data=log_data)

    return jsonify(ret)
示例#3
0
def record_linkage(pid):
    """Render the record-linkage working page of project ``pid`` for the current user.

    Args:
        pid: Project id (a ``str``; it is concatenated into keys and URLs).

    Returns:
        * the ``page_not_found`` response when the user has no assignment for
          ``pid``;
        * a redirect to the ``project`` endpoint (with a flash message) when
          ``current_page >= page_size`` in the assignment status;
        * otherwise the rendered ``record_linkage_ppirl.html`` template.

    Side effects:
        Sets the redis keys ``<username>_working_pid``,
        ``<username>_working_pid_rc`` (to 0) and ``<assignment_id>_KAPR``.
    """
    def pair_up(flat):
        # Consecutive entries (0, 1), (2, 3), ... form one two-element tuple.
        return list(zip(flat[::2], flat[1::2]))

    user = current_user

    # Without an assignment for this user there is nothing to show.
    project = storage_model.get_assignment(mongo=mongo,
                                           username=user.username,
                                           pid=pid)
    if not project:
        return page_not_found('page_not_found')

    # Project id plus username identifies the assignment.
    assignment_id = '-'.join((pid, user.username))

    # Current progress and display settings of the assignment.
    status = storage_model.get_assignment_status(mongo=mongo,
                                                 username=user.username,
                                                 pid=pid)
    current_page = status['current_page']
    page_size = status['page_size']
    kapr_limit = status['kapr_limit']
    current_kapr = status['current_kapr']
    display_mode = status['display_mode']
    isfull = status['isfull'] if 'isfull' in status else False
    # The stored flag is compared against the string 'true'.
    default_mode = 'B' if isfull == 'true' else 'M'

    if current_page >= page_size:
        flask.flash('You have completed the project.', 'alert-success')
        return redirect(url_for('project'))

    # Working data: the current block of the assignee's pair file.
    pair_datafile = storage_model.get_pair_datafile(mongo=mongo,
                                                    user=user,
                                                    pid=pid)
    indices, pair_idx = storage_model.get_current_block(
        mongo=mongo, pid=pid, assignee=user.username)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=indices)

    # Full data: the project-wide pair file, also used to size the KAPR.
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    working_data.set_kapr_size(
        storage_model.get_total_pairs_from_pairfile(project_pairfile))
    full_data = dl.load_data_from_csv(project_pairfile)

    # Values handed to the template.
    icons = working_data.get_icons()
    ids = pair_up(working_data.get_ids())
    data_mode = display_mode.lower()
    data_mode_list = storage_model.get_data_mode(assignment_id,
                                                 ids,
                                                 r=r,
                                                 data_mode=data_mode,
                                                 default_mode=default_mode)
    data = pair_up(working_data.get_data_display(data_mode, data_mode_list))

    record_ids = storage_model.get_record_id_by_pair_id(mongo, pid, indices)

    # Collect the KAPR delta of every pair; stop at the first missing pair.
    delta = []
    for position in range(working_data.size()):
        pair = working_data.get_data_pair_by_index(position)
        if pair is None:
            break
        delta.extend(
            dm.KAPR_delta(full_data, pair, ['M'] * 11, len(full_data)))

    # Cache what the ajax endpoints read back later.
    r.set(user.username + '_working_pid', pid)
    r.set(user.username + '_working_pid_rc', 0)
    kapr_now = float(current_kapr)
    r.set(assignment_id + '_KAPR', kapr_now)

    # Answers saved so far for this assignment.
    answers = storage_model.get_working_answers(assignment_id, r)

    ret_data = dict(
        data=data,
        data_mode_list=data_mode_list,
        icons=icons,
        ids=ids,
        record_ids=record_ids,
        pair_ids=indices,
        title=project['project_name'],
        kapr=round(100 * kapr_now, 1),
        kapr_limit=float(kapr_limit),
        page_number=current_page + 1,
        page_size=page_size,
        pair_num_base=pair_idx + 1,
        delta=delta,
        this_url='/record_linkage/' + pid,
        saved_answers=answers,
        data_size=len(data),
        isfull=isfull,
    )
    return render_template('record_linkage_ppirl.html', data=ret_data)
示例#4
0
def resolve_conflicts2(pid):
    """Render one page of the conflict-resolution project for ``pid``.

    Args:
        pid: Project id (a ``str``; it is concatenated into keys and URLs).

    Returns:
        * the ``page_not_found`` response when no conflict project exists for
          the current user and ``pid``;
        * a redirect to the ``project`` endpoint (with a flash message) when
          ``current_page >= page_size``;
        * otherwise the rendered ``resolve_conflicts2.html`` template.

    Side effects:
        Sets the redis keys ``<username>_working_pid``,
        ``<username>_working_pid_rc`` (to 1) and ``<assignment_id>_KAPR``.
    """
    user = current_user

    # find if this project exist
    assignment = storage_model.get_conflict_project(mongo=mongo,
                                                    username=user.username,
                                                    pid=pid)
    if not assignment:
        return page_not_found('page_not_found')

    # username and project_id can identify an assignment
    assignment_id = pid + '-' + user.username

    # get assignment status
    current_page = assignment['current_page']
    page_size = int(assignment['page_size'])
    kapr_limit = assignment['kapr_limit']
    current_kapr = assignment['current_kapr']
    if current_page >= page_size:
        flask.flash('You have completed the project.', 'alert-success')
        return redirect(url_for('project'))

    # get working data and full data; both come from the project pair file,
    # so its path is looked up once and the file is loaded separately for each
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    pair_idx = assignment['pair_idx']
    # 'pair_num' holds one list of pair indices per page
    indices = assignment['pair_num'][current_page]
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(project_pairfile), indices=indices)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)

    isfull = assignment['isfull']

    # prepare return data; flat lists are zipped into (first, second) tuples
    icons = working_data.get_icons()
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode = 'masked'
    data_mode_list = storage_model.get_conflict_data_mode(
        pid, ids, mongo, r, assignment_id, isfull)
    pairs_formatted = working_data.get_data_display(data_mode, data_mode_list)
    data = list(zip(pairs_formatted[0::2], pairs_formatted[1::2]))

    record_ids = storage_model.get_record_id_by_pair_id(mongo, pid, indices)

    # get the delta information; stop at the first missing pair
    delta = list()
    for i in range(working_data.size()):
        data_pair = working_data.get_data_pair_by_index(i)
        if data_pair is None:
            break
        delta += dm.KAPR_delta(full_data, data_pair, 11 * ['M'],
                               len(full_data))

    # prepare cache data for ajax query
    r.set(user.username + '_working_pid', pid)
    r.set(user.username + '_working_pid_rc', 1)
    KAPR_key = assignment_id + '_KAPR'
    r.set(KAPR_key, float(current_kapr))

    # get saved working answers
    answers = storage_model.get_working_answers(assignment_id, r)

    # get users' choices information
    choices, choice_cnt = storage_model.get_users_choices(mongo=mongo,
                                                          pid=pid,
                                                          indices=indices)

    ret_data = {
        'data': data,
        'data_mode_list': data_mode_list,
        'icons': icons,
        'ids': ids,
        'pair_ids': indices,
        'record_ids': record_ids,
        'title': 'resolve conflicts',
        'kapr': round(100 * float(current_kapr), 1),
        'kapr_limit': kapr_limit,
        'page_number': current_page + 1,
        'page_size': page_size,
        'pair_num_base': pair_idx + 1,
        'delta': delta,
        'this_url': '/resolve_conflicts2/' + pid,
        'saved_answers': answers,
        'data_size': len(data),
        'choices': choices,
        'choice_cnt': choice_cnt,
        'isfull': isfull,
    }
    return render_template('resolve_conflicts2.html', data=ret_data)
示例#5
0
def create_resolve_conflict_project(pid):
    """Create and store the conflict-resolution project for project ``pid``.

    Steps, in order:
        1. Detect the conflicting pair indices and group them by the
           project's ``block_id`` entries (blocks without conflicts are
           dropped).
        2. Reset the owner's ``<pid>-<owner>_KAPR`` redis key to 0.0 and run
           ``dm.batched_open_cell`` over the conflicting pairs, then read the
           key back as the project's starting KAPR.
        3. Create (or truncate) the empty result file
           ``<DATA_DIR>/internal/<owner>_<project_name>_conflict_result.csv``.
        4. Save the conflict project via
           ``storage_model.save_conflict_project``.

    Args:
        pid: Project id (a ``str``; it is concatenated into the redis key).

    Returns:
        The literal string ``'block_id'``.
        NOTE(review): this looks like a placeholder; callers may rely on it,
        so it is kept unchanged.
    """
    project = storage_model.get_project_by_pid(mongo, pid)
    owner = project['owner']

    assignment_id = pid + '-' + owner
    kapr_key = assignment_id + '_KAPR'
    # Limit used both for the batched open and in the stored project.
    kapr_limit = 100

    # get pair_num of conflicts
    conflict_indices = storage_model.detect_result_conflicts(mongo, pid)

    # arrange conflict pairs by block, keeping the order of conflict_indices
    # inside each block and skipping blocks that contain no conflict
    conflicts = []
    for block in project['block_id']:
        in_block = [idx for idx in conflict_indices if idx in block]
        if in_block:
            conflicts.append(in_block)

    # simulate open cells for those opened by assignees
    pair_datafile = storage_model.get_pair_datafile_by_owner(mongo=mongo,
                                                             owner=owner,
                                                             pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile),
        indices=conflict_indices)
    project_pairfile = storage_model.get_project_pair_datafile(mongo=mongo,
                                                               user=owner,
                                                               pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)

    # Start from a zero KAPR before the batched open.
    r.set(kapr_key, 0.0)

    # Flat id list -> (first, second) tuples, one per pair.
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode_list = storage_model.get_conflict_data_mode(
        pid, ids, mongo, r, assignment_id)
    dm.batched_open_cell(assignment_id,
                         full_data,
                         working_data,
                         ids,
                         data_mode_list,
                         r,
                         kapr_limit=kapr_limit)

    # Read the key back after the batched open; the value is stored as
    # returned by redis.
    current_kapr = r.get(kapr_key)

    result_path = os.path.join(
        config.DATA_DIR, 'internal',
        owner + '_' + project['project_name'] + '_conflict_result.csv')
    # create (or truncate) the result file; the context manager guarantees
    # the handle is closed
    with open(result_path, 'w+'):
        pass

    isfull = storage_model.has_full_assignee(mongo, pid)

    conflict_project = {
        'pid': pid,
        'project_name': project['project_name'],
        'pair_num': conflicts,
        'current_page': 0,
        'page_size': len(conflicts),
        'kapr_limit': kapr_limit,
        'current_kapr': current_kapr,
        'pair_idx': 0,
        'total_pairs': len(conflict_indices),
        'result_path': result_path,
        'isfull': isfull,
    }

    storage_model.save_conflict_project(mongo, conflict_project)

    return 'block_id'
示例#6
0
import math
import data_loader as dl
import data_display as dd
import data_model as dm
import config

# Flask application object for this module.
app = Flask(__name__)

# Redis client, selected by config.ENV: the REDIS_URL environment variable in
# production, a local server (port 6379, db 0) in development.
# NOTE(review): for any other config.ENV value `r` is never bound, so the
# first use of `r` raises NameError -- confirm no other value is expected.
if config.ENV == 'production':
    r = redis.from_url(os.environ.get("REDIS_URL"))
elif config.ENV == 'development':
    r = redis.Redis(host='localhost', port=6379, db=0)

# Global data loaded once at import time; it should be common across all
# users and not affected by running multiple processes.
DATASET = dl.load_data_from_csv('data/section2.csv')
DATA_PAIR_LIST = dm.DataPairList(
    data_pairs=dl.load_data_from_csv('data/ppirl.csv'))


def state_machine(function_name):
    def wrapper(f):
        @wraps(f)
        def inner_wrapper(*args, **kwargs):
            sequence = config.SEQUENCE
            for i in range(len(sequence)):
                if sequence[i] == function_name:
                    session['state'] = i
                    break
            return f(*args, **kwargs)

        return inner_wrapper