Пример #1
0
def main():
    """Parse command-line options and generate HITs in fixed-size batches.

    Options:
        -f  path of the mtk data source file; when omitted, no data is
            loaded and the batch loop below does not run.
        -o  output file of used data (not read here; it travels to
            ``generate_hits`` as part of ``args``).

    The loaded data sources are shuffled, image metadata is queried
    through the ``hit`` helpers, and ``generate_hits`` is called once per
    batch of at most 50 entries. The process always ends with
    ``sys.exit(0)``.
    """
    parser = argparse.ArgumentParser(description='Generate HITs for Amazon Mechnical Turk workers.')
    parser.add_argument('-f', help='The mtk data source file.')
    parser.add_argument('-o', help='The output file of used data.')

    args = parser.parse_args()

    data_sources = []
    if args.f is not None:
        data_sources = utils.load_file(args.f)
        random.shuffle(data_sources)

    db_collections = hit.setup_mongodb()
    data_metainfo = hit.regex_datasource(data_sources)
    images_metainfo = hit.query_imagedata_from_db(db_collections, data_metainfo)

    # data_metainfo[0] holds the data labels (flickr high interesting 1,
    # flickr low interesting 2, pinterest [3, 4, 5]); they are not needed here.
    # data_metainfo[1] holds the (flickr, pinterest) image ids.
    data_ids = data_metainfo[1]

    data_count_limit = 50

    for begin_index in range(0, len(data_sources), data_count_limit):
        end_index = begin_index + data_count_limit
        print("index: " + str(begin_index))
        # The same window is taken from the sources and from their ids.
        generate_hits(
            data_sources[begin_index:end_index],
            begin_index,
            args,
            data_ids[begin_index:end_index],
            images_metainfo,
        )

    sys.exit(0)
def main():
    """Parse command-line options and generate HITs in fixed-size batches.

    Options:
        -f  path of the mtk data source file; when omitted, no data is
            loaded and the batch loop below does not run.
        -o  output file of used data (not read here; it travels to
            ``generate_hits`` as part of ``args``).
        -m  running mode, one of {normal, qua_init, qua}; default 'normal'.
        -q  qualification type id; required when the mode is 'qua'.
        -t  type of Mechanical Turk; default 'sandbox'.

    Running in 'qua' mode without ``-q`` is reported through
    ``parser.error``, which prints the usage and the message to stderr and
    exits with status 2, so calling scripts can detect the failure.
    Otherwise the process ends with ``sys.exit(0)`` after all batches of at
    most 100 entries have been passed to ``generate_hits``.
    """
    parser = argparse.ArgumentParser(description='Generate HITs for Amazon Mechnical Turk workers.')
    parser.add_argument('-f', help='The mtk data source file.')
    parser.add_argument('-o', help='The output file of used data.')
    parser.add_argument('-m', default='normal', help='The running mode in {normal, qua_init, qua}.')
    parser.add_argument('-q', help='The qualification type id.')
    parser.add_argument('-t', default='sandbox', help='The type of Mechanical Turk.')

    args = parser.parse_args()

    if args.m == 'qua' and args.q is None:
        # A missing required option is a usage error: exit non-zero.
        parser.error('Please give qualification type id if running in qualification mode.')

    data_sources = []
    if args.f is not None:
        data_sources = utils.load_file(args.f)
        # Only the 'qua' mode keeps the order in which the file lists entries.
        if args.m != 'qua':
            random.shuffle(data_sources)

    db_collections = hit.setup_mongodb()
    data_metainfo = hit.regex_datasource(data_sources)
    images_metainfo = hit.query_imagedata_from_db(db_collections, data_metainfo)

    # data_metainfo[0] holds the data labels (flickr high interesting 1,
    # flickr low interesting 2, pinterest [3, 4, 5]); they are not needed here.
    # data_metainfo[1] holds the (flickr, pinterest) image ids.
    data_ids = data_metainfo[1]

    data_count_limit = 100

    for begin_index in range(0, len(data_sources), data_count_limit):
        end_index = begin_index + data_count_limit
        print("index: " + str(begin_index))
        # The same window is taken from the sources and from their ids.
        generate_hits(
            args.t,
            data_sources[begin_index:end_index],
            begin_index,
            args,
            data_ids[begin_index:end_index],
            images_metainfo,
        )

    sys.exit(0)