Example #1
 def __init__(self):
     self.config = ConfigParser.ConfigParser()
     if not os.path.isfile("/usr/local/etc/moustash/moustash.ini"):
         print "Configuration file (/usr/local/etc/moustash/moustash.ini) not found !!!"
         print "Exit !!!"
         sys.exit(1)
     self.config.read("/usr/local/etc/moustash/moustash.ini")
     self.moustash_config = {}
     self.moustash_config["Moustash"] = config_section_map(self.config, "Moustash")
     self.moustash_config["Cuir"] = config_section_map(self.config, "Cuir")
     self.broker = {}
     if self.moustash_config["Moustash"]["transport"] == "redis":
         redis_parameters = {"redis_host" : "localhost", "redis_port" : "6379", "redis_db" : "0", "redis_namespace": "logstash:moustash"}
         self.fill_broker_options(redis_parameters)
         self.broker_connection = self.connect_to_redis()
     elif self.moustash_config["Moustash"]["transport"] == "rabbitmq":
         rabbitmq_parameters = {"rabbitmq_host" : "localhost", "rabbitmq_port": "5672",
                                "rabbitmq_ssl" : "0", "rabbitmq_ssl_key" : None, 
                                "rabbitmq_ssl_cert" : None, "rabbitmq_ssl_cacert" : None,
                                "rabbitmq_vhost" : "/", "rabbitmq_username" : "guest",
                                "rabbitmq_password" : "guest",
                                "rabbitmq_queue" : "logstash-queue", "rabbitmq_queue_durable" : "0",
                                "rabbitmq_exchange_type" : "direct", "rabbitmq_exchange_durable" : "0",
                                "rabbitmq_key" : "logstash-key", "rabbitmq_exchange" : "logstash-exchange"}
         self.fill_broker_options(rabbitmq_parameters)
         self.broker_connection = self.connect_to_rabbitmq()
     else:
         print("Not yet implemented : %s !" % moustash_config["Moustash"]["transport"])
Example #2
 def __init__(self):
     self.config = ConfigParser.ConfigParser()
     if not os.path.isfile("/usr/local/etc/moustash/moustash.ini"):
         print "Configuration file (/usr/local/etc/moustash/moustash.ini) not found !!!"
         print "Exit !!!"
         sys.exit(1)
     self.config.read("/usr/local/etc/moustash/moustash.ini")
     self.moustash_config = {}
     self.moustash_config["Moustash"] = config_section_map(
         self.config, "Moustash")
     self.moustash_config["Cuir"] = config_section_map(self.config, "Cuir")
     self.broker = {}
     if self.moustash_config["Moustash"]["transport"] == "redis":
         redis_parameters = {
             "redis_host": "localhost",
             "redis_port": "6379",
             "redis_db": "0",
             "redis_namespace": "logstash:moustash"
         }
         self.fill_broker_options(redis_parameters)
         self.broker_connection = self.connect_to_redis()
     elif self.moustash_config["Moustash"]["transport"] == "rabbitmq":
         rabbitmq_parameters = {
             "rabbitmq_host": "localhost",
             "rabbitmq_port": "5672",
             "rabbitmq_ssl": "0",
             "rabbitmq_ssl_key": None,
             "rabbitmq_ssl_cert": None,
             "rabbitmq_ssl_cacert": None,
             "rabbitmq_vhost": "/",
             "rabbitmq_username": "******",
             "rabbitmq_password": "******",
             "rabbitmq_queue": "logstash-queue",
             "rabbitmq_queue_durable": "0",
             "rabbitmq_exchange_type": "direct",
             "rabbitmq_exchange_durable": "0",
             "rabbitmq_key": "logstash-key",
             "rabbitmq_exchange": "logstash-exchange"
         }
         self.fill_broker_options(rabbitmq_parameters)
         self.broker_connection = self.connect_to_rabbitmq()
     else:
         print("Not yet implemented : %s !" %
               moustash_config["Moustash"]["transport"])
Example #3
def main(argv):
    ###############################
    # Getting programme options
    ###############################
    help_str = 'no help provided'
    try:
        opts, args = getopt.getopt(argv, "ho:c:i:", ["config=", "output-file=", "input-folder="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-o", "--output-file"):
            res_file = arg
        elif opt in ("-i", "--input-folder"):
            input_folder = arg
        elif opt in ("-c", "--config"):
            config_file = arg

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    fo = open(res_file, "wb")
    #############################
    # Get list of results folders
    #############################
    results = glob.glob(input_folder + '/*')
    fieldnames = ["concept", "g", "w", "map"]
    writer = csv.DictWriter(fo, fieldnames=fieldnames)
    writer.writeheader()
    for result in results:
        fname = os.path.basename(result)
        values = re.split("g-|_w-", fname)
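        # Assumed result-folder naming such as "g-<g>_w-<w>", so values[1] holds the g
        # value and values[2] the w value written to the CSV below.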
        files = glob.glob(os.path.join(result, "val/trec_eval_results") + "/*")
        for file_res in files:
            concept_name = os.path.basename(file_res)
            with open(file_res, "rb") as trec_res:
                for line in trec_res.read().splitlines():
                    if "map" in line and "gm_map" not in line and concept_name in line:
                        print line
                        token_line = re.split('[ \t]+', line)
                        row = dict()
                        row[fieldnames[0]] = concept_name
                        row[fieldnames[1]] = values[1]
                        row[fieldnames[2]] = values[2]
                        row[fieldnames[3]] = token_line[-1]
                        writer.writerow(row)
    fo.close()
Example #4
def main(argv):
    ###############################
    # Getting program options
    ###############################
    generate_all_file = False
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "", ["list-id=", "input-predictions=",
                                              "config=", "all",
                                              "results-dir="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-predictions":
            input_predictions = arg
        elif opt in "--list-id":
            list_id = arg
        elif opt in "--results-dir":
            results_dir = arg
        elif opt in "--all":
            generate_all_file = True

    if generate_all_file:
        logging.info("all option activated")
    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    #########################
    # Get list of predictions files
    #########################
    concepts = glob.glob(input_predictions + '*.out')
    for concept in concepts:
        logging.info("concept file " + concept)

    #########################
    # Get list of id
    #########################
    logging.info('get concept list from ' + list_id)
    if list_id.startswith('http://'):
        res = urllib2.urlopen(list_id).read()
    else:
        res = open(list_id).read()
    photo_ids = res.splitlines()
    logging.info('found ids of ' + str(len(photo_ids)) + ' photos')

    ##########################
    # Create output dir
    ##########################
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    if generate_all_file:
        fall = open(results_dir + 'all.top', "wb")

    logging.info('initialisation des concepts')
    begin_time = timeit.default_timer()
    for concept_file in concepts:
        concept_name = os.path.basename(concept_file).split(".")[0]
        top_output = results_dir + concept_name + '.top'
        fo = open(concept_file)
        file_result = open(top_output, 'wb')
        predictions = fo.read()
        lines = predictions.splitlines()
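        # libsvm predict output with -b 1: the first line is a header like "labels 1 -1",
        # each following line is "<predicted label> <prob first label> <prob second label>",
        # so which column holds the positive-class probability depends on the header order.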
        indicator = lines[0].split(" ")
        score_row = 2
        if indicator[1] == "1":
            score_row = 1
        idx_begin = 1
        for photo_id in photo_ids:
            id_string = photo_id.split(".")[0]
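            # trec_eval run format: <topic/concept> Q0 <doc id> <rank> <score> <run tag>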
            line = concept_name + " Q0 " + id_string + " 0 " + lines[idx_begin].split(" ")[score_row] + " R\n"
            if generate_all_file:
                fall.write(line)
            file_result.write(line)
            idx_begin += 1
        file_result.close()
    if generate_all_file:
        fall.close()

    end_time = timeit.default_timer()
    logging.info('end after  ' + str(end_time - begin_time) + 's generated ' + str(len(concepts)) + " concept models")
Example #5
def main(argv):
    ###############################
    # Getting programme options
    ###############################
    help_str = 'no help provided'
    try:
        opts, args = getopt.getopt(argv, "hr:c:i:", ["config=", "output-file=", "input-folder="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-c", "--config"):
            config_file = arg

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_predict = config_section_map(config, 'Predict')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    model_folders = dict()

    folder_tmpl = Template('centers-${centers}_g-${g}_w-${w}')
    res_file = config_predict['best_results_sift']
    input_folder = config_predict['sift_folders']

    with open(res_file, "r") as best_option:
        content = best_option.read().splitlines()
        for line in content:
            row = line.split(" ")
            concept_name = row[1]
            concept_map = dict()
            nb_centers = row[3]
            g_value = row[5]
            w_value = row[7]
            folder_name = folder_tmpl.substitute(g=g_value, centers=nb_centers, w=w_value)
            if not input_folder.startswith("/"):
                folder_path = os.path.join(config_general['project_dir'], input_folder, folder_name)
            else:
                folder_path = os.path.join(input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " + concept_name + " not found")
            else:
                concept_map['sift_folder'] = folder_path
                concept_map['sift_g'] = g_value
                concept_map['sift_w'] = w_value
                concept_map['sift_centers'] = nb_centers
                model_folders[concept_name] = concept_map
Example #6
def main(argv):
    ###############################
    # Getting program options
    ###############################
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "tc:ho:", ["concepts=", "results-dir=",
                                                    "input-svm=", "config=",
                                                    "svm-args=", "histograms="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-svm":
            input_svm = arg
        elif opt in "--histograms":
            histogram_file = arg
        elif opt in "--results-dir":
            results_dir = arg
        elif opt in "--svm-args":
            svm_options = arg

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_svm = config_section_map(config, 'libSvm')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    ############################
    # Initialise lib-svm
    ############################
    svm_predict = config_svm['svm_predict']

    now = datetime.now()
    script_name = os.path.basename(__file__)
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    #########################
    # Init svm train_photos command
    #########################
    train_cmd = [svm_predict]
    if 'svm_options' in locals():
        logging.info('svm options ' + svm_options)
        train_cmd = train_cmd + svm_options.split(' ')  # bug with space in subprocess so we have to split it here

    train_cmd += [histogram_file]
    #########################
    # Get list of svm files
    #########################
    concepts = glob.glob(input_svm + '*.model')
    for concept in concepts:
        logging.info("concept file " + concept)

    ##########################
    # Create output dir
    ##########################
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    logging.info('initialisation des concepts')
    begin_time = timeit.default_timer()
    for concept_file in concepts:
        concept_name = os.path.basename(concept_file).split(".")[0]
        model_output = results_dir + concept_name + '.out'
        logging.info('model for ' + concept_name + ' registered at ' + model_output)
        command = [concept_file, model_output]
        command = train_cmd + command
        logging.info('svm call : ' + " ".join(command))
        ret = subprocess.call(command)
        if ret != 0:
            logging.info('exit code from train_photos for ' + concept_name + ' : ' + str(ret))
    end_time = timeit.default_timer()
    logging.info('end after  ' + str(end_time - begin_time) + 's generated ' + str(len(concepts)) + " concept models")
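Example #7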
def main(argv):
    ###############################
    # Getting program options
    ###############################
    generate_all_file = False
    help_str = ''
    try:
        opts, args = getopt.getopt(argv, "", ["output=", "input-dir=",
                                              "config=", "nb-cluster="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-dir":
            input_dir = arg
        elif opt in "--output":
            output = arg
        elif opt in "--nb-cluster":
            nb_cluster = float(arg)

    if generate_all_file:
        logging.info("all option activated")

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    #########################
    # Get mapping files
    #########################
    logging.info("looking for mapping in folder : " + input_dir)
    mappings = glob.glob(input_dir + '/*.map')
    logging.info("found " + str(len(mappings)) + " mapping files")

    logging.info("nb clusters : " + str(nb_cluster))
    ########################################
    # Create the results file and folder
    ########################################
    logging.info("results file  = " + output)
    result_name = os.path.basename(output)
    results_dir = os.path.dirname(output)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)
    output_temp = output + ".temp"
    with open(output_temp, 'w') as res_file:
        for m in mappings:
            hist = create_histogram(m, nb_cluster)
            res_file.write(os.path.basename(os.path.splitext(m)[0]))
            res_file.write(" ")
            for i, val in enumerate(hist):
                if val != 0.0:
                    res_file.write(str(i+1) + ":")
                    res_file.write(str(val) + " ")
            res_file.write("\n")

    #####################
    # Ordering the file
    ####################
    os.system("sort " + output_temp + " > " + output)
    sed_command = "sed -i.back -e 's/^[0-9]*_[0-9]* /0 /g' " + output
    os.system(sed_command)
    logging.info(sed_command)
    os.remove(output_temp)
Example #8
def main(argv):
    ###############################
    # Getting programme options
    ###############################
    help_str = 'histogram.py -u <url> -o <outputfile>'
    download = False
    try:
        opts, args = getopt.getopt(argv, "hd:u:o:c:", ["config=", "url=", "output="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-u", "--url"):
            url_list = arg
        elif opt in ("-o", "--output"):
            res_file = arg
        elif opt in "-d":
            dl_dir = arg
        elif opt in ("-c", "--config"):
            config_file = arg

    if not ('url_list' in locals() and 'res_file' in locals()
            and 'dl_dir' in locals() and 'config_file' in locals()):
        logging.warning('histogram.py not correctly called')
        print help_str
        sys.exit()

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_libc = config_section_map(config, 'libC')

    ######################
    # Load the C libraries
    ######################
    lib = cdll.LoadLibrary(config_libc['libhistogram'])
    libc = CDLL('libc.so.6')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    script_name = os.path.basename(__file__)
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    logging.info('Getting url from ' + url_list)
    logging.info('Output file is ' + res_file)

#    url_list = 'http://mrim.imag.fr/GINF53C4/PROJET/train/urls.txt'
#    res_file = "../results/train_photos.results"

    photo_path = dl_dir
    if not (os.path.isdir(photo_path)):
        logging.info("creating folder to dl photos")
        os.system("mkdir -p " + photo_path)
    else:
        logging.info("Photo will be dl to " + photo_path)

    logging.info('get url list from ' + url_list)
    response = urllib2.urlopen(url_list).read()

    logging.info("dl all photos")
    begin_dl = timeit.default_timer()
    nb_photo = 0
    nb_existing = 0
    for line in response.splitlines():
        if not os.path.exists(photo_path + "/" + line.split('/')[-1]):
            os.system("wget -P " + photo_path + " " + line + " >/dev/null 2>&1")
            nb_photo += 1
        else:
            nb_existing += 1
    elapsed = timeit.default_timer() - begin_dl
    logging.info("dl " + str(nb_photo) + " photos took " + str(elapsed))
    logging.info(str(nb_existing) + " photos already in the folder")

    logging.info('Opening file for results ' + res_file)
    results_path = os.path.dirname(res_file)
    if not os.path.exists(results_path):
        os.system("mkdir -p " + results_path)
    fp = libc.fopen(res_file, "w")

    ##################################
    # Begin main loop
    ##################################
    hist = pointer(HISTROGRAM())
    begin_time = timeit.default_timer()
    logging.info('Begin at ' + str(begin_time))
    nb_elem = 0
    for line in response.splitlines():
        path = photo_path + "/" + line.split('/')[-1]
        lib.read_img(hist,  path)
        lib.print_histogram_libsvm(fp, hist, 0)
        lib.free_histogram(hist)
        nb_elem += 1
        if nb_elem % 500 == 0:
            logging.info(str(nb_elem) + ' in ' + str(timeit.default_timer() - begin_time))

    end_time = timeit.default_timer()
    logging.info('end after  ' + str(end_time - begin_time))
    logging.info(str(nb_elem) + ' have been treated')
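Example #9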
def main(argv):
    ###############################
    # Getting program options
    ###############################
    nb_thread = 2
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "tc:ho:", ["input-folder=", "results-dir=",
                                                    "cluster-map=", "config=",
                                                    "nb-clusters=", "nb-thread="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-folder":
            input_sift = arg
        elif opt in "--results-dir":
            results_dir = arg
        elif opt in "--cluster-map":
            centroids_file = arg
        elif opt in "--nb-thread":
            nb_thread = int(arg)
        elif opt in "--nb-clusters":
            nb_clusters = int(arg)

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_scripts = config_section_map(config, 'Scripts')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    logging.info("Running with " + str(nb_thread) + " threads ")
    #########################
    # Init svm train_photos command
    #########################
    one_nn_script = config_scripts['1nn']

    #########################
    # Get list of sift files
    #########################
    sifts = glob.glob(input_sift + '*.sift')
    logging.info("getting " + str(len(sifts)) + " sift files")

    ##########################
    # Create output dir
    ##########################
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    logging.info('initialisation des fichiers Sift')
    begin_time = timeit.default_timer()
    cmds = []
    for sift_file in sifts:
        sift_file_name = os.path.basename(sift_file)
        file_name = os.path.splitext(sift_file_name)[0] + ".map"
        results_file = os.path.join(results_dir, file_name)
        command = []
        # R --slave --no-save --no-restore --no-environ --args centers256.txt 256 all_for_R_demo_30 res1nn.txt < 1nn.R
        command += ["R", "--slave", "--no-save", "--no-restore", "--no-environ", "--args"]
        command += [centroids_file, str(nb_clusters), sift_file, results_file]
        cmds.append(command)
    logging.info('---------------PROCESS--------------------')

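    # Hand-rolled process pool: keep at most nb_thread R jobs running at a time,
    # polling for finished ones and cleaning up their temporary sift files.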
    process = []
    while len(cmds) != 0 or len(process) != 0:
        if len(process) < nb_thread and len(cmds) != 0:
            file_string = open(one_nn_script, "r")
            cmd = cmds.pop()
            temp_sift_file = os.path.join("/tmp", os.path.splitext(os.path.basename(cmd[8]))[0] + ".temp")
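            # Assumed colorDescriptor output layout: drop the three header lines, the ';'
            # separators and the "<CIRCLE ...>" point prefixes so R reads a plain numeric matrix.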
            os.system("sed -n '4,$p' " + cmd[8] + " | tr -d \";\" |sed 's/<CIRCLE [1-9].*> //' > " + temp_sift_file)
            cmd[8] = temp_sift_file
            process.append([cmd,
                            subprocess.Popen(cmd,
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE,
                                             stdin=file_string,
                                             preexec_fn=os.setsid),
                            temp_sift_file, file_string])
            logging.info("running : " + " ".join(cmd))
        for idx, p in enumerate(process):
            if p[1].poll() is not None:
                stream_data = p[1].communicate()
                rc = p[1].returncode
                if rc != 0:
                    logging.warning(stream_data)
                    logging.warning("command " + " ".join(p[0]))
                logging.info(" end : " + " ".join(p[0]))
                logging.info(" remove temp file : " + p[2])
                os.remove(p[2])
                p[3].close()
                process.pop(idx)

    end_time = timeit.default_timer()
    logging.info('end after  ' + str(end_time - begin_time) + 's generated ' + str(len(sifts)) + " concept models")
    sys.exit(0)
Example #10
def main(argv):
    ###############################
    # Getting program options
    ###############################
    help_str = 'eval photo'
    try:
        opts, args = getopt.getopt(argv, None, ["config=", "image-path=",
                                                "result="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--image-path":
            image_path = arg
        elif opt in "--result":
            result = arg

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_predict = config_section_map(config, 'Predict')
    config_scripts = config_section_map(config, 'Scripts')
    config_svm = config_section_map(config, 'libSvm')
    config_libc = config_section_map(config, 'libC')


    #########################
    # Init svm train_photos command
    #########################
    one_nn_script = config_scripts['1nn']
    svm_predict = config_svm['svm_predict']

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    ###########################
    # Generation of sift file
    ###########################
    if not os.path.exists(image_path):
        logging.warning("not image found at path " + image_path)
        exit(1)

    working_dir = os.path.dirname(image_path)

    sift_file = os.path.join(working_dir, os.path.splitext(os.path.basename(image_path))[0] + '.sift')
    color_descriptor_exec = os.path.join(config_general['project_dir'], 'dep', 'colorDescriptor')
    create_sift_cmd = [
        color_descriptor_exec,
        '--descriptor', 'sift',
        image_path, '--output', sift_file
    ]

    logging.info('sift command : ' + str(create_sift_cmd))
    process = subprocess.Popen(create_sift_cmd)
    while process.poll() is None:
        Time.sleep(0)
        pass
    ###########################
    # Concept parameter setup
    ###########################
    model_folders = dict()

    folder_tmpl = Template('centers-${centers}_g-${g}_w-${w}')
    centers_tmpl = Template(os.path.join(config_predict['centers_folders'], 'centers${nb_centers}.txt'))
    model_folder_tmpl = Template(os.path.join(config_predict['sift_folders'], 'centers${nb_centers}.txt'))
    res_file = config_predict['best_results_sift']
    input_folder = config_predict['sift_folders']
    concepts = []
    with open(res_file, "r") as best_option:
        content = best_option.read().splitlines()
        for line in content:
            if 'all' in line:
                continue
            row = line.split(" ")
            concept_name = row[1]
            concepts.append(concept_name)
            concept_map = dict()
            nb_centers = row[3]
            g_value = row[5]
            w_value = row[7]
            folder_name = folder_tmpl.substitute(g=g_value, centers=nb_centers, w=w_value)
            if not input_folder.startswith("/"):
                folder_path = os.path.join(config_general['project_dir'], input_folder, folder_name)
            else:
                folder_path = os.path.join(input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " + concept_name + " not found")
            else:
                concept_map['sift_folder'] = folder_path
                concept_map['sift_g'] = g_value
                concept_map['sift_w'] = w_value
                concept_map['sift_centers'] = nb_centers
                concept_map['sift_centers_file'] = centers_tmpl.substitute(nb_centers=nb_centers)
                concept_map['sift_model_file'] = os.path.join(folder_path, 'model', concept_name + ".model")
                if not os.path.exists(os.path.join(folder_path, 'model', concept_name + ".model")):
                    logging.warning("no model found : " + os.path.join(folder_path, 'model', concept_name + ".model"))
                model_folders[concept_name] = concept_map
                logging.info("map for concept : " + concept_name + " -> " + str(concept_map))
    concepts = set(concepts)

    # Parameters for fusion
    fusion_model_map = dict()
    fusion_res_files = config_predict['best_results_fusion']
    fusion_parameters = open(fusion_res_files, 'r').read().splitlines()
    for line in fusion_parameters:
        content = line.split(' ')
        concept_map = dict()
        concept_name = content[1]
        sift_coef = float(content[3])
        color_coef = float(content[5])
        concept_map['sift_coef'] = sift_coef
        concept_map['color_coef'] = color_coef
        fusion_model_map[concept_name] = concept_map

    # Parameters for color
    color_res_file = config_predict['best_results_color']
    color_input_folder = config_predict['color_folders']
    color_folder_tmpl = Template('g-${g}_w-${w}')
    color_model_folder = dict()
    with open(color_res_file, "r") as color_best_option:
        content = color_best_option.read().splitlines()
        for line in content:
            if 'all' in line:
                continue
            row = line.split(" ")
            concept_name = row[1]
            color_concept_map = dict()
            g_value = row[3]
            w_value = row[5]
            folder_name = color_folder_tmpl.substitute(g=g_value, w=w_value)
            if not input_folder.startswith("/"):
                folder_path = os.path.join(config_general['project_dir'], color_input_folder, folder_name)
            else:
                folder_path = os.path.join(color_input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " + concept_name + " not found [color]")
            else:
                color_concept_map['color_folder'] = folder_path
                color_concept_map['color_g'] = g_value
                color_concept_map['color_w'] = w_value
                color_concept_map['color_model_file'] = os.path.join(folder_path, 'model', concept_name + ".model")
                if not os.path.exists(os.path.join(folder_path, 'model', concept_name + ".model")):
                    logging.warning("no model found : " + os.path.exists(os.path.join(folder_path, 'model', concept_name + ".model")))
                color_model_folder[concept_name] = color_concept_map
                logging.info("map for concept : " + concept_name + " -> " + str(concept_map))


    jpg_name = image_path
    if os.path.splitext(image_path)[1] in ('.PNG', '.png'):
        jpg_name = os.path.splitext(image_path)[0] + '.jpg'
        print "convert"
        os.system('convert ' + image_path + ' ' + jpg_name)

    ####################################################
    # Color histograms
    #####################################################
    lib = cdll.LoadLibrary(config_libc['libhistogram'])
    libc = CDLL('libc.so.6')
    fp = libc.fopen(os.path.join(working_dir, "color_histogram.svm"), "w")
    hist = pointer(HISTROGRAM())
    path = jpg_name
    lib.read_img(hist,  path)
    lib.print_histogram_libsvm(fp, hist, 0)
    lib.free_histogram(hist)
    libc.fflush(fp)  # need to fflush, otherwise data won't be written until the program finishes
    libc.close(fp)

    if os.path.splitext(image_path)[1] in ('.PNG', '.png'):
        os.system('rm ' + jpg_name)

    ####################################################
    # Predict Color
    ######################################################
    res_files = []
    for concept in color_model_folder:
        concept_map = color_model_folder[concept]
        g_param = concept_map['color_g']
        w_param = concept_map['color_w']
        svm_file = os.path.join(working_dir, 'color_histogram.svm')
        logging.info('color svm file for predict ' + svm_file)
        logging.info('model for predict ' + concept_map['color_model_file'])
        concept_out = os.path.join(working_dir, concept + '.color.out')
        res_files.append(concept_out )
        logging.info("best parameters for " + concept + " concept ->  " +
                     " g : " + g_param +
                     " w : " + w_param)
        predict_command = [
            svm_predict,
            '-b', '1',
            svm_file,
            concept_map['color_model_file'],
            concept_out
        ]
        logging.info("predict color cmd : " + " ".join(predict_command))
        predict_process = subprocess.Popen(predict_command)
        while predict_process.poll() is None:
            Time.sleep(0)
            pass
        p_rc = predict_process.returncode
        if p_rc != 0:
            logging.warning("error during prediction for concept " + concept)
            logging.warning("command : " + " ".join(predict_command))
            exit(1)

    collect_dict = dict()
    fusion_collect_dict = dict()
    for res in res_files:
        cpt_name = os.path.basename(res.split('.')[0])
        if not cpt_name in fusion_collect_dict:
            fusion_collect_dict[cpt_name] = 0.0
        if not os.path.exists(res):
            logging.warning("no output res for " + res)
            continue
        with open(res, "r") as results_stream:
            content = results_stream.read().splitlines()
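            # content[0] is the libsvm "labels ..." header; pick the probability column that
            # corresponds to the positive label (1) for this concept.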
            is_concept = content[1].split(" ")[0]
            if content[0].split(" ")[1] == "1":
                res_map = content[1].split(" ")[1]
            else:
                res_map = content[1].split(" ")[2]
            fusion_collect_dict[cpt_name] = float(res_map) * float(fusion_model_map[cpt_name]['color_coef'])  # weighted color contribution to the fusion score
            collect_dict[cpt_name] = {
                "map" : res_map,
                "is_concept" : is_concept,
                "concept" : cpt_name
            }

    finale_res = os.path.basename(image_path) + ".color.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(collect_dict))

    while process.poll() is None:
        Time.sleep(0)
        pass

    rc = process.returncode
    if rc != 0:
        logging.warning("error while generating sift file")
        exit(1)

    if not os.path.exists(sift_file):
        logging.warning('error while creating sift file')
        exit(1)


    ###########################
    # Generation of mapping
    ###########################
    temp_sift_file = os.path.join(working_dir, sift_file + '.tmp')
    os.system("sed -n '4,$p' " + sift_file + " | tr -d \";\" |sed 's/<CIRCLE [1-9].*> //' > " + temp_sift_file)
    map_files = []
    for concept in concepts:
        if concept not in model_folders:
            logging.info("missing concept " + concept + " in information map")
            continue
        with open(one_nn_script, "r") as file_string:
            concept_map = model_folders[concept]
            mapping_file = os.path.join(working_dir, "mapping-center" + str(concept_map['sift_centers']) + ".map")
            logging.info("mapping at " + mapping_file)
            map_files.append(mapping_file)  # store the paths to the mapping files for the histograms
            if not os.path.exists(mapping_file):
                command = []
                command += ["R", "--slave", "--no-save", "--no-restore", "--no-environ", "--args"]
                command += [concept_map['sift_centers_file'],
                            str(concept_map['sift_centers']),
                            temp_sift_file,
                            mapping_file]
                logging.info("mapping command for concept " + concept + " -> " + " ".join(command))
                mapping_process = subprocess.Popen(command,
                                                   stdout=subprocess.PIPE,
                                                   stderr=subprocess.PIPE,
                                                   stdin=file_string)
                while mapping_process.poll() is None:
                    Time.sleep(0)
                    pass

    map_files = set(map_files)
    generated_mapping_files = glob.glob(working_dir + '/*.map')
    for gen_file in set(generated_mapping_files):
        if gen_file not in map_files:
            logging.warning("failed to create mapping for file " + gen_file)
        nb_cluster = re.search('([0-9]*).map$', gen_file).group(1)
        result_files_svm_path = os.path.join(working_dir, "svm_file" + nb_cluster + ".svm")
        try:
            svm_file = open(result_files_svm_path, "w")
            logging.info("opening : " + result_files_svm_path)
            nb_cluster_int = int(nb_cluster)
            histogram = create_histogram(gen_file, nb_cluster_int)
            svm_file.write("0 ")
            for i, val in enumerate(histogram):
                if val != 0.0:
                    svm_file.write(str(i+1) + ":")
                    svm_file.write(str(val) + " ")
            svm_file.write("\n")
            svm_file.close()
        except ValueError:
            logging.warning("error in map file, cannot convert " + nb_cluster + " to int")

    res_files = []
    for concept in model_folders:
        concept_map = model_folders[concept]
        g_param = concept_map['sift_g']
        w_param = concept_map['sift_w']
        nb_centers = concept_map['sift_centers']
        svm_file = os.path.join(working_dir, 'svm_file' + nb_centers + ".svm")
        logging.info('svm file for predict ' + svm_file)
        logging.info('model for predict ' + concept_map['sift_model_file'])
        concept_out = os.path.join(working_dir, concept + '.sift.out')
        res_files.append(concept_out )
        logging.info("best parameters for " + concept + " concept ->  " +
                     "centers " + nb_centers +
                     " g : " + g_param +
                     " w : " + w_param)
        predict_command = [
            svm_predict,
            '-b', '1',
            svm_file,
            concept_map['sift_model_file'],
            concept_out
        ]
        logging.info(" ".join(predict_command))
        predict_process = subprocess.Popen(predict_command)

        while predict_process.poll() is None:
            Time.sleep(0)
            pass
        p_rc = predict_process.returncode
        if p_rc != 0:
            logging.warning("error during prediction for concept " + concept)
            logging.warning("command : " + " ".join(predict_command))
            exit(1)

    collect_dict = dict()
    for res in res_files:
        cpt_name = os.path.basename(res.split('.')[0])
        if not os.path.exists(res):
            logging.warning("no output res for " + res)
            continue
        with open(res, "r") as results_stream:
            content = results_stream.read().splitlines()
            is_concept = content[1].split(" ")[0]
            if content[0].split(" ")[1] == "1":
                res_map = content[1].split(" ")[1]
            else:
                res_map = content[1].split(" ")[2]
            fusion_collect_dict[cpt_name] += float(res_map) * float(fusion_model_map[cpt_name]['sift_coef'])  # add the weighted sift contribution to the fusion score
            collect_dict[cpt_name] = {
                "map" : res_map,
                "is_concept" : is_concept,
                "concept" : cpt_name
            }

    finale_res = os.path.basename(image_path) + ".sift.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(collect_dict))

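    # Late fusion: each concept's fused score is color_coef * color_prob + sift_coef * sift_prob
    # (coefficients read from best_results_fusion); a fused score above 0.5 marks the concept as present.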
    fusion_json_dict = dict()
    for concept in fusion_collect_dict:
        fusion_json_dict[concept] = dict()
        fusion_json_dict[concept]['is_concept'] = 1 if fusion_collect_dict[concept] > 0.5 else  -1
        fusion_json_dict[concept]['concept'] = concept
        fusion_json_dict[concept]['map'] = fusion_collect_dict[concept]

    finale_res = os.path.basename(image_path) + ".fusion.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(fusion_json_dict))
Example #11
def main(argv):
    ###############################
    # Getting program options
    ###############################
    nb_thread = 2
    help_str = "formatSift.py"
    try:
        opts, args = getopt.getopt(argv, None, ["config=", "url-list=", "results-dir=", "download-dir=", "freq-cut="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--url-list":
            url_list = arg
        elif opt in "--results-dir":
            results_dir = arg
        elif opt in "--download-dir":
            download_dir = arg
        elif opt in "--freq-cut":
            cut_every = int(arg)

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, "General")

    #########################
    # Configure the logger
    #########################
    log_dir = config_general["log_dir"]

    now = datetime.now()
    date_str = (
        str(now.day) + "_" + str(now.hour) + "_" + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    )
    logfile_name = os.path.basename(__file__).split(".")[0] + "-" + date_str + ".log"
    logging.basicConfig(filename=log_dir + "/" + logfile_name, level=logging.DEBUG)

    if "cut_every" not in locals():
        logging.warning("no defined step")
        exit(1)

    ##################################################
    # Fetch the list of SIFT files
    ##################################################
    if "url_list" not in locals():
        logging.warning("no sift files provided ")
        exit(1)

    logging.info(url_list)

    if url_list.startswith("http://") or url_list.startswith("https://"):
        raw_file = urllib2.urlopen(url_list).read()
    else:
        raw_file = open(url_list).read()
    urls = raw_file.splitlines()
    logging.info("nombre d'url : " + str(len(urls)))

    ##################################################
    # Create the download folder
    ##################################################
    logging.info("download dir = " + download_dir)
    if not os.path.exists(results_dir):
        logging.info("no output dir creating at " + results_dir)
        if not subprocess.call(["mkdir", "-p", download_dir]) == 0:
            logging.warning("cannot create download dir, aborting")
            sys.exit(1)
    logging.info("download dir is " + results_dir)

    ########################################
    # Create the results folder
    ########################################
    logging.info("results_dir = " + results_dir)
    result_name = os.path.basename(results_dir)
    results_dir = os.path.dirname(results_dir)
    if not os.path.exists(results_dir):
        logging.info("no output dir creating at " + results_dir)
        if not subprocess.call(["mkdir", "-p", results_dir]) == 0:
            logging.warning("cannot create output dir, aborting")
            sys.exit(1)
    logging.info("output dir is " + results_dir)

    logging.info("download sift files")
    begin_dl = timeit.default_timer()
    nb_existing = 0
    for url in urls:
        if url.endswith(".sift"):
            name = url.split("/")[-1]
            sift_path = os.path.join(download_dir, name)
            if not os.path.exists(sift_path):
                os.system("wget -P " + download_dir + " " + url + " >/dev/null 2>&1")
            else:
                nb_existing += 1
    elapsed = timeit.default_timer() - begin_dl
    logging.info("dl " + str(len(urls)) + " sift file took " + str(elapsed))
    logging.info(str(nb_existing) + " sift already in the folder")

    res_file = open(os.path.join(results_dir, result_name), "w")

    for url in urls:
        if url.endswith(".sift"):
            name = url.split("/")[-1]
            with open(os.path.join(download_dir, name)) as sift:
                content = sift.read().splitlines()
                step = 4  # skip the header lines at the top of the sift file
                while step < len(content):
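                    # Assumed sift line layout "<point info>; <descriptor values>;", so the
                    # next-to-last ';'-separated field is the raw descriptor vector.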
                    line = content[step].split(";")[-2]
                    line = line.lstrip()
                    line = line.rstrip()
                    res_file.write(line)
                    res_file.write("\n")
                    step += cut_every

    res_file.close()
    logging.info("end of program")
Example #12
def main(argv):
    """ programme which generate a  new trec formated files
        It take in entry a files which indeicates for each concepts the trec formated files to merge.
        the sum of int float must be 1.
        Line example

        aeroplane;/path/toFile1:float1;/path/toFile2:float2;/path/toFile3:float3
    """
    ###############################
    # Getting programme options
    ###############################
    help_str = 'no help provided'
    try:
        opts, args = getopt.getopt(argv, "ho:c:i:", ["config=", "output-folder=", "input-file="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-o", "--output-folder"):
            res_folder = arg
        elif opt in ("-i", "--input-file"):
            input_file = arg
        elif opt in ("-c", "--config"):
            config_file = arg

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)
    logging.info("running")

    FileAndPods = namedtuple("FileAndPods", "file name pods")

    if not os.path.exists(input_file):
        logging.warning("input file doesn't exist : " + input_file)
        sys.exit(1)

    if not os.path.exists(res_folder):
        logging.info("creating res folder at : " + res_folder)
        os.system('mkdir -p ' + res_folder)

    template = Template("$concept Q0 $name 0 $map R")
    with open(input_file, 'r') as input_stream :
        lines = input_stream.read().splitlines()
        for line in lines: # EACH CONCEPT
            content = line.split(';')
            concept_name = content[0]
            results_file = os.path.join(res_folder , concept_name + '.top')
            files = []
            size = -1
            nb_trec_file = 0
            for i in range(1, len(content)) : # each file
                pair = content[i].split(':')
                if not os.path.exists(pair[0]):
                    logging.warning("not able to merge files for concept " + str(concept_name) + " no file " + paire[0] )
                    continue
                nb_trec_file += 1
                stream = open(pair[0], 'r')
                entry = FileAndPods(stream.read().splitlines(), concept_name, pair[1])
                stream.close()
                if size == -1:
                    size = len(entry.file)
                elif len(entry.file) != size:
                    logging.warning("files don't have the same length, cannot merge")
                files.append(entry)

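            # Merge: for each rank position, the new score is the weighted sum of the
            # per-file scores, using the ':'-suffixed float weights read above.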
            with open(results_file, 'w') as res_writer:
                for i in range(0,size):
                    photo_name = None
                    new_val = 0
                    for elem in files:
                        line = elem.file[i].split(' ')
                        if photo_name is None:
                            photo_name = line[2]
                        else :
                            if photo_name != line[2] :
                                logging.warning("photoname doesn't match")
                                logging.warning(photo_name + " & " + line[2])
                                sys.exit(1)

                        current_val = float(line[4])
                        new_val += current_val * float(elem.pods)
                    new_line = template.substitute(concept=elem.name, name=photo_name, map=new_val)

                    res_writer.write(new_line + "\n")
    logging.info('end of programme')
Example #13
def main(argv):
    """
        Programme qui depuis une liste de concept et de photo attribut
        a une liste d'histogrammes au format svm un concept
        Les arguments sont
            -c une url vers une liste de concepts
            -H la liste des histogrammes dans lordre d'aparition dans le fichiers de concepts.
            -o base path et name pour la sortie des fichiers "/file/starting_name_"
            -u url de base pour recuperer les concepts "default : http://mrim.imag.fr/GINF53C4/PROJET/val_photos/ann/
    """
    ###############################
    # Getting program options
    ###############################
    help_str = 'concept.py -c <concept> -H <histogram> -o <filesbase> -u <urlbase>'
    try:
        opts, args = getopt.getopt(argv, "hu:o:H:c:", ["url=", "output=", "config="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-u", "--url"):
            url_base = arg
        elif opt in "-c":
            concept_file = arg
        elif opt in "-H":
            histogram_file = arg
        elif opt in ("-o", "--output"):
            res_file = arg
        elif opt in "--config":
            config_file = arg

    if 'histogram_file' not in locals():
        logging.info('main not correctly called : Histogram file is needed')
        print help_str
        sys.exit()

    if 'config_file' not in locals():
        logging.info('main not correctly called : config file is needed')
        print help_str
        sys.exit()

    #########################
    # Load the config
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_libc = config_section_map(config, 'libC')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    script_name = os.path.basename(__file__)
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    if 'url_base' not in locals():
        print "missing url base for .ann files"
        sys.exit()
    logging.info("getting recorded concepts from " + url_base)

    # default results location
    if 'res_file' not in locals():
        print "missing output dir"
        sys.exit()
    logging.info("Results will be at " + res_file + "<conceptname>")
    if not os.path.exists(res_file):
        logging.info("output dir does not exist... creating " + res_file)
        os.system("mkdir -p " + res_file)

    # Fetch the concept list
    logging.info('get concept list from ' + concept_file)
    if concept_file.startswith('http://'):
        response = urllib2.urlopen(concept_file).read()
    else:
        response = open(concept_file).read()

    ##################################
    # Load the histogram into memory
    ##################################
    logging.info('opening histogram model file and reading it')
    histogram = open(histogram_file).read()

    ##################################
    # Init variables
    ##################################
    open_files = {}
    concept_streams = {}
    join_seq = " "

    ##################################
    # Initializing concepts
    ##################################
    begintime = timeit.default_timer()
    for concept in response.splitlines():
        logging.info(str(concept))
        concept_record_url = url_base + concept + ".ann"
        logging.info("getting record from " + concept_record_url)
        concept_stream = urllib2.urlopen(concept_record_url).read()
        concept_streams[concept] = concept_stream.splitlines()
        concept_svm_file = res_file + concept + ".svm"
        logging.info("opening results file -> " + concept_svm_file)
        fo = open(concept_svm_file, "wb")
        open_files[concept] = fo

    ###################################
    # Main loop
    ###################################
    current_line = 0
    for current_line_histogram in histogram.splitlines():  # loop through the histogram model once
        histogram_line = join_seq.join((current_line_histogram.split(' ')[1:]))  # get the histogram
        for concept in response.splitlines():
            indice = re.split('[ ]+', concept_streams[concept][current_line])[1]
            if indice != '0':
                open_files[concept].write(str(indice) + ' ')
                open_files[concept].write(histogram_line)
                open_files[concept].write('\n')
            else:
                logging.info('found 0 at line ' + str(current_line) + ' for ' + concept)
        current_line += 1

    logging.info('nb line tot : ' + str(current_line))
    for out_file in open_files:
        open_files[out_file].close()

    endtime = timeit.default_timer()
    logging.info('end after ' + str(endtime - begintime))
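The main loop above pairs line i of the histogram file with line i of each concept's .ann record: the second field of the .ann line is taken as the label (a '0' label is skipped), and the histogram's fields, minus its first token, become the feature part of the training line. A minimal sketch with hypothetical lines (the field layout is inferred from the indexing in the code above):

import re

ann_line = "photo_0001 1"                    # hypothetical .ann record, label in field 2
histogram_line = "photo_0001 1:0.20 2:0.80"  # hypothetical histogram line
label = re.split('[ ]+', ann_line)[1]
features = " ".join(histogram_line.split(' ')[1:])
print label + ' ' + features                 # -> "1 1:0.20 2:0.80"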
Exemplo n.º 14
0
def main(argv):
    ###############################
    # Getting program options
    ###############################
    generate_all_file = False
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "", ["base-url-rel=", "input-top=",
                                              "config=",
                                              "results-dir="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-top":
            input_top_files = arg
        elif opt in "--base-url-rel":
            rel_base = arg
        elif opt in "--results-dir":
            results_dir = arg

    if generate_all_file:
        logging.info("all option activated")

    #########################
    # Load the configuration
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    section_trec_eval = config_section_map(config, 'trecEval')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    #########################
    # Get list of top formatted files
    #########################
    concepts = glob.glob(os.path.join(input_top_files, '*.top'))
    print input_top_files
    for concept in concepts:
        logging.info("concept file " + concept)

    ##########################
    # Create output dir
    ##########################
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir, creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)

    begin_time = timeit.default_timer()

    for concept_file in concepts:
        concept_name = os.path.basename(concept_file).split(".")[0]
        res_output = results_dir + concept_name
        rel_path = '/tmp/' + concept_name + '.rel'
        url = rel_base + "/" + concept_name + ".rel"
        # curl instead of wget because wget does not overwrite existing files
        os.system("curl " + url + " > " + rel_path)
        if not os.path.exists(rel_path):
            logging.warning("Download error for file " + url)
            sys.exit(1)
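        # Alternative sketch (assumption, not the original flow): the same download could
        # be done in pure Python with urllib2, already used elsewhere in these scripts, e.g.
        #   open(rel_path, 'w').write(urllib2.urlopen(url).read())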
        cmd = [section_trec_eval['trec_eval'], '-q', rel_path, concept_file]
        with open(res_output, "w") as outfile:
            ret = subprocess.call(cmd, stdout=outfile)
            if ret != 0:
                logging.warning("error for " + concept_file)
        os.remove(rel_path)

    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time) + 's, evaluated ' + str(len(concepts)) + ' concepts')
Exemplo n.º 15
0
def main(argv):
    ###############################
    # Getting program options
    ###############################
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "h", ["samples=", "results=",
                                               "config=", "nb-clusters=",
                                               "nb-iter="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--samples":
            data = arg
        elif opt in "--results":
            results = arg
        elif opt in "--nb-clusters":
            nb_clusters = arg
        elif opt in "--nb-iter":
            nb_iter = arg


    #########################
    # Load the configuration
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_scripts = config_section_map(config, 'Scripts')

    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']

    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    if 'data' not in locals() or not os.path.exists(data):
        logging.warning("no data")
        exit(1)

    if 'nb_clusters' not in locals():
        logging.warning("need to specify the number of clusters needed for kmeans")
        exit(1)
    logging.info("running with " + nb_clusters + " clusters to generate")

    if 'nb_iter' not in locals():
        logging.warning("need to specify the number of max iterations")
        exit(1)
    logging.info("running with " + nb_iter + " nb iterations")

    ########################################
    # Create the results file and directory
    ########################################
    logging.info("results_dir = " + results)
    result_name = os.path.basename(results)
    results_dir = os.path.dirname(results)
    if not os.path.exists(results_dir):
        logging.info('no output dir, creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    ####################
    # Build the command
    ####################
    kmeans_script = config_scripts['kmeans']
    command = "R --slave --no-save --no-restore --no-environ --args "
    command += data + " " + nb_clusters + " " + results + " " + nb_iter + " < " + kmeans_script
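    # The values after "--args" are presumably read inside the kmeans script via
    # commandArgs(trailingOnly=TRUE); this is an assumption about that R script.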

    logging.info("command : " + command)
    begin_time = timeit.default_timer()
    #########################
    # Run the clustering
    #########################
    os.system(command)
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time))
Exemplo n.º 16
0
def main(argv):
    ###############################
    # Getting program options
    ###############################
    nb_thread = 2
    help_str = "svm-train.py -c <concepts list>"
    try:
        opts, args = getopt.getopt(
            argv, "tc:ho:", ["concepts=", "results-dir=", "input-svm=", "config=", "svm-args=", "nb-thread="]
        )
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--input-svm":
            input_svm = arg
        elif opt in "--results-dir":
            results_dir = arg
        elif opt in "--svm-args":
            svm_options = arg
        elif opt in "--nb-thread":
            nb_thread = int(arg)

    #########################
    # Load the configuration
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, "General")
    config_svm = config_section_map(config, "libSvm")

    #########################
    # Configure the logger
    #########################
    log_dir = config_general["log_dir"]

    ############################
    # Initialize libSVM
    ############################
    svm_train = config_svm["svm_train"]

    now = datetime.now()
    script_name = os.path.basename(__file__)
    date_str = (
        str(now.day) + "_" + str(now.hour) + "_" + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    )
    logfile_name = os.path.basename(__file__).split(".")[0] + "-" + date_str + ".log"
    logging.basicConfig(filename=log_dir + "/" + logfile_name, level=logging.DEBUG)

    logging.info("Running with " + str(nb_thread) + " threads ")
    #########################
    # Init svm-train command
    #########################
    train_cmd = [svm_train]
    if "svm_options" in locals():
        logging.info("svm options " + svm_options)
        train_cmd = train_cmd + svm_options.split(" ")  # subprocess expects a list of arguments, so split the option string here
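        # A more robust alternative (sketch/assumption, not the original behavior):
        # shlex.split(svm_options) would also honor quoted arguments, e.g.
        #   import shlex
        #   train_cmd = train_cmd + shlex.split(svm_options)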

    #########################
    # Get list of svm files
    #########################
    concepts = glob.glob(input_svm + "*.svm")
    for concept in concepts:
        logging.info("concept file " + concept)

    ##########################
    # Create output dir
    ##########################
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info("no output dir creating at " + results_dir)
        if not subprocess.call(["mkdir", "-p", results_dir]) == 0:
            logging.warning("cannot create output dir, aborting")
            sys.exit(1)
    logging.info("output dir is " + results_dir)

    logging.info("initialisation des concepts")
    begin_time = timeit.default_timer()
    cmds = []
    for concept_file in concepts:
        command = []
        concept_name = os.path.basename(concept_file).split(".")[0]
        model_output = results_dir + concept_name + ".model"
        logging.info("model for " + concept_name + " registered at " + model_output)
        command = [concept_file, model_output]
        command = train_cmd + command
        logging.info("svm call : " + " ".join(command))
        cmds.append(command)

    logging.info("---------------PROCESS--------------------")

    process = []
    while len(cmds) != 0 or len(process) != 0:
        if len(process) < nb_thread and len(cmds) != 0:
            cmd = cmds.pop()
            process.append([cmd, subprocess.Popen(cmd)])
            logging.info("running : " + " ".join(cmd))
        for p in process[:]:  # iterate over a copy so finished jobs can be removed safely
            if p[1].poll() is not None:
                logging.info(" end : " + " ".join(p[0]))
                process.remove(p)

    end_time = timeit.default_timer()
    logging.info("end after  " + str(end_time - begin_time) + "s generated " + str(len(concepts)) + " concept models")