def __init__(self):
    """Load the moustash ini file and open the configured broker connection.

    Reads /usr/local/etc/moustash/moustash.ini (exits with status 1 when the
    file is missing), stores the "Moustash" and "Cuir" sections, then connects
    to the transport named by the "transport" option ("redis" or "rabbitmq").
    """
    self.config = ConfigParser.ConfigParser()
    # Fail fast when the hard-coded configuration path is absent.
    if not os.path.isfile("/usr/local/etc/moustash/moustash.ini"):
        print("Configuration file (/usr/local/etc/moustash/moustash.ini) not found !!!")
        print("Exit !!!")
        sys.exit(1)
    self.config.read("/usr/local/etc/moustash/moustash.ini")
    self.moustash_config = {}
    self.moustash_config["Moustash"] = config_section_map(self.config, "Moustash")
    self.moustash_config["Cuir"] = config_section_map(self.config, "Cuir")
    self.broker = {}
    if self.moustash_config["Moustash"]["transport"] == "redis":
        # Defaults applied for options the ini file leaves unset.
        redis_parameters = {"redis_host": "localhost",
                            "redis_port": "6379",
                            "redis_db": "0",
                            "redis_namespace": "logstash:moustash"}
        self.fill_broker_options(redis_parameters)
        self.broker_connection = self.connect_to_redis()
    elif self.moustash_config["Moustash"]["transport"] == "rabbitmq":
        rabbitmq_parameters = {"rabbitmq_host": "localhost",
                               "rabbitmq_port": "5672",
                               "rabbitmq_ssl": "0",
                               "rabbitmq_ssl_key": None,
                               "rabbitmq_ssl_cert": None,
                               "rabbitmq_ssl_cacert": None,
                               "rabbitmq_vhost": "/",
                               "rabbitmq_username": "guest",
                               "rabbitmq_password": "guest",
                               "rabbitmq_queue": "logstash-queue",
                               "rabbitmq_queue_durable": "0",
                               "rabbitmq_exchange_type": "direct",
                               "rabbitmq_exchange_durable": "0",
                               "rabbitmq_key": "logstash-key",
                               "rabbitmq_exchange": "logstash-exchange"}
        self.fill_broker_options(rabbitmq_parameters)
        self.broker_connection = self.connect_to_rabbitmq()
    else:
        # Fix: the original used a bare `moustash_config`, which raises
        # NameError at runtime instead of printing the message.
        print("Not yet implemented : %s !" % self.moustash_config["Moustash"]["transport"])
def __init__(self):
    """Load the moustash ini file and open the configured broker connection.

    Same contract as the sibling variant: exits with status 1 when the fixed
    configuration path is missing, stores the "Moustash" and "Cuir" sections,
    then connects to the "redis" or "rabbitmq" transport.
    """
    self.config = ConfigParser.ConfigParser()
    # Fail fast when the hard-coded configuration path is absent.
    if not os.path.isfile("/usr/local/etc/moustash/moustash.ini"):
        print("Configuration file (/usr/local/etc/moustash/moustash.ini) not found !!!")
        print("Exit !!!")
        sys.exit(1)
    self.config.read("/usr/local/etc/moustash/moustash.ini")
    self.moustash_config = {}
    self.moustash_config["Moustash"] = config_section_map(self.config, "Moustash")
    self.moustash_config["Cuir"] = config_section_map(self.config, "Cuir")
    self.broker = {}
    if self.moustash_config["Moustash"]["transport"] == "redis":
        # Defaults applied for options the ini file leaves unset.
        redis_parameters = {
            "redis_host": "localhost",
            "redis_port": "6379",
            "redis_db": "0",
            "redis_namespace": "logstash:moustash"
        }
        self.fill_broker_options(redis_parameters)
        self.broker_connection = self.connect_to_redis()
    elif self.moustash_config["Moustash"]["transport"] == "rabbitmq":
        rabbitmq_parameters = {
            "rabbitmq_host": "localhost",
            "rabbitmq_port": "5672",
            "rabbitmq_ssl": "0",
            "rabbitmq_ssl_key": None,
            "rabbitmq_ssl_cert": None,
            "rabbitmq_ssl_cacert": None,
            "rabbitmq_vhost": "/",
            "rabbitmq_username": "******",
            "rabbitmq_password": "******",
            "rabbitmq_queue": "logstash-queue",
            "rabbitmq_queue_durable": "0",
            "rabbitmq_exchange_type": "direct",
            "rabbitmq_exchange_durable": "0",
            "rabbitmq_key": "logstash-key",
            "rabbitmq_exchange": "logstash-exchange"
        }
        self.fill_broker_options(rabbitmq_parameters)
        self.broker_connection = self.connect_to_rabbitmq()
    else:
        # Fix: the original used a bare `moustash_config`, which raises
        # NameError at runtime instead of printing the message.
        print("Not yet implemented : %s !" % self.moustash_config["Moustash"]["transport"])
def main(argv):
    """Collect trec_eval "map" scores from result folders into a CSV file.

    Options: -o/--output-file CSV destination, -i/--input-folder folder with
    one sub-folder per parameter pair (named like "...g-<g>_w-<w>"),
    -c/--config ini file.
    """
    help_str = 'no help provided'
    try:
        opts, args = getopt.getopt(argv, "ho:c:i:",
                                   ["config=", "output-file=", "input-folder="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        elif opt in ("-o", "--output-file"):
            res_file = arg
        elif opt in ("-i", "--input-folder"):
            input_folder = arg
        elif opt in ("-c", "--config"):
            config_file = arg

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    # One row per (concept, g, w) triple found in the trec_eval outputs.
    results = glob.glob(input_folder + '/*')
    fieldnames = ["concept", "g", "w", "map"]
    # Fix: use a with-block so the CSV file is closed even when an error
    # occurs while scanning the result folders.
    with open(res_file, "wb") as fo:
        writer = csv.DictWriter(fo, fieldnames=fieldnames)
        writer.writeheader()
        for result in results:
            fname = os.path.basename(result)
            # Folder name carries the g and w values: "...g-<g>_w-<w>".
            values = re.split("g-|_w-", fname)
            files = glob.glob(os.path.join(result, "val/trec_eval_results") + "/*")
            for file_res in files:
                concept_name = os.path.basename(file_res)
                with open(file_res, "rb") as trec_res:
                    for line in trec_res.read().splitlines():
                        # Keep the overall "map" line only, not "gm_map".
                        if "map" in line and "gm_map" not in line and concept_name in line:
                            print(line)
                            # Fix: '[ \t]*' can match the empty string between
                            # every character; '+' is the intended separator.
                            token_line = re.split('[ \t]+', line)
                            row = dict()
                            row[fieldnames[0]] = concept_name
                            row[fieldnames[1]] = values[1]
                            row[fieldnames[2]] = values[2]
                            row[fieldnames[3]] = token_line[-1]
                            writer.writerow(row)
def main(argv):
    """Convert libSVM prediction files (*.out) into trec-style .top files.

    Options: --config ini file, --input-predictions folder of *.out files,
    --list-id file path or http URL listing photo ids, --results-dir output
    folder, --all to additionally write a combined all.top file.
    """
    generate_all_file = False
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "", ["list-id=", "input-predictions=",
                                              "config=", "all", "results-dir="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        # Fix: the original used substring tests (`opt in "--config"`);
        # compare the long options explicitly.
        elif opt == "--config":
            config_file = arg
        elif opt == "--input-predictions":
            input_predictions = arg
        elif opt == "--list-id":
            list_id = arg
        elif opt == "--results-dir":
            results_dir = arg
        elif opt == "--all":
            generate_all_file = True
    if generate_all_file:
        logging.info("all option activated")

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    # One prediction file per concept.
    concepts = glob.glob(input_predictions + '*.out')
    for concept in concepts:
        logging.info("concept file " + concept)

    # Photo id list, fetched over HTTP or read from a local file.
    logging.info('get concept list from ' + list_id)
    if list_id.startswith('http://'):
        res = urllib2.urlopen(list_id).read()
    else:
        res = open(list_id).read()
    photo_ids = res.splitlines()
    logging.info('found ids of ' + str(len(photo_ids)) + ' photos')

    # Create the output dir when missing.
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    if generate_all_file:
        fall = open(results_dir + 'all.top', "wb")
    logging.info('initialisation des concepts')
    begin_time = timeit.default_timer()
    for concept_file in concepts:
        concept_name = os.path.basename(concept_file).split(".")[0]
        top_output = results_dir + concept_name + '.top'
        # Fix: the prediction file handle was never closed; use with-blocks.
        with open(concept_file) as fo:
            predictions = fo.read()
        with open(top_output, 'wb') as file_result:
            lines = predictions.splitlines()
            # First line is "labels <a> <b>": pick the probability column of
            # the positive class.
            indicator = lines[0].split(" ")
            score_row = 2
            if indicator[1] == "1":
                score_row = 1
            idx_begin = 1
            for photo_id in photo_ids:
                id_string = photo_id.split(".")[0]
                line = (concept_name + " Q0 " + id_string + " 0 " +
                        lines[idx_begin].split(" ")[score_row] + " R\n")
                if generate_all_file:
                    fall.write(line)
                file_result.write(line)
                idx_begin += 1
    if generate_all_file:
        fall.close()
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time) + 's generated ' +
                 str(len(concepts)) + " concept models")
def main(argv):
    """Map each concept to the model folder trained with its best SIFT
    parameters, as listed in the configured best-results file.

    Options: -c/--config ini file (the output/input options are declared for
    getopt but unused here).
    """
    help_str = 'no help provided'
    try:
        opts, args = getopt.getopt(argv, "hr:c:i:",
                                   ["config=", "output-file=", "input-folder="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        elif opt in ("-c", "--config"):
            config_file = arg

    # Load configuration sections.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_predict = config_section_map(config, 'Predict')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = "_".join([str(now.day), str(now.hour), str(now.minute),
                         str(now.second), str(now.microsecond)])
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    model_folders = dict()
    folder_tmpl = Template('centers-${centers}_g-${g}_w-${w}')
    res_file = config_predict['best_results_sift']
    input_folder = config_predict['sift_folders']
    with open(res_file, "r") as best_option:
        content = best_option.read().splitlines()
        # Each line: "... <concept> ... <centers> ... <g> ... <w>".
        for line in content:
            row = line.split(" ")
            concept_name = row[1]
            concept_map = dict()
            nb_centers, g_value, w_value = row[3], row[5], row[7]
            folder_name = folder_tmpl.substitute(g=g_value,
                                                 centers=nb_centers,
                                                 w=w_value)
            # Relative folders are resolved against the project dir.
            if input_folder.startswith("/"):
                folder_path = os.path.join(input_folder, folder_name)
            else:
                folder_path = os.path.join(config_general['project_dir'],
                                           input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " +
                                concept_name + " not found")
            else:
                concept_map['sift_folder'] = folder_path
                concept_map['sift_g'] = g_value
                concept_map['sift_w'] = w_value
                concept_map['sift_centers'] = nb_centers
                model_folders[concept_name] = concept_map
def main(argv):
    """Run svm-predict on one histogram file for every concept model.

    Options: -c/--config ini file, --input-svm folder of *.model files,
    --histograms libSVM histogram file, --results-dir output folder,
    --svm-args extra arguments passed through to svm-predict.
    """
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "tc:ho:",
                                   ["concepts=", "results-dir=", "input-svm=",
                                    "config=", "svm-args=", "histograms="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        # Fix: the original used substring tests (`opt in "--config"`);
        # "-c" only matched because it is a substring of "--config".  Match
        # the options explicitly, keeping "-c" as an alias of --config.
        elif opt in ("-c", "--config"):
            config_file = arg
        elif opt == "--input-svm":
            input_svm = arg
        elif opt == "--histograms":
            histogram_file = arg
        elif opt == "--results-dir":
            results_dir = arg
        elif opt == "--svm-args":
            svm_options = arg

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_svm = config_section_map(config, 'libSvm')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    svm_predict = config_svm['svm_predict']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    # Base svm-predict command; per-concept arguments are appended later.
    train_cmd = [svm_predict]
    if 'svm_options' in locals():
        logging.info('svm options ' + svm_options)
        # bug with space in subprocess so we have to split it here
        train_cmd = train_cmd + svm_options.split(' ')
    train_cmd += [histogram_file]

    # One model file per concept.
    concepts = glob.glob(input_svm + '*.model')
    for concept in concepts:
        logging.info("concept file " + concept)

    # Create the output dir when missing.
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    logging.info('initialisation des concepts')
    begin_time = timeit.default_timer()
    for concept_file in concepts:
        concept_name = os.path.basename(concept_file).split(".")[0]
        model_output = results_dir + concept_name + '.out'
        logging.info('model for ' + concept_name + ' registered at ' + model_output)
        command = train_cmd + [concept_file, model_output]
        logging.info('svm call : ' + " ".join(command))
        ret = subprocess.call(command)
        if ret != 0:
            logging.info('exit code from train_photos for ' + concept_name +
                         ' : ' + str(ret))
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time) + 's generated ' +
                 str(len(concepts)) + " concept models")
def main(argv):
    """Build one libSVM histogram line per *.map file, sorted by photo id.

    Options: --config ini file, --input-dir folder of .map files, --output
    destination file, --nb-cluster number of clusters (histogram length).
    """
    help_str = ''
    try:
        opts, args = getopt.getopt(argv, "", ["output=", "input-dir=",
                                              "config=", "nb-cluster="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        # Fix: explicit option comparison instead of substring tests.
        elif opt == "--config":
            config_file = arg
        elif opt == "--input-dir":
            input_dir = arg
        elif opt == "--output":
            output = arg
        elif opt == "--nb-cluster":
            nb_cluster = float(arg)
    # (fix: removed the always-False `generate_all_file` dead branch)

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    # Mapping files to convert.
    logging.info("looking for mapping in folder : " + input_dir)
    mappings = glob.glob(input_dir + '/*.map')
    logging.info("found " + str(len(mappings)) + " mapping files")
    logging.info("nb clusters : " + str(nb_cluster))

    # Create the results folder when missing.
    logging.info("results file = " + output)
    results_dir = os.path.dirname(output)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    # Write the unsorted histograms to a temp file first.
    output_temp = output + ".temp"
    with open(output_temp, 'w') as res_file:
        for m in mappings:
            hist = create_histogram(m, nb_cluster)
            res_file.write(os.path.basename(os.path.splitext(m)[0]))
            res_file.write(" ")
            # libSVM sparse format: only non-zero bins, 1-based indices.
            for i, val in enumerate(hist):
                if val != 0.0:
                    res_file.write(str(i+1) + ":")
                    res_file.write(str(val) + " ")
            res_file.write("\n")

    # Order the file, then replace the "<id>_<id> " prefix by the "0 " label.
    os.system("sort " + output_temp + " > " + output)
    sed_command = "sed -i.back -e 's/^[0-9]*_[0-9]* /0 /g' " + output
    os.system(sed_command)
    logging.info(sed_command)
    os.remove(output_temp)
def main(argv):
    """Download every photo of a url list and append its colour histogram,
    in libSVM format, to the result file via the C histogram library.

    Options: -u/--url url-list location, -o/--output result file, -d download
    folder, -c/--config ini file.  All four are mandatory.
    """
    help_str = 'histogram.py -u <url> -o <outputfile>'
    try:
        opts, args = getopt.getopt(argv, "hd:u:o:c:", ["config=", "url=", "output="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        elif opt in ("-u", "--url"):
            url_list = arg
        elif opt in ("-o", "--output"):
            res_file = arg
        elif opt == "-d":
            dl_dir = arg
        elif opt in ("-c", "--config"):
            config_file = arg
    # All four options are mandatory.
    if not (('url_list' in locals()) and ('res_file' in locals()) and
            ('dl_dir' in locals()) and ('config_file' in locals())):
        logging.warning('histogram.py not correctly called')
        print(help_str)
        sys.exit()

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_libc = config_section_map(config, 'libC')

    # Load the C libraries: project histogram lib + system libc for FILE* io.
    lib = cdll.LoadLibrary(config_libc['libhistogram'])
    libc = CDLL('libc.so.6')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)
    logging.info('Getting url from "' + url_list)
    logging.info('Output file is "' + res_file)

    photo_path = dl_dir
    if not (os.path.isdir(photo_path)):
        logging.info("creating folder to dl photos")
        os.system("mkdir -p " + photo_path)
    else:
        logging.info("Photo will be dl to " + photo_path)

    logging.info('get url list from ' + url_list)
    response = urllib2.urlopen(url_list).read()
    logging.info("dl all photos")
    begin_dl = timeit.default_timer()
    nb_photo = 0
    nb_existing = 0
    for line in response.splitlines():
        # Skip photos already present in the download folder.
        if not os.path.exists(photo_path + "/" + line.split('/')[-1]):
            os.system("wget -P " + photo_path + " " + line + " >/dev/null 2>&1")
            nb_photo += 1
        else:
            nb_existing += 1
    elapsed = timeit.default_timer() - begin_dl
    logging.info("dl " + str(nb_photo) + " photos took " + str(elapsed))
    logging.info(str(nb_existing) + " photos already in the folder")

    logging.info('Opening file for results ' + res_file)
    # Fix: the original computed the parent folder with
    # "/".join(os.path.join(res_file.split('/')[0:-1])), which only worked by
    # accident (os.path.join returned the list untouched).
    results_path = os.path.dirname(res_file)
    if not os.path.exists(results_path):
        os.system("mkdir -p " + results_path)
    fp = libc.fopen(res_file, "w")

    # Main loop: one histogram per downloaded photo.
    hist = pointer(HISTROGRAM())
    begin_time = timeit.default_timer()
    logging.info('Begin at ' + str(begin_time))
    nb_elem = 0
    for line in response.splitlines():
        path = photo_path + "/" + line.split('/')[-1]
        lib.read_img(hist, path)
        lib.print_histogram_libsvm(fp, hist, 0)
        lib.free_histogram(hist)
        nb_elem += 1
        if nb_elem % 500 == 0:
            logging.info(str(nb_elem) + ' in ' + str(timeit.default_timer() - begin_time))
    # Fix: flush and close the C-level FILE*; the original never closed it,
    # so the tail of the results file could stay in libc's buffer.
    libc.fflush(fp)
    libc.fclose(fp)
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time))
    logging.info(str(nb_elem) + ' have been treated')
def main(argv):
    """Map every *.sift file onto the cluster centroids with the 1nn R
    script, running up to --nb-thread R processes in parallel.

    Options: -c/--config ini file, --input-folder folder of *.sift files,
    --results-dir output folder, --cluster-map centroids file,
    --nb-clusters cluster count, --nb-thread parallelism (default 2).
    """
    nb_thread = 2
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "tc:ho:",
                                   ["input-folder=", "results-dir=",
                                    "cluster-map=", "config=", "nb-clusters=",
                                    "nb-thread="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        # Fix: explicit option matching instead of substring tests
        # (`opt in "--config"`); "-c" is kept as an alias of --config.
        elif opt in ("-c", "--config"):
            config_file = arg
        elif opt == "--input-folder":
            input_sift = arg
        elif opt == "--results-dir":
            results_dir = arg
        elif opt == "--cluster-map":
            centroids_file = arg
        elif opt == "--nb-thread":
            nb_thread = int(arg)
        elif opt == "--nb-clusters":
            nb_clusters = int(arg)

    # Load configuration.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_scripts = config_section_map(config, 'Scripts')

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)
    logging.info("Running with " + str(nb_thread) + " threads ")

    one_nn_script = config_scripts['1nn']

    # Sift files to process.
    sifts = glob.glob(input_sift + '*.sift')
    logging.info("getting " + str(len(sifts)) + " sift files")

    # Create the output dir when missing.
    logging.info("results_dir = " + results_dir)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)

    logging.info('initialisation des fichiers Sift')
    begin_time = timeit.default_timer()
    cmds = []
    for sift_file in sifts:
        sift_file_name = os.path.basename(sift_file)
        file_name = os.path.splitext(sift_file_name)[0] + ".map"
        results_file = os.path.join(results_dir, file_name)
        # R --slave --no-save --no-restore --no-environ --args centers256.txt 256 all_for_R_demo_30 res1nn.txt < 1nn.R
        command = ["R", "--slave", "--no-save", "--no-restore", "--no-environ", "--args"]
        command += [centroids_file, str(nb_clusters), sift_file, results_file]
        cmds.append(command)

    logging.info('---------------PROCESS--------------------')
    # Simple process pool; each entry is [command, Popen, temp file, stdin fh].
    process = []
    while len(cmds) != 0 or len(process) != 0:
        if len(process) < nb_thread and len(cmds) != 0:
            file_string = open(one_nn_script, "r")
            cmd = cmds.pop()
            # cmd[8] is the sift file argument: strip its 3-line header, the
            # ";" separators and the "<CIRCLE ...>" prefixes for R.
            temp_sift_file = os.path.join(
                "/tmp", os.path.splitext(os.path.basename(cmd[8]))[0] + ".temp")
            os.system("sed -n '4,$p' " + cmd[8] +
                      " | tr -d \";\" |sed 's/<CIRCLE [1-9].*> //' > " + temp_sift_file)
            cmd[8] = temp_sift_file
            process.append([cmd,
                            subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE,
                                             stdin=file_string,
                                             preexec_fn=os.setsid),
                            temp_sift_file, file_string])
            logging.info("running : " + " ".join(cmd))
        # Fix: the original popped entries while iterating the same list with
        # enumerate(), which skips the element following every finished
        # process.  Rebuild the list of live processes instead.
        still_running = []
        for p in process:
            if p[1].poll() is None:
                still_running.append(p)
                continue
            stream_data = p[1].communicate()
            rc = p[1].returncode
            if rc != 0:
                logging.warning(stream_data)
                logging.warning("command " + " ".join(p[0]))
            logging.info(" end : " + " ".join(p[0]))
            logging.info(" remove temp file : " + p[2])
            os.remove(p[2])
            p[3].close()
        process = still_running
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time) + 's generated ' +
                 str(len(sifts)) + " concept models")
    sys.exit(0)
def main(argv):
    """Evaluate one photo against every trained concept model.

    Pipeline: generate the SIFT descriptor file, compute the colour
    histogram through the C library, run svm-predict for the colour and SIFT
    models of each concept, then write three JSON result files next to the
    image: <image>.color.json, <image>.sift.json and <image>.fusion.json
    (weighted combination of both scores).

    Options: --config ini file, --image-path photo to evaluate, --result
    (parsed but unused here).
    """
    help_str = 'eval photo'
    try:
        opts, args = getopt.getopt(argv, None, ["config=", "image-path=", "result="])
    except getopt.GetoptError as err:
        print(help_str)
        print(str(err))
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_str)
            sys.exit()
        # Fix: explicit long-option comparison instead of substring tests.
        elif opt == "--config":
            config_file = arg
        elif opt == "--image-path":
            image_path = arg
        elif opt == "--result":
            result = arg

    # Load configuration sections (fix: dropped a duplicated libSvm read).
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_predict = config_section_map(config, 'Predict')
    config_scripts = config_section_map(config, 'Scripts')
    config_svm = config_section_map(config, 'libSvm')
    config_libc = config_section_map(config, 'libC')

    one_nn_script = config_scripts['1nn']
    svm_predict = config_svm['svm_predict']

    # Logger setup: one timestamped log file per run.
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = (str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) +
                "_" + str(now.second) + "_" + str(now.microsecond))
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)

    # --- Generation of the sift file -------------------------------------
    if not os.path.exists(image_path):
        logging.warning("not image found at path " + image_path)
        exit(1)
    working_dir = os.path.dirname(image_path)
    sift_file = os.path.join(
        working_dir, os.path.splitext(os.path.basename(image_path))[0] + '.sift')
    color_descriptor_exec = os.path.join(config_general['project_dir'],
                                         'dep', 'colorDescriptor')
    create_sift_cmd = [color_descriptor_exec, '--descriptor', 'sift',
                       image_path, '--output', sift_file]
    logging.info('sift commande : ' + str(create_sift_cmd))
    process = subprocess.Popen(create_sift_cmd)
    # NOTE(review): `Time` is presumably the `time` module imported under
    # this alias elsewhere in the file — confirm.
    while process.poll() is None:
        Time.sleep(0)

    # --- Per-concept SIFT parameters -------------------------------------
    model_folders = dict()
    folder_tmpl = Template('centers-${centers}_g-${g}_w-${w}')
    centers_tmpl = Template(os.path.join(config_predict['centers_folders'],
                                         'centers${nb_centers}.txt'))
    res_file = config_predict['best_results_sift']
    input_folder = config_predict['sift_folders']
    concepts = []
    with open(res_file, "r") as best_option:
        content = best_option.read().splitlines()
        for line in content:
            if 'all' in line:
                continue
            row = line.split(" ")
            concept_name = row[1]
            concepts.append(concept_name)
            concept_map = dict()
            nb_centers = row[3]
            g_value = row[5]
            w_value = row[7]
            folder_name = folder_tmpl.substitute(g=g_value, centers=nb_centers,
                                                 w=w_value)
            if not input_folder.startswith("/"):
                folder_path = os.path.join(config_general['project_dir'],
                                           input_folder, folder_name)
            else:
                folder_path = os.path.join(input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " +
                                concept_name + " not found")
            else:
                concept_map['sift_folder'] = folder_path
                concept_map['sift_g'] = g_value
                concept_map['sift_w'] = w_value
                concept_map['sift_centers'] = nb_centers
                concept_map['sift_centers_file'] = centers_tmpl.substitute(nb_centers=nb_centers)
                concept_map['sift_model_file'] = os.path.join(
                    folder_path, 'model', concept_name + ".model")
                if not os.path.exists(os.path.join(folder_path, 'model',
                                                   concept_name + ".model")):
                    logging.warning("no model found : " + os.path.join(
                        folder_path, 'model', concept_name + ".model"))
                model_folders[concept_name] = concept_map
                logging.info("map for concept : " + concept_name + " -> " +
                             str(concept_map))
    concepts = set(concepts)

    # --- Parameters for fusion -------------------------------------------
    fusion_model_map = dict()
    fusion_res_files = config_predict['best_results_fusion']
    fusion_parameters = open(fusion_res_files, 'r').read().splitlines()
    for line in fusion_parameters:
        content = line.split(' ')
        concept_map = dict()
        concept_name = content[1]
        concept_map['sift_coef'] = float(content[3])
        concept_map['color_coef'] = float(content[5])
        fusion_model_map[concept_name] = concept_map

    # --- Parameters for color --------------------------------------------
    color_res_file = config_predict['best_results_color']
    color_input_folder = config_predict['color_folders']
    color_folder_tmpl = Template('g-${g}_w-${w}')
    color_model_folder = dict()
    with open(color_res_file, "r") as color_best_option:
        content = color_best_option.read().splitlines()
        for line in content:
            if 'all' in line:
                continue
            row = line.split(" ")
            concept_name = row[1]
            color_concept_map = dict()
            g_value = row[3]
            w_value = row[5]
            folder_name = color_folder_tmpl.substitute(g=g_value, w=w_value)
            if not input_folder.startswith("/"):
                folder_path = os.path.join(config_general['project_dir'],
                                           color_input_folder, folder_name)
            else:
                folder_path = os.path.join(color_input_folder, folder_name)
            logging.info("folder for " + concept_name + " -> " + folder_path)
            if not os.path.exists(folder_path):
                logging.warning("folder " + folder_name + " required for " +
                                concept_name + " not found [color]")
            else:
                color_concept_map['color_folder'] = folder_path
                color_concept_map['color_g'] = g_value
                color_concept_map['color_w'] = w_value
                color_concept_map['color_model_file'] = os.path.join(
                    folder_path, 'model', concept_name + ".model")
                if not os.path.exists(os.path.join(folder_path, 'model',
                                                   concept_name + ".model")):
                    # Fix: the original concatenated the *boolean* result of
                    # os.path.exists into the message (TypeError); log the path.
                    logging.warning("no model found : " + os.path.join(
                        folder_path, 'model', concept_name + ".model"))
                color_model_folder[concept_name] = color_concept_map
                logging.info("map for concept : " + concept_name + " -> " +
                             str(concept_map))

    # --- Colour histogram (PNG images are converted to jpg first) ---------
    jpg_name = image_path
    is_png = (os.path.splitext(image_path)[1] == '.PNG' or
              os.path.splitext(image_path)[1] == '.png')
    if is_png:
        jpg_name = os.path.splitext(image_path)[0] + '.jpg'
        print("convert")
        os.system('convert ' + image_path + ' ' + jpg_name)
    lib = cdll.LoadLibrary(config_libc['libhistogram'])
    libc = CDLL('libc.so.6')
    fp = libc.fopen(os.path.join(working_dir, "color_histogram.svm"), "w")
    hist = pointer(HISTROGRAM())
    path = jpg_name
    lib.read_img(hist, path)
    lib.print_histogram_libsvm(fp, hist, 0)
    lib.free_histogram(hist)
    # need to fflush otherwise data won't be written till the prg finishes
    libc.fflush(fp)
    # Fix: the original called libc.close() on a FILE*; fclose is required.
    libc.fclose(fp)
    if is_png:
        os.system('rm ' + jpg_name)

    # --- Predict: color ---------------------------------------------------
    res_files = []
    for concept in color_model_folder:
        concept_map = color_model_folder[concept]
        g_param = concept_map['color_g']
        w_param = concept_map['color_w']
        svm_file = os.path.join(working_dir, 'color_histogram.svm')
        logging.info('color svm file for predict ' + svm_file)
        logging.info('model for predict ' + concept_map['color_model_file'])
        concept_out = os.path.join(working_dir, concept + '.color.out')
        res_files.append(concept_out)
        logging.info("best parameters for " + concept + " concept -> " +
                     " g : " + g_param + " w : " + w_param)
        predict_command = [svm_predict, '-b', '1', svm_file,
                           concept_map['color_model_file'], concept_out]
        logging.info("predict color cmd : " + " ".join(predict_command))
        predict_process = subprocess.Popen(predict_command)
        while predict_process.poll() is None:
            Time.sleep(0)
        p_rc = predict_process.returncode
        if p_rc != 0:
            logging.warning("error during prediction for concept " + concept)
            logging.warning("command : " + " ".join(predict_command))
            exit(1)

    # --- Collect color results -------------------------------------------
    collect_dict = dict()
    fusion_collect_dict = dict()
    for res in res_files:
        cpt_name = os.path.basename(res.split('.')[0])
        if cpt_name not in fusion_collect_dict:
            fusion_collect_dict[cpt_name] = 0.0
        if not os.path.exists(res):
            logging.warning("no output res for " + res)
            continue
        with open(res, "r") as results_stream:
            content = results_stream.read().splitlines()
            is_concept = content[1].split(" ")[0]
            # First line "labels a b" gives the column of the positive class.
            if content[0].split(" ")[1] == "1":
                res_map = content[1].split(" ")[1]
            else:
                res_map = content[1].split(" ")[2]
            # calcul of fusion on the thumb
            fusion_collect_dict[cpt_name] = (
                float(res_map) * float(fusion_model_map[cpt_name]['color_coef']))
            collect_dict[cpt_name] = {"map": res_map,
                                      "is_concept": is_concept,
                                      "concept": cpt_name}
    finale_res = os.path.basename(image_path) + ".color.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(collect_dict))

    # --- Wait for the sift generation started earlier ---------------------
    while process.poll() is None:
        Time.sleep(0)
    rc = process.returncode
    if rc != 0:
        logging.warning("error while generating sift file")
        exit(1)
    if not os.path.exists(sift_file):
        logging.warning('error while creating sift file')
        exit(1)

    # --- Generation of the cluster mappings (one per distinct center size) -
    temp_sift_file = os.path.join(working_dir, sift_file + '.tmp')
    os.system("sed -n '4,$p' " + sift_file +
              " | tr -d \";\" |sed 's/<CIRCLE [1-9].*> //' > " + temp_sift_file)
    map_files = []
    for concept in concepts:
        # Fix: the original tested `concept_name` (a stale variable left over
        # from a previous loop) instead of the loop variable `concept`.
        if concept not in model_folders:
            logging.info("missing concept " + concept + " into information map")
            continue
        with open(one_nn_script, "r") as file_string:
            concept_map = model_folders[concept]
            mapping_file = os.path.join(
                working_dir,
                "mapping-center" + str(concept_map['sift_centers']) + ".map")
            logging.info("mapping at " + mapping_file)
            # remember the expected mapping files for the histogram step
            map_files.append(mapping_file)
            if not os.path.exists(mapping_file):
                command = ["R", "--slave", "--no-save", "--no-restore",
                           "--no-environ", "--args"]
                command += [concept_map['sift_centers_file'],
                            str(concept_map['sift_centers']),
                            temp_sift_file, mapping_file]
                logging.info("mapping command for concept " + concept +
                             " -> " + " ".join(command))
                mapping_process = subprocess.Popen(command,
                                                   stdout=subprocess.PIPE,
                                                   stderr=subprocess.PIPE,
                                                   stdin=file_string)
                while mapping_process.poll() is None:
                    Time.sleep(0)

    # --- Build one libSVM histogram file per generated mapping ------------
    map_files = set(map_files)
    generated_mapping_files = glob.glob(working_dir + '/*.map')
    for gen_file in set(generated_mapping_files):
        if gen_file not in map_files:
            logging.warning("failed to create mapping for file " + gen_file)
        nb_cluster = re.search('([0-9]*).map$', gen_file).group(1)
        result_files_svm_path = os.path.join(working_dir,
                                             "svm_file" + nb_cluster + ".svm")
        try:
            svm_file = open(result_files_svm_path, "w")
            logging.info("opening : " + result_files_svm_path)
            nb_cluster_int = int(nb_cluster)
            histogram = create_histogram(gen_file, nb_cluster_int)
            svm_file.write("0 ")
            for i, val in enumerate(histogram):
                if val != 0.0:
                    svm_file.write(str(i+1) + ":")
                    svm_file.write(str(val) + " ")
            svm_file.write("\n")
            svm_file.close()
        except ValueError:
            logging.warning("error in map file, cannot convert " +
                            nb_cluster + " to int")

    # --- Predict: sift ----------------------------------------------------
    res_files = []
    for concept in model_folders:
        concept_map = model_folders[concept]
        g_param = concept_map['sift_g']
        w_param = concept_map['sift_w']
        nb_centers = concept_map['sift_centers']
        svm_file = os.path.join(working_dir, 'svm_file' + nb_centers + ".svm")
        logging.info('svm file for predict ' + svm_file)
        logging.info('model for predict ' + concept_map['sift_model_file'])
        concept_out = os.path.join(working_dir, concept + '.sift.out')
        res_files.append(concept_out)
        logging.info("best parameters for " + concept + " concept -> " +
                     "centers " + nb_centers + " g : " + g_param +
                     " w : " + w_param)
        predict_command = [svm_predict, '-b', '1', svm_file,
                           concept_map['sift_model_file'], concept_out]
        logging.info(" ".join(predict_command))
        predict_process = subprocess.Popen(predict_command)
        while predict_process.poll() is None:
            Time.sleep(0)
        p_rc = predict_process.returncode
        if p_rc != 0:
            logging.warning("error during prediction for concept " + concept)
            logging.warning("command : " + " ".join(predict_command))
            exit(1)

    # --- Collect sift results and compute the fusion ----------------------
    collect_dict = dict()
    for res in res_files:
        cpt_name = os.path.basename(res.split('.')[0])
        # Fix (consistency with the color loop): initialise the accumulator
        # so a concept with no color result does not raise KeyError.
        if cpt_name not in fusion_collect_dict:
            fusion_collect_dict[cpt_name] = 0.0
        if not os.path.exists(res):
            logging.warning("no output res for " + res)
            continue
        with open(res, "r") as results_stream:
            content = results_stream.read().splitlines()
            is_concept = content[1].split(" ")[0]
            if content[0].split(" ")[1] == "1":
                res_map = content[1].split(" ")[1]
            else:
                res_map = content[1].split(" ")[2]
            # calcul of fusion on the thumb
            fusion_collect_dict[cpt_name] += (
                float(res_map) * float(fusion_model_map[cpt_name]['sift_coef']))
            collect_dict[cpt_name] = {"map": res_map,
                                      "is_concept": is_concept,
                                      "concept": cpt_name}
    finale_res = os.path.basename(image_path) + ".sift.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(collect_dict))

    # --- Fusion JSON ------------------------------------------------------
    fusion_json_dict = dict()
    for concept in fusion_collect_dict:
        fusion_json_dict[concept] = dict()
        fusion_json_dict[concept]['is_concept'] = 1 if fusion_collect_dict[concept] > 0.5 else -1
        fusion_json_dict[concept]['concept'] = concept
        fusion_json_dict[concept]['map'] = fusion_collect_dict[concept]
    finale_res = os.path.basename(image_path) + ".fusion.json"
    finale_res_path = os.path.join(working_dir, finale_res)
    with open(finale_res_path, "w") as final_res_stream:
        final_res_stream.write(json.dumps(fusion_json_dict))
def main(argv): ############################### # Getting program options ############################### nb_thread = 2 help_str = "formatSift.py" try: opts, args = getopt.getopt(argv, None, ["config=", "url-list=", "results-dir=", "download-dir=", "freq-cut="]) except getopt.GetoptError as err: print help_str print str(err) sys.exit(2) for opt, arg in opts: if opt == "-h": print help_str sys.exit() elif opt in "--config": config_file = arg elif opt in "--url-list": url_list = arg elif opt in "--results-dir": results_dir = arg elif opt in "--download-dir": download_dir = arg elif opt in "--freq-cut": cut_every = int(arg) ######################### # Chargement de la config ######################### config = ConfigParser.ConfigParser() config.read(config_file) config_general = config_section_map(config, "General") ######################### # Configuration du logger ######################### log_dir = config_general["log_dir"] now = datetime.now() date_str = ( str(now.day) + "_" + str(now.hour) + "_" + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond) ) logfile_name = os.path.basename(__file__).split(".")[0] + "-" + date_str + ".log" logging.basicConfig(filename=log_dir + "/" + logfile_name, level=logging.DEBUG) if "cut_every" not in locals(): logging.warning("no defined step") exit(1) ################################################## # Recuperation de la liste des fichiers SIFT ################################################## if "url_list" not in locals(): logging.warning("no sift files provided ") exit(1) logging.info(url_list) if url_list.startswith("http://") or url_list.startswith("https://"): raw_file = urllib2.urlopen(url_list).read() else: raw_file = open(url_list).read() urls = raw_file.splitlines() logging.info("nombre d'url : " + str(len(urls))) ################################################## # Creation du dossier de sauvegarde ################################################## logging.info("download dir = " + download_dir) if not 
os.path.exists(results_dir): logging.info("no output dir creating at " + results_dir) if not subprocess.call(["mkdir", "-p", download_dir]) == 0: logging.warning("cannot create download dir, aborting") sys.exit(1) logging.info("download dir is " + results_dir) ######################################## # Creation du dossier pour les resultats ######################################## logging.info("results_dir = " + results_dir) result_name = os.path.basename(results_dir) results_dir = os.path.dirname(results_dir) if not os.path.exists(results_dir): logging.info("no output dir creating at " + results_dir) if not subprocess.call(["mkdir", "-p", results_dir]) == 0: logging.warning("cannot create output dir, aborting") sys.exit(1) logging.info("output dir is " + results_dir) logging.info("download sift files") begin_dl = timeit.default_timer() nb_existing = 0 for url in urls: if url.endswith(".sift"): name = url.split("/")[-1] sift_path = os.path.join(download_dir, name) if not os.path.exists(sift_path): os.system("wget -P " + download_dir + " " + url + " >/dev/null 2>&1") else: nb_existing = 0 elapsed = timeit.default_timer() - begin_dl logging.info("dl " + str(len(urls)) + " sift file took " + str(elapsed)) logging.info(str(nb_existing) + " sift already in the folder") res_file = open(os.path.join(results_dir, result_name), "w") for url in urls: if url.endswith(".sift"): name = url.split("/")[-1] with open(os.path.join(download_dir, name)) as sift: content = sift.read().splitlines() step = 4 # on passe les trois premieres lignes while step < len(content): line = content[step].split(";")[-2] line = line.lstrip() line = line.rstrip() res_file.write(line) res_file.write("\n") step += cut_every res_file.close() logging.info("end of program")
def main(argv): """ programme which generate a new trec formated files It take in entry a files which indeicates for each concepts the trec formated files to merge. the sum of int float must be 1. Line example aeroplane;/path/toFile1:float1;/path/toFile2:float2;/path/toFile3:float3 """ ############################### # Getting programme options ############################### help_str = 'no help provided' try: opts, args = getopt.getopt(argv, "ho:c:i:", ["config=", "output-file=", "input-folder="]) except getopt.GetoptError as err: print help_str print str(err) sys.exit(2) for opt, arg in opts: if opt == '-h': print help_str sys.exit() elif opt in ("-o", "--output-folder"): res_folder = arg elif opt in ("-i", "--input-file"): input_file = arg elif opt in ("-c", "--config"): config_file = arg ######################### # Chargement de la config ######################### config = ConfigParser.ConfigParser() config.read(config_file) config_general = config_section_map(config, 'General') ######################### # Configuration du logger ######################### log_dir = config_general['log_dir'] now = datetime.now() date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond) logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log' logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG) logging.info("running") FileAndPods = namedtuple("FileAndPods", "file name pods") if not os.path.exists(input_file): logging.warning("input file doesn't exist : " + input_file) sys.exit(1) if not os.path.exists(res_folder): logging.info("creating res folder at : " + res_folder) os.system('mkdir -p ' + res_folder) template = Template("$concept Q0 $name 0 $map R") with open(input_file, 'r') as input_stream : lines = input_stream.read().splitlines() for line in lines: # EACH CONCEPT content = line.split(';') concept_name = content[0] results_file = os.path.join(res_folder , 
concept_name + '.top') files = [] size = -1 nb_trec_file = 0 for i in range(1, len(content)) : # each file pair = content[i].split(':') if not os.path.exists(pair[0]): logging.warning("not able to merge files for concept " + str(concept_name) + " no file " + paire[0] ) continue nb_trec_file += 1 stream = open(pair[0], 'r') tuple = FileAndPods(stream.read().splitlines(),concept_name, pair[1]) stream.close() if (size == -1) : size = len(tuple.file) else : if len(tuple.file) != size: logging.warning("files doesn't have same length cannot merge") files.append(tuple) with open(results_file, 'w') as res_writer: for i in range(0,size): photo_name = None new_val = 0 for elem in files: line = elem.file[i].split(' ') if photo_name is None: photo_name = line[2] else : if photo_name != line[2] : logging.warning("photoname doesn't match") logging.warning(photo_name + " & " + line[2]) sys.exit(1) current_val = float(line[4]) new_val += current_val * float(elem.pods) new_line = template.substitute(concept=elem.name ,name=photo_name, map=new_val) res_writer.write(new_line + "\n") logging.info('end of programme')
def main(argv):
    """From a concept list and a photo list, assign a concept to a list of
    svm-formatted histograms.

    Arguments:
      -c url to a list of concepts
      -H the list of histograms, in the order they appear in the concept files
      -o base path and name for the output files "/file/starting_name_"
      -u base url to fetch the concepts from
         (default: http://mrim.imag.fr/GINF53C4/PROJET/val_photos/ann/)
    """
    ###############################
    # Getting program options
    ###############################
    help_str = 'concept.py -c <concept> -H <histogram> -o <filesbase> -u <urlbase>'
    try:
        opts, args = getopt.getopt(argv, "hu:o:H:c:", ["url=", "output=", "config="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in ("-u", "--url"):
            url_base = arg
        elif opt in "-c":
            concept_file = arg
        elif opt in "-H":
            histogram_file = arg
        elif opt in ("-o", "--output"):
            res_file = arg
        elif opt in "--config":
            config_file = arg
    # mandatory options are detected through locals(): these names only
    # exist if the matching option was parsed above
    if not ('histogram_file' in locals()):
        logging.info('main not correctly called : Histogram file is needed')
        print help_str
        sys.exit()
    if 'config_file' not in locals():
        logging.info('main not correctly called : config file is needed')
        print help_str
        sys.exit()
    #########################
    # Load the configuration
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_libc = config_section_map(config, 'libC')
    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']
    now = datetime.now()
    script_name = os.path.basename(__file__)
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)
    if not ('url_base' in locals()):
        print "missing url base for .ann files"
        sys.exit()
    logging.info("getting recorded concepts from " + url_base)
    # default results location
    if not ('res_file' in locals()):
        print "missing output dir"
        sys.exit()
    logging.info("Results will be at " + res_file + "<conceptname>")
    if not os.path.exists(res_file):
        logging.info("output dir does not exist... creating " + res_file)
        os.system("mkdir -p " + res_file)
    # Retrieve the concept list (http URL or local path)
    logging.info('get concept list from ' + concept_file)
    if concept_file.startswith('http://'):
        response = urllib2.urlopen(concept_file).read()
    else:
        response = open(concept_file).read()
    ##################################
    # Load the histogram into memory
    ##################################
    logging.info('opening histogram model file and read it')
    histogram = open(histogram_file).read()
    ##################################
    # Init variables
    ##################################
    open_files = {}        # concept -> open output file handle
    concept_streams = {}   # concept -> list of .ann lines
    join_seq = " "
    ##################################
    # Initializing concepts
    ##################################
    begintime = timeit.default_timer()
    for concept in response.splitlines():
        logging.info(str(concept))
        concept_record_url = url_base + concept + ".ann"
        logging.info("getting record from " + concept_record_url)
        concept_stream = urllib2.urlopen(concept_record_url).read()
        concept_streams[concept] = concept_stream.splitlines()
        # NOTE(review): concept_file (the -c input path) is rebound here to
        # the output .svm path; harmless since the input was already read,
        # but intentional-looking only after close inspection
        concept_file = res_file + concept + ".svm"
        logging.info("opening results file -> " + concept_file)
        fo = open(concept_file, "wb")
        open_files[concept] = fo
    ###################################
    # Main loop*
    ###################################
    current_line = 0
    # loop through the histogram model once; each histogram row is emitted
    # into every concept file whose .ann label at the same row is non-zero
    for current_line_histogram in histogram.splitlines():
        # drop the first field, keep the histogram values
        histogram_line = join_seq.join((current_line_histogram.split(' ')[1:]))  # get the histogram
        for concept in response.splitlines():
            # NOTE(review): '[ ]*' can match the empty string; relies on
            # re.split skipping zero-width matches (Python 2 behaviour) —
            # '[ ]+' would express the intent directly
            indice = re.split('[ ]*', concept_streams[concept][current_line])[1]
            if not indice == '0':
                open_files[concept].write(str(indice) + ' ')
                open_files[concept].write(histogram_line)
                open_files[concept].write('\n')
            else:
                logging.info('found O at line ' + str(current_line) + ' for ' + concept)
        current_line += 1
    logging.info('nb line tot : ' + str(current_line))
    for out_file in open_files:
        open_files[out_file].close()
    endtime = timeit.default_timer()
    logging.info('end after ' + str(endtime - begintime))
def main(argv): ############################### # Getting program options ############################### generate_all_file = False help_str = 'svm-train.py -c <concepts list>' try: opts, args = getopt.getopt(argv, "", ["base-url-rel=", "input-top=", "config=", "results-dir="]) except getopt.GetoptError as err: print help_str print str(err) sys.exit(2) for opt, arg in opts: if opt == '-h': print help_str sys.exit() elif opt in "--config": config_file = arg elif opt in "--input-top": input_top_files = arg elif opt in "--base-url-rel": rel_base = arg elif opt in "--results-dir": results_dir = arg if generate_all_file: logging.info("all option activated") ######################### # Chargement de la config ######################### config = ConfigParser.ConfigParser() config.read(config_file) config_general = config_section_map(config, 'General') section_trec_eval = config_section_map(config, 'trecEval') ######################### # Configuration du logger ######################### log_dir = config_general['log_dir'] now = datetime.now() date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond) logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log' logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG) ######################### # Get list of top formatted files ######################### concepts = glob.glob(os.path.join(input_top_files, '*.top')) print input_top_files for concept in concepts: logging.info("concept file " + concept) ########################## # Create output dir ########################## logging.info("results_dir = " + results_dir) if not os.path.exists(results_dir): logging.info('no output dir creating at ' + results_dir) if not subprocess.call(['mkdir', '-p', results_dir]) == 0: logging.warning('cannot create output dir, aborting') sys.exit(1) begin_time = timeit.default_timer() for concept_file in concepts: concept_name = 
os.path.basename(concept_file).split(".")[0] res_output = results_dir + concept_name rel_path = '/tmp/' + concept_name + '.rel' url = rel_base + concept_name + ".rel" # curl instead of wget because wget do not override files os.system("curl " + rel_base + "/" + concept_name + ".rel " + ">/tmp/" + concept_name + ".rel ") if not os.path.exists(rel_path): logging.warning("Download error dor file " + url) sys.exit(1) cmd = [section_trec_eval['trec_eval'], '-q', rel_path, concept_file] with open(res_output, "w") as outfile: ret = subprocess.call(cmd, stdout=outfile) if ret != 0: logging.warning("error for " + concept_file) os.remove("/tmp/" + concept_name + ".rel") end_time = timeit.default_timer() logging.info('end after ' + str(end_time - begin_time) + 's generated ' + str(len(concepts)) + " concept models")
def main(argv):
    """Drive the R kmeans script over a sample file.

    Options: --samples= (input data file), --results= (output file path,
    its directory is created if needed), --config=, --nb-clusters=,
    --nb-iter=.  Builds and runs an "R --slave ... --args" command that
    feeds the kmeans script referenced by the [Scripts] config section.
    """
    ###############################
    # Getting program options
    ###############################
    help_str = 'svm-train.py -c <concepts list>'
    try:
        opts, args = getopt.getopt(argv, "h", ["samples=", "results=", "config=", "nb-clusters=", "nb-iter="])
    except getopt.GetoptError as err:
        print help_str
        print str(err)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print help_str
            sys.exit()
        elif opt in "--config":
            config_file = arg
        elif opt in "--samples":
            data = arg
        elif opt in "--results":
            results = arg
        elif opt in "--nb-clusters":
            nb_clusters = arg  # kept as str: spliced into the shell command
        elif opt in "--nb-iter":
            nb_iter = arg  # kept as str: spliced into the shell command
    #########################
    # Load the configuration
    #########################
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    config_general = config_section_map(config, 'General')
    config_scripts = config_section_map(config, 'Scripts')
    #########################
    # Configure the logger
    #########################
    log_dir = config_general['log_dir']
    now = datetime.now()
    date_str = str(now.day) + '_' + str(now.hour) + '_' + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond)
    logfile_name = os.path.basename(__file__).split('.')[0] + '-' + date_str + '.log'
    logging.basicConfig(filename=log_dir + '/' + logfile_name, level=logging.DEBUG)
    # mandatory options are detected through locals(): the names only exist
    # if the corresponding option was parsed above
    if 'data' not in locals() or not os.path.exists(data):
        logging.warning("no data")
        exit(1)
    if 'nb_clusters' not in locals():
        logging.warning("need to specify the number of clusters needed for kmeans")
        exit(1)
    logging.info("running with " + nb_clusters + " clusters to generate")
    if 'nb_iter' not in locals():
        logging.warning("need to specify the number of max iterations")
        exit(1)
    logging.info("running with " + nb_iter + " nb iterations")
    ########################################
    # Create the results file and folder
    ########################################
    logging.info("results_dir = " + results)
    result_name = os.path.basename(results)  # NOTE(review): computed but unused
    results_dir = os.path.dirname(results)
    if not os.path.exists(results_dir):
        logging.info('no output dir creating at ' + results_dir)
        if not subprocess.call(['mkdir', '-p', results_dir]) == 0:
            logging.warning('cannot create output dir, aborting')
            sys.exit(1)
    logging.info('output dir is ' + results_dir)
    ####################
    # Build the command
    ####################
    kmeans_script = config_scripts['kmeans']
    # NOTE(review): shell string assembled by concatenation; paths with
    # spaces or shell metacharacters would break / be unsafe here
    command = "R --slave --no-save --no-restore --no-environ --args "
    command += data + " " + nb_clusters + " " + results + " " + nb_iter + " < " + kmeans_script
    logging.info("command : " + command)
    begin_time = timeit.default_timer()
    #########################
    # Launch the clustering
    #########################
    os.system(command)
    end_time = timeit.default_timer()
    logging.info('end after ' + str(end_time - begin_time))
def main(argv): ############################### # Getting program options ############################### nb_thread = 2 help_str = "svm-train.py -c <concepts list>" try: opts, args = getopt.getopt( argv, "tc:ho:", ["concepts=", "results-dir=", "input-svm=", "config=", "svm-args=", "nb-thread="] ) except getopt.GetoptError as err: print help_str print str(err) sys.exit(2) for opt, arg in opts: if opt == "-h": print help_str sys.exit() elif opt in "--config": config_file = arg elif opt in "--input-svm": input_svm = arg elif opt in "--results-dir": results_dir = arg elif opt in "--svm-args": svm_options = arg elif opt in "--nb-thread": nb_thread = int(arg) ######################### # Chargement de la config ######################### config = ConfigParser.ConfigParser() config.read(config_file) config_general = config_section_map(config, "General") config_svm = config_section_map(config, "libSvm") ######################### # Configuration du logger ######################### log_dir = config_general["log_dir"] ############################ # Initialisation de lib-svm ############################ svm_train = config_svm["svm_train"] now = datetime.now() script_name = os.path.basename(__file__) date_str = ( str(now.day) + "_" + str(now.hour) + "_" + str(now.minute) + "_" + str(now.second) + "_" + str(now.microsecond) ) logfile_name = os.path.basename(__file__).split(".")[0] + "-" + date_str + ".log" logging.basicConfig(filename=log_dir + "/" + logfile_name, level=logging.DEBUG) logging.info("Running with " + str(nb_thread) + " threads ") ######################### # Init svm train_photos command ######################### train_cmd = [svm_train] if "svm_options" in locals(): logging.info("svm options " + svm_options) train_cmd = train_cmd + svm_options.split(" ") # bug with space in subprocess so we have to split it here ######################### # Get list of svm files ######################### concepts = glob.glob(input_svm + "*.svm") for concept in concepts: 
logging.info("concept file " + concept) ########################## # Create output dir ########################## logging.info("results_dir = " + results_dir) if not os.path.exists(results_dir): logging.info("no output dir creating at " + results_dir) if not subprocess.call(["mkdir", "-p", results_dir]) == 0: logging.warning("cannot create output dir, aborting") sys.exit(1) logging.info("output dir is " + results_dir) logging.info("initialisation des concepts") begin_time = timeit.default_timer() cmds = [] for concept_file in concepts: command = [] concept_name = os.path.basename(concept_file).split(".")[0] model_output = results_dir + concept_name + ".model" logging.info("model for " + concept_name + " registered at " + model_output) command = [concept_file, model_output] command = train_cmd + command logging.info("svm call : " + " ".join(command)) cmds.append(command) logging.info("---------------PROCESS--------------------") process = [] while len(cmds) != 0 or len(process) != 0: if len(process) < nb_thread and len(cmds) != 0: cmd = cmds.pop() process.append([cmd, subprocess.Popen(cmd)]) logging.info("running : " + " ".join(cmd)) for idx, p in enumerate(process): if p[1].poll() is not None: logging.info(" end : " + " ".join(p[0])) process.pop(idx) end_time = timeit.default_timer() logging.info("end after " + str(end_time - begin_time) + "s generated " + str(len(concepts)) + " concept models")