def main():
    """Fit a decision tree on the input file and print its scores.

    Command-line arguments supply the file path, the tree's ``max_depth`` and
    ``min_size`` limits, and a ``validate`` flag.  The fitted tree is handed
    to ``TreeNode.show_tree``; accuracy, precision, recall and F-1 are then
    printed for predictions made on the very same rows the tree was fitted
    on.  When ``validate`` is set, a 10-fold cross validation is run on a
    fresh tree and its training/validation scores are printed as well.
    """
    cli = setup_argparser().parse_args()
    source_path = cli.file
    tree_kwargs = {"max_depth": cli.max_depth, "min_size": cli.min_size}
    run_cv = cli.validate

    x, labels = import_file(source_path)

    model = DecisionTree(**tree_kwargs)
    fitted_tree = model.fit(x)
    TreeNode.show_tree(fitted_tree)

    # "Naive" evaluation: predict on the data that was used for fitting.
    predicted = model.predict(x)
    precision, recall, f1 = get_metrics(labels, predicted, class_label=1)
    acc = accuracy(labels, predicted)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        acc, precision, recall, f1))

    if not run_cv:
        return

    # Cross validation gets its own untrained tree with the same limits.
    folds = CrossValidation(k=10)
    fresh_model = DecisionTree(**tree_kwargs)
    train_scores, val_scores, *_ = folds.cross_validate(fresh_model, x, labels)
    print("10-fold cross validation")
    print("Training scores: {0}\nValidation scores: {1}".format(
        train_scores, val_scores))
    return
def export_excel_to_python(self, infile, outfile, converter_name, sheet_index=0):
    """Turn one Excel sheet into a Python data module and store it.

    Args:
        infile: Excel file name, relative to ``xlsconfig.INPUT_PATH``.
        outfile: Output module name; the generated file is
            ``<xlsconfig.TEMP_PATH>/<outfile>.py``.
        converter_name: Name passed to ``self.find_converter``.
        sheet_index: Sheet to parse (defaults to the first one).

    Returns:
        ``True`` when a data module was obtained and stored, ``False`` when
        parsing raised or no data module could be produced.
    """
    converter = self.find_converter(converter_name)

    input_path = os.path.join(xlsconfig.INPUT_PATH, infile)
    output_path = os.path.join(xlsconfig.TEMP_PATH, outfile + ".py")
    # converter.__file__ may point at a compiled file; compare against the .py.
    converter_file = os.path.splitext(converter.__file__)[0] + ".py"

    # Fast path: reuse the previously generated module.
    # NOTE(review): presumably if_file_newer(a, b) is true when a is newer
    # than b, i.e. the output is up to date with both inputs -- confirm.
    up_to_date = (
        xlsconfig.FAST_MODE
        and util.if_file_newer(output_path, input_path)
        and util.if_file_newer(output_path, converter_file)
    )
    if up_to_date:
        data_module = util.import_file(outfile)
    else:
        # Single-argument form prints the same text on Python 2 and 3.
        print("%s - %s" % (infile, converter_name))
        parser = self.parser_class(input_path, converter, sheet_index)
        try:
            parser.run()
        except Exception:
            # Report the failure but let KeyboardInterrupt/SystemExit through.
            traceback.print_exc()
            return False
        data_module = self.create_data_module(infile, outfile, converter_name, parser)

    if data_module is None:
        return False

    self.store_data_module(data_module)
    return True
def main():
    """Cluster the input file agglomeratively, log the scores and plot.

    The file path and the number of clusters come from the command line.
    The computed labels are compared with the ground-truth clusters loaded
    from the file (Rand score and Jaccard coefficient), and both labelings
    are plotted over the dimensionality-reduced data.
    """
    cli = setup_argparser().parse_args()
    source_path = cli.file
    cluster_count = cli.num_clusters

    data, truth_clusters = import_file(source_path, correct_clusters=True)

    points = [Point(row) for row in data]
    model = AgglomerativeClustering(num_clusters=cluster_count)
    labels = model.fit(points)

    logging.info("Labels: {}".format(labels))
    rand = rand_score(truth_clusters, labels)
    logging.info("Rand score: {}".format(rand))
    jaccard = jaccard_coeff(truth_clusters, labels)
    logging.info("Jaccard coefficient: {}".format(jaccard))

    # The data is reduced (PCA, per the original note) before each plot;
    # no centroids are passed for hierarchical clustering.
    for assignment, tag in (
        (truth_clusters, "hierarchical_truth"),
        (labels, "hierarchical_computed"),
    ):
        plot(reduce_dimensionality(data), assignment, None, suffix=tag)
    return
def main():
    """Run DBSCAN on the input file, log the scores and plot the result.

    ``eps`` and ``min_points`` come from the command line.  The labels found
    by DBSCAN are compared with the ground-truth clusters returned by
    ``import_file`` (Rand index and Jaccard coefficient) and both labelings
    are plotted over the dimensionality-reduced data.
    """
    cli = setup_argparser().parse_args()
    source_path = cli.file
    neighbourhood_size = cli.min_points
    radius = cli.eps
    logging.info(cli)

    data, truth_clusters = import_file(source_path, correct_clusters=False)

    scanner = DBSCAN(eps=radius, min_points=neighbourhood_size)
    scanner.dbscan(data)

    rand_message = "Rand Index: {}".format(
        rand_score(truth_clusters, scanner.labels))
    logging.info(rand_message)
    jaccard_message = "Jaccard Coefficient: {}".format(
        jaccard_coeff(truth_clusters, scanner.labels))
    logging.info(jaccard_message)

    # Author's note: the labels barely differ from what scikit-learn
    # produces, so the implementation looks correct.
    # The data is reduced (PCA, per the original note) before each plot;
    # no centroids are passed.
    truth_view = reduce_dimensionality(data)
    plot(truth_view, truth_clusters, None, suffix="dbscan_truth")
    computed_view = reduce_dimensionality(data)
    plot(computed_view, scanner.labels, None, suffix="dbscan_computed")
    return
def create_header_for_excels(excel_files):
    """Create headers for every Excel file matched by the convention table.

    Each entry of ``xlsconfig.CONVENTION_TABLE`` is read as
    ``(pattern, converter_name[, new_name[, sheet_index]])``.  The converter
    module is imported, and unless it sets ``AUTO_GEN_HEADER`` to a falsy
    value, ``create_header`` is called for every file in ``excel_files``
    whose name matches ``pattern``.

    Processing stops at the first file for which ``create_header`` returns a
    falsy value, unless ``xlsconfig.FORCE_RUN`` is set.

    ``xlsconfig.CONVERTER_PATH`` is put at the front of ``sys.path`` while
    converters are imported and is always taken out again, including on the
    early stop and when an exception propagates.

    Args:
        excel_files: Iterable of Excel file names relative to
            ``xlsconfig.INPUT_PATH``.
    """
    print("=== create header for excels ...")

    # Select the header generator according to the configured Excel backend.
    if xlsconfig.USE_OPENPYXL:
        from gen_header_with_openpyxl import create_header
    else:
        from gen_header_with_xlrd import create_header

    converter_path = xlsconfig.CONVERTER_PATH
    sys.path.insert(0, converter_path)
    try:
        for value in xlsconfig.CONVENTION_TABLE:
            converter = import_file(xlsconfig.CONVERTER_ALIAS + "." + value[1])
            if not getattr(converter, "AUTO_GEN_HEADER", True):
                continue

            sheet_index = value[3] if len(value) > 3 else 0
            compiled_pattern = re.compile(value[0])

            for infile in excel_files:
                if not compiled_pattern.match(infile):
                    continue

                input_file = os.path.join(xlsconfig.INPUT_PATH, infile)
                if create_header(input_file, converter, sheet_index):
                    continue
                # A falsy result aborts the whole run unless forced.
                if not xlsconfig.FORCE_RUN:
                    return
    finally:
        # Undo the sys.path change on every exit path.
        sys.path.remove(converter_path)
    return
def main():
    """Fit a random forest on the input file and print its scores.

    All forest hyper-parameters come from the command line.  Accuracy,
    precision, recall and F-1 are printed for predictions made on the same
    rows the forest was fitted on, followed by the training/validation
    scores of a 10-fold cross validation run on a fresh forest.
    """
    cli = setup_argparser().parse_args()
    source_path = cli.file
    forest_kwargs = {
        "num_trees": cli.num_trees,
        "sampling_ratio": cli.sampling_ratio,
        "max_depth": cli.max_depth,
        "min_size": cli.min_size,
        "features_ratio": cli.features_ratio,
    }

    x, labels = import_file(source_path)

    forest = RandomForest(**forest_kwargs)
    forest.fit(x)

    # "Naive" evaluation: predict on the data that was used for fitting.
    predictions = forest.predict(x)
    precision, recall, f1 = get_metrics(labels, predictions, class_label=1)
    acc = accuracy(labels, predictions)
    print("Naive results")
    print("Accuracy: {}, Precision: {}, Recall: {}, F-1: {}".format(
        acc, precision, recall, f1))

    # Cross validation gets its own untrained forest with the same settings.
    folds = CrossValidation(k=10)
    fresh_forest = RandomForest(**forest_kwargs)
    train_scores, val_scores, *_ = folds.cross_validate(fresh_forest, x, labels)
    print("10-fold cross validation")
    print("Training scores: {0}\nValidation scores: {1}".format(
        train_scores, val_scores))
    return
def find_converter(self, name):
    """Return the converter module for ``name``, importing it on first use.

    Modules are cached in ``self.converter_modules``.  On a cache miss the
    module ``<xlsconfig.CONVERTER_ALIAS>.<name>`` is imported, tagged with
    ``_name = name`` and stored in the cache before being returned.
    """
    cached = self.converter_modules.get(name)
    if cached is not None:
        return cached

    module = util.import_file(xlsconfig.CONVERTER_ALIAS + "." + name)
    module._name = name
    self.converter_modules[name] = module
    return module
def generate(task_id, token, team):
    """Generate a task instance for ``token`` and record when it was made.

    The task's own ``generate.py`` (inside its directory under
    ``configuration['tasks_path']``) is imported and its ``generate``
    function is called with the raw task, the token and the team.  The
    result is passed to ``write_generated_task`` and the current time is
    written to ``<task_dir>/generated/<token>/gen_ts``.
    """
    # Called only to make sure the generation config is available; the
    # return value is not used.
    read_task_generation_config(task_id)

    # NOTE(review): `{}` placeholders with positional args fit a
    # loguru-style logger, not stdlib logging -- confirm which one this is.
    logger.info('Generating task {} with token {}', task_id, token)

    task_dir = os.path.join(configuration['tasks_path'], str(task_id))
    os.makedirs(task_dir, exist_ok=True)

    generator_path = os.path.join(task_dir, 'generate.py')
    generator_module = util.import_file(generator_path)
    raw_task = tasks.read_task(task_id)
    generated = generator_module.generate(task=raw_task, token=token, team=team)
    write_generated_task(generated, token)

    stamp_path = os.path.join(task_dir, 'generated', token, 'gen_ts')
    with open(stamp_path, 'w') as stamp_file:
        stamp_file.write(str(time.time()))
def generate_code():
    """Generate code files from the ``configures`` module in TEMP_PATH.

    Looks for ``configures.py`` under ``xlsconfig.TEMP_PATH``.  If it is
    missing, the result of ``log_error`` is returned.  Otherwise the module
    is imported and ``_generate(cfg["types"], key)`` is called for every
    entry of its ``configures`` mapping.

    ``xlsconfig.TEMP_PATH`` is on ``sys.path`` only for the duration of the
    import and is removed again even when the import fails.
    """
    print("=== 生成代码类文件 ...")

    configure_file_path = os.path.join(xlsconfig.TEMP_PATH, "configures.py")
    if not os.path.exists(configure_file_path):
        return log_error("配置文件'%s'不存在", configure_file_path)

    temp_path = xlsconfig.TEMP_PATH
    sys.path.insert(0, temp_path)
    try:
        configure_module = util.import_file("configures")
    finally:
        # Do not leave TEMP_PATH on sys.path if the import raises.
        sys.path.remove(temp_path)

    # items() iterates the same pairs as iteritems() and exists on Python 3.
    for key, cfg in configure_module.configures.items():
        _generate(cfg["types"], key)
def load_converter(self, name):
    """Import the converter module ``name`` if it exists and is usable.

    Returns ``None`` when no ``.py`` file for the (dotted) name exists under
    ``<CONVERTER_PATH>/<CONVERTER_ALIAS>``, or when the imported module has
    no ``CONFIG`` attribute.  Otherwise the module is tagged with
    ``_name = name`` and returned.
    """
    # Dots in the name are turned into path separators ('//').
    source_file = os.path.join(
        xlsconfig.CONVERTER_PATH,
        xlsconfig.CONVERTER_ALIAS,
        name.replace('.', '//') + ".py",
    )
    if not os.path.isfile(source_file):
        return None

    module = util.import_file(xlsconfig.CONVERTER_ALIAS + "." + name)

    # Original note: the name may refer to a folder, so it has to be
    # validated -- only modules that define CONFIG are accepted.
    if hasattr(module, "CONFIG"):
        module._name = name
        return module
    return None
def init(self):
    """Load the listener modules, then create and register the bots.

    Every entry of the ``modules_path`` directory whose name does not start
    with an underscore is imported; entries accepted by
    ``util.check_module`` get their ``load`` called with ``self.listeners``.
    Afterwards one ``TelegramBot`` is created per entry of
    ``self.config['bots']``, its info is refreshed, and it is appended to
    ``self.bots``.
    """
    for entry in os.listdir(self.config['modules_path']):
        if entry[0] == '_':
            continue
        candidate = util.import_file(self.config['modules_path'], entry)
        if not util.check_module(candidate):
            continue
        candidate.load(self.listeners)
    print("Listeners loaded: ", self.listeners)

    # Set up the bots
    for settings in self.config['bots'].values():
        client = twx.botapi.TelegramBot(settings['api_key'])
        # Wait for the info request to finish before reading the username.
        client.update_bot_info().wait()
        print("Loaded bot: ", client.username)
        self.bots.append(client)
def main():
    """Run k-means on the input file, log the scores and plot the result.

    Cluster count, iteration limit, tolerance, random-start flag and
    optional initial centroids come from the command line.  The resulting
    labels are compared with the ground-truth clusters loaded from the file
    and both labelings are plotted over the dimensionality-reduced data.

    Raises:
        ValueError: initial centroids were given but their number differs
            from the requested number of clusters.
    """
    cli = setup_argparser().parse_args()
    source_path = cli.file
    cluster_count = cli.num_clusters
    random_start = cli.random_start
    iteration_cap = cli.num_iterations
    tolerance = cli.tolerance
    seed_centroids = cli.centroids

    if seed_centroids and len(seed_centroids) != cluster_count:
        raise ValueError(
            "Number of centroids provided does not match number of clusters")

    logging.info(cli)
    data, truth_clusters = import_file(source_path, correct_clusters=True)

    model = KMeans(
        num_clusters=cluster_count,
        tolerance=tolerance,
        max_iterations=iteration_cap,
    )
    centroids = model.fit(
        data,
        initial_centroids=seed_centroids,
        random_start=random_start,
    )
    score = model.score(truth_clusters)

    logging.info("Centroids: {}".format(centroids))
    logging.info("Accuracy Score: {}".format(score))
    rand_message = "Rand Index: {}".format(
        rand_score(truth_clusters, model.labels))
    logging.info(rand_message)
    jaccard_message = "Jaccard Coefficient: {}".format(
        jaccard_coeff(truth_clusters, model.labels))
    logging.info(jaccard_message)

    # Data and centroids both go through the same reduction (PCA, per the
    # original note) so they can be drawn on the same plot.
    truth_view = reduce_dimensionality(data)
    plot(truth_view, truth_clusters, None, suffix="kmeans_truth")
    computed_view = reduce_dimensionality(data)
    plot(computed_view, model.labels, reduce_dimensionality(centroids),
         suffix="kmeans_computed")
    return
def get_options():
    """Collect all run options from the command line, prompting for gaps.

    Arguments not supplied on the command line are asked for interactively:
    the authentication type, the credentials that belong to it, the server
    and (unless ``--ignore`` is set) the bot file.

    Returns:
        The parsed ``argparse`` namespace, extended with ``address`` and
        ``port`` (split out of ``server``, port defaulting to 25565) and,
        when a bot file was given, ``Bot`` loaded from that file.

    Raises:
        ValueError: unknown authentication type, unparsable server address,
            or a bot file that cannot be found.
        NotImplementedError: MCLeaks authentication was selected.
    """
    parser = ArgumentParser(prog="CancerCraft")

    auth_type = parser.add_subparsers(
        dest='auth_type',
        help='choose either Mojang or MCLeaks as authentication type')

    auth_type_Mojang = auth_type.add_parser('Mojang', help='Mojang authentication')
    auth_type_Mojang.add_argument(
        '-u', '--user', dest='username', type=str,
        help='Mojang username', default=None)
    auth_type_Mojang.add_argument(
        '-p', '--pass', dest='password', type=str,
        help='Mojang password', default=None)

    auth_type_MCLeaks = auth_type.add_parser('MCLeaks', help='MCLeaks authentication')
    auth_type_MCLeaks.add_argument(
        '-n', '--name', dest='name', type=str,
        help='name of the cached MCLeaks account (leave blank to use a new one)',
        default=None)

    parser.add_argument(
        '-s', '--server', dest='server', type=str,
        help='server host or host:port (enclose IPv6 addresses in square brackets)',
        default=None)
    parser.add_argument(
        '-b', '--bot', dest='bot_path', type=str,
        help='relative/absoulte path of the bot file (relative path from: ./bots)',
        default=None)
    parser.add_argument(
        '-i', '--ignore', dest='ignore', action='store_true',
        help='will assume that non-required variables such as \'name\' were left blank on purpose and thus ignore them',
        default=False)

    options = parser.parse_args()

    if not options.auth_type:
        options.auth_type = input('Enter the desired authentication type (Mojang | MCLeaks): ')

    # The sub-parser attributes only exist when the sub-command was given on
    # the command line, hence the hasattr checks.
    # TODO maybe don't be case-sensitive
    if options.auth_type == 'Mojang':
        # TODO maybe use 'easier to ask for forgiveness than permission' (EAFP) over 'look before you leap' (LBYL)
        if not hasattr(options, 'username') or not options.username:
            options.username = input('Enter your username: ')
        if not hasattr(options, 'password') or not options.password:
            options.password = getpass('Enter your password: ')
        # TODO maybe allow 'leave blank for offline mode' (combined with -b)
    elif options.auth_type == 'MCLeaks':
        if not hasattr(options, 'name') or not options.name:
            if options.ignore:
                options.name = None
            else:
                options.name = input('Enter the name of a cached MCLeaks account (leave blank to use a new one): ')
    else:
        raise ValueError(f'Invalid authentication type: \'{options.auth_type:s}\'')

    if not options.server:
        options.server = input('Enter server host or host:port (enclose IPv6 addresses in square brackets): ')

    # Accept "host", "host:port", "[addr]" and "[addr]:port".
    match = re.match(r'((?P<host>[^\[\]:]+)|\[(?P<addr>[^\[\]]+)\])'
                     r'(:(?P<port>\d+))?$', options.server)
    if match is None:
        # f-string so the offending address shows up in the message.
        raise ValueError(f'Invalid server address: \'{options.server:s}\'')
    options.address = match.group('host') or match.group('addr')
    options.port = int(match.group('port') or 25565)

    if not options.ignore:
        if not options.bot_path:
            options.bot_path = input('Enter the relative/absoulte path of the bot file (relative path from: ./bots): ')

    if options.bot_path:
        directory = path.dirname(path.realpath(__file__))
        if not options.bot_path.endswith('.py'):
            options.bot_path += '.py'

        # A file inside ./bots (next to this module) wins over the path as given.
        if path.isfile(f'{directory:s}/bots/{options.bot_path:s}'):
            options.bot_path = f'{directory:s}/bots/{options.bot_path:s}'
        elif not path.isfile(options.bot_path):
            raise ValueError(f'Invalid bot file path: \'{options.bot_path:s}\'')

        options.Bot = util.import_file('Bot', options.bot_path).Bot

    # TODO implement
    if options.auth_type == 'MCLeaks':
        raise NotImplementedError
    ############

    return options