def main() -> None:
    """Entrypoint into the command-line interface."""
    parser = argparse.ArgumentParser(
        description="Lists files in a directory."
    )
    parser.add_argument(
        "path", type=str, help="The path to show files in."
    )
    # Parse and process commands
    args = parser.parse_args()
    if args.path:
        path = args.path
        files = list_files(path)
        for f in files:
            print(
                "d" if f.isdir else "f",
                f" {f.human_readable_bytes:<12}",
                f.path
            )
def read_games(tournament_dir):
    game_files = list_files(tournament_dir, '.json')
    games = []
    for game_file in game_files:
        with open(game_file, 'r') as fin:
            games.append(json.load(fin))
    return games
def read_data(data_path, max_games=None):
    game_files = list(sorted(list_files(data_path, '.json')))
    augmentations = list(Augmentation.iter_augmentations())
    if max_games is not None:
        game_files = list(game_files)[-max_games:]
    print('Using game files from %s to %s' % (game_files[0], game_files[-1]))

    x = []
    y_policy = []
    y_reward = []
    for game_path in tqdm(game_files):
        with open(game_path, 'r') as fin:
            game_data = json.load(fin)
        winner, starter, actions, policies = AlphaConnectSerializer.deserialize(game_data)

        state = State.empty()
        states = [state]
        for action in actions:
            state = state.take_action(action)
            states.append(state)
        states, final_state = states[:-1], states[-1]

        n_samples = min(len(states), len(augmentations))
        game_samples = sample(list(range(len(states))), n_samples)
        for augmentation, i in zip(augmentations, game_samples):
            augmented_action_order = sorted(Action.iter_actions(),
                                            key=lambda a: a.augment(augmentation).to_int())
            x.append(states[i].to_numpy(augmentation))
            y_policy.append([policies[i].get(action, 0.0) for action in augmented_action_order])
            y_reward.append(winner_value(final_state.winner, states[i]))

    return np.array(x), np.array(y_policy), np.array(y_reward)
def pageRank(self):
    sourceDirectory = settings.PAGERANK_RESOURCE_DIRECTORY
    destDirectory = PAGERANK_DESTINATION_DIRECTORY
    docs = []
    id2index = {}

    # print('start read files')
    # read files
    for file in map(lambda x: os.path.join(sourceDirectory, x),
                    list_files(sourceDirectory, '*.json')):
        with open(file, 'r') as readFile:
            doc = json.load(readFile)
            id2index[doc['id']] = len(docs)
            self.progress_bar.next()
            docs.append(doc)

    # print('start calc page rank')
    # create links matrix
    n = len(docs)
    p = []
    for doc in docs:
        pp = [0] * n
        for linkID in filter(lambda x: x in id2index.keys(),
                             (set(doc['cited_in']) | set(doc['refrences']))):
            pp[id2index[linkID]] = 1
        p.append(pp)

    # calculate page rank
    pr = self.pageRankMathCalculation(p, PAGERANK_ALFA, PAGERANK_ERROR)

    # print('start save files')
    # save docs
    os.makedirs(destDirectory, exist_ok=True)
    for doc, pagerank in zip(docs, pr):
        doc['pageRank'] = pagerank
        file_name = '{}.json'.format(doc['id'])
        with open(os.path.join(destDirectory, file_name), 'w') as outfile:
            json.dump(doc, outfile)
def load_subspace_pairs(subspace_pairs):
    """Load the pairs used for gender subspace definition.

    Returns:
        Dict[str, List[Tuple[str, str]]]: Dictionary of gender word pairs.
            The value is a list of (male, female) words; the key is the name
            of that list.
    """
    filepairs = []
    for direction_file in list_files(DIRECTIONS_PATH):
        with open(direction_file) as fd:
            filepairs.extend(
                (os.path.basename(direction_file), tuple(line.strip().split()))
                for line in fd)
    if subspace_pairs == 'pair':
        keyfunc = (lambda filepair: '-'.join(filepair[1]))
    elif subspace_pairs == 'group':
        keyfunc = (lambda filepair: filepair[0])
    elif subspace_pairs == 'all':
        keyfunc = (lambda filepair: 'all')
    else:
        raise ValueError(
            'unrecognized subspace pair parameter: {}'.format(subspace_pairs))
    return {
        key: [filepair[1] for filepair in group]
        for key, group in groupby(filepairs, key=keyfunc)
    }
def __init__(self, path, x_sufix, page_size):
    self.path = path
    self.x_sufix = x_sufix
    self.v_sufix = None
    self.array_x_files = util.list_files(os.path.join(path, x_sufix), ext='png')
    self.pos = 0
    self.page_size = page_size
def build_all_fasttext_models(model_type='skipgram'):
    if model_type not in ['skipgram', 'cbow']:
        raise ValueError('model_type must be "skipgram" or "cbow" but got "' + str(model_type) + '"')
    for corpus_file in list_files(CORPORA_PATH):
        if not corpus_file.endswith('-swapped'):
            create_pronoun_swapped_corpus(corpus_file, 'swap-pairs/pronouns')
    for corpus_file in list_files(CORPORA_PATH):
        model_stub = os.path.join(
            MODELS_PATH,
            os.path.basename(corpus_file) + '.' + model_type,
        )
        if not os.path.exists(model_stub + '.bin'):
            subprocess.run(args=[
                'fasttext', model_type,
                '-input', corpus_file,
                '-output', model_stub
            ])
def main():
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print("usage: indexer </path/to/files/for/indexing/> <indexer> [cutoff]")
        exit()
    use_cutoff_freq = False
    if len(sys.argv) == 4 and sys.argv[3].lower() == "cutoff":
        use_cutoff_freq = True
    idxr = getattr(sys.modules[__name__], sys.argv[2])()
    for inp_file in list_files(sys.argv[1]):
        idxr.index_file(inp_file, use_cutoff_freq)
def bulk_add_documents_in_directory(self, folder, index_name, document_type, pattern='*.json'):
    """
    Add documents in a directory whose names match the given pattern.

    :param folder: directory address
    :param index_name: name of index (e.g. articles)
    :param document_type: type of documents (e.g. paper)
    :param pattern: file name pattern (wildcard). default: *.json
    :return:
    """
    return self.bulk_add_documents_file_list(
        map(lambda name: self.base_folder + name, list_files(folder, pattern=pattern)),
        index_name,
        document_type)
def clusterDocs(self):
    api = TermVectorAPI(ELASTIC_URL)

    # read files
    for file in map(lambda x: os.path.join(CLUSTER_SOURCE_DIRECTORY, x),
                    list_files(CLUSTER_SOURCE_DIRECTORY, '*.json')):
        with open(file, 'r') as readFile:
            doc = json.load(readFile)
            self.docsJson[doc['id']] = doc
            self.docVector[doc['id']] = Vector(api.get_term_vector(INDEX_NAME, DOCUMENT_TYPE, doc['id']))

    # initialize centroids
    self.initCentroid(CLUSTER_NUM)

    # iterate until the clustering converges
    while True:
        self.oldDocCluster = self.docCluster.copy()
        self.docCluster = {}
        for docID in self.docsJson.keys():
            self.docCluster[docID] = self.nearestCentroid(docID)
        self.updateCentroid()
        self.progress_bar.next()
        if self.terminateCondition():
            self.progress_bar.finish()
            break

    print('K = ', CLUSTER_NUM, ' J = ', self.J())
    candids = self.findCandidateText(CLUSTER_CANDIDATE_TEXT_LEN)

    # group document ids by cluster index
    c = [[] for x in range(len(self.centroidList))]
    for d in self.docCluster.keys():
        c[self.docCluster[d]].append(d)

    # save results
    os.makedirs(CLUSTER_DESTINATION_DIRECTORY, exist_ok=True)
    os.makedirs(CLUSTER_CANDIDATE_TEXT_DIRECTORY, exist_ok=True)
    for i in range(len(self.centroidList)):
        res = {}
        res['id'] = i
        res['name'] = candids[i]
        res['pages'] = c[i]
        fileName = '{}.json'.format(i)
        print('Cluster {}: {}\tnumber of docs: {}'.format(i, ' '.join(candids[i]), len(c[i])))
        with open(os.path.join(CLUSTER_CANDIDATE_TEXT_DIRECTORY, fileName), 'w') as outfile:
            json.dump(res, outfile)

    for id in self.docsJson.keys():
        self.docsJson[id]['cluster'] = self.docCluster[id]
        file_name = '{}.json'.format(id)
        with open(os.path.join(CLUSTER_DESTINATION_DIRECTORY, file_name), 'w') as outfile:
            json.dump(self.docsJson[id], outfile)
def __init__(self, dir_path):
    self._cache = {}
    _clases = {}
    # Build an index of image files per class directory
    for subfolder in list_dir(dir_path):
        class_dir = os.path.join(dir_path, subfolder)
        class_idx = len(_clases)
        _clases[class_dir] = []
        for filename in list_files(class_dir, '.JPEG'):
            _clases[class_dir].append({
                'path': os.path.join(class_dir, filename),
                'class_idx': class_idx,
                'name': os.path.splitext(filename)[0],
                'rotation': 0
            })
    self.class_list = list(_clases.items())
def __init__(self, dir_path):
    self._cache = {}
    _clases = {}
    for subfolder in list_dir(dir_path):
        for character in list_dir(os.path.join(dir_path, subfolder)):
            class_dir = os.path.join(dir_path, subfolder, character)
            class_idx = len(_clases)
            _clases[class_dir] = []
            for filename in list_files(class_dir, '.png'):
                _clases[class_dir].append({
                    'path': os.path.join(class_dir, filename),
                    'class_idx': class_idx,
                    'name': character + '_' + os.path.splitext(filename)[0],
                    'rotation': 0
                })
    self.class_list = list(_clases.items())
def process_request(directory, request):
    if request == "get /":
        return util.list_files(directory)
    elif request.startswith("get /"):
        filename = request.replace("get /", "")
        return util.read_file(directory, filename)
    elif request.startswith("post /"):
        # obtain all request terms
        terms = request.split()
        # obtain filename
        filename = terms[1].replace("/", "")
        # obtain content
        content = ""
        content_terms = terms
        content_terms.pop(0)
        content_terms.pop(0)
        for t in content_terms:
            content = content + " " + t
        return util.overwrite_file(directory, filename, content)
    else:
        return "sorry, '%s' command does not exist" % request
def list_players(model_dir):
    players = [
        (RandomPlayer, {}),
        (GreedyPlayer, {}),
        (MiniMaxPlayer, {'depth': 1}),
        (MiniMaxPlayer, {'depth': 2}),
        (MiniMaxPlayer, {'depth': 3}),
        (MonteCarloPlayer, {'budget': 400}),
        (MonteCarloPlayer, {'budget': 800}),
        (MonteCarloPlayer, {'budget': 1600}),
        (MonteCarloPlayer, {'budget': 3200}),
        (MonteCarloPlayer, {'budget': 6400}),
    ]

    model_files = list(sorted(list_files(model_dir, '.h5')))
    for model_file in model_files:
        if model_file.endswith('0.h5'):
            players += [
                (AlphaConnectPlayer, {'model_path': model_file, 'search_budget': 1600}),
            ]

    return players
def train(model):
    _train_epochs = train_conf['train_epochs']
    _train_data = train_conf['train_data']
    _batch_size = train_conf['batch_size']
    for n in range(_train_epochs):
        tf.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) + '=' * 30 + '\n')
        train_data_list = list_files(_train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.logging.info('<EPOCH {}>: Start training {}'.format(n + 1, f))
            model.train(
                input_fn=lambda: input_fn(f, ModeKeys.TRAIN, _batch_size),
                hooks=None,
                steps=None,
                max_steps=None,
                saving_listeners=None)
            tf.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))
            print('-' * 80)
def author_cluster_admin():
    timer = Timer()
    timer.start()

    authors = list()
    for file in list_files(AUTHOR_CLUSTER_SOURCE_DIRECTORY, '*.json'):
        with open(os.path.join(AUTHOR_CLUSTER_SOURCE_DIRECTORY, file), 'r') as fp:
            author_data = json.load(fp)
            authors.append(Author(author_data))

    from clustering.authors_cluster import Dendogram
    clusters = Dendogram(authors)
    clusters.cluster()

    min_similarity = 0.375
    cluster_list = list(map(
        lambda cluster: list(map(lambda x: x.name, cluster)),
        map(
            lambda x: list(x.authors),
            clusters.get_clusters(min_similarity)
        )
    ))

    cluster_dict = dict()
    for cluster in cluster_list:
        for author in cluster:
            cluster_dict[author] = cluster

    with open(AUTHOR_CLUSTER_FILE, 'w') as fp:
        json.dump(cluster_dict, fp)

    timer.end()
    return render_template('indexing_result.html',
                           duration=timer.get_time_taken_pretty(),
                           elastic_response=json.dumps(cluster_list, indent=True),
                           success=True,
                           numdocs=len(cluster_list)
                           )
def gen_train_input(self, inputs, decode_fn):
    #--------------------- train
    logging.info('train_input: %s' % FLAGS.train_input)
    print 'train_input: %s' % FLAGS.train_input
    trainset = util.list_files(FLAGS.train_input)
    logging.info('trainset:{} {}'.format(len(trainset), trainset[:2]))
    print 'trainset:{} {}'.format(len(trainset), trainset[:2])

    query, query_str, text, text_str = inputs(
        trainset,
        decode_fn=decode_fn,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.num_epochs,
        #seed=seed,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_files=FLAGS.shuffle_files,
        fix_sequence=FLAGS.fix_sequence,
        num_prefetch_batches=FLAGS.num_prefetch_batches,
        min_after_dequeue=FLAGS.min_after_dequeue,
        name=self.input_train_name)

    return (query, query_str, text, text_str), trainset
def extract_meta(dirname, subdir, args):
    source = args.get('source')
    datasets = args.get('datasets')

    # Make sure dirname doesn't end in '/', otherwise our poor basename is not going to work well
    dirname = strip_dirname(dirname)

    # Extract metadata from csv and create a record
    # First check if okay by looking into processed.txt
    processedFile = dirname + '/processed.txt'
    processed = None
    if util.is_non_zero_file(processedFile):
        processed = util.read_properties(processedFile, log)
        if not processed:
            if not args.get('includeAll'):
                return False  # NOT ACCEPTABLE
    else:
        if not args.get('includeAll'):
            return False  # NOT ACCEPTABLE

    # Process log
    times = None
    processLog = dirname + '/process.log'
    if os.path.isfile(processLog):
        times = timings.computeTimings(processLog)

    # Take dirname and extract the final string as the id
    id = os.path.basename(dirname)
    if subdir is None:
        subdir = id
    meta1 = {
        'fullId': source + '.' + id,
        'id': id,
        'source': source,
        'datasets': datasets,
        'path': subdir
    }
    if times is not None:
        total = timings.getTotal(times)
        meta1['totalProcessingSecs'] = total.get('secs')
        meta1['totalProcessingTime'] = total.get('time')

    # Extract create time from id matching: 2016-07-01_04-29-28
    date_match = DATE_RE.search(id)
    if date_match:
        createdAt = date_match.group(0)
        # Reformat to be in ISO8601 format
        meta1['createdAt'] = createdAt[0:10] + 'T' + createdAt[11:13] + ':' + createdAt[14:16] + ':' + createdAt[17:19]

    # Look for dirname/id.txt and extract fields into our meta
    # if there is a txt file, read it and append to metadata record
    metafile = dirname + '/' + id + '.txt'
    if util.is_non_zero_file(metafile):
        # Read in txt as dictionary
        meta = util.read_properties(metafile, log)
    else:
        meta = {}

    # go through files and find last time as updatedAt time for scan
    # Take our basic info and merge it into meta (overwriting what may be there)
    if processed:
        meta.update(processed)
    meta.update(meta1)

    # Check what files we have
    meta['files'] = util.list_files(dirname)
    lastModified = util.lastModified(meta['files'])
    if lastModified:
        meta['updatedAt'] = util.millisToIso(lastModified.get('modifiedAtMillis'))

    # Check what stage we are in
    # NOTE: This requires meta to be filled in with id and files!!!
    if args.get('stages'):
        check_stages(args.get('stages'), meta, times)

    # Check if we have a ply file and how big it is
    filebase = id + '_vh_clean_2' if args.get('checkCleaned') else id
    plyfile = dirname + '/' + filebase + '.ply'
    plyfileSize = util.filesize(plyfile)
    meta['hasCleaned'] = args.get('checkCleaned') and plyfileSize > 0
    if plyfileSize > 0:
        meta['fileType'] = 'ply'
        meta['fileSize'] = plyfileSize

    # Check if we have a png file
    pngfile = dirname + '/' + filebase + '.png'
    pngfileSize = util.filesize(pngfile)
    meta['hasScreenshot'] = pngfileSize > 0

    # Check if we have a thumbnail file
    pngfile = dirname + '/' + filebase + '_thumb.png'
    pngfileSize = util.filesize(pngfile)
    meta['hasThumbnail'] = pngfileSize > 0

    if source == 'nyuv2' and not meta.get('sceneType'):
        idParts = meta.get('id').split('_')
        meta['sceneType'] = '_'.join(idParts[0:len(idParts) - 1])

    if meta.get('sceneLabel'):
        # Derive sceneName from sceneLabel
        sceneLabel = meta.get('sceneLabel')
        match = SCENELABEL_RE.match(sceneLabel)
        meta['sceneName'] = match.group(1) if match else sceneLabel

    return meta
def test_list_files_in_order(directory_hierarchy):
    files = list(list_files(directory_hierarchy))

    assert list(sorted(files)) == files
def main() -> None:
    path = "."
    files = list_files(path)
    for f in files:
        print("d" if f.isdir else "f", f" {f.human_readable_bytes:<12}", f.path)
def test_Given_validPath_When_listFiles_Then_listAllFiles(input, output):
    assert set(list_files(input)) == set(output)
def new_model_path(model_dir):
    model_files = list(list_files(model_dir, '.h5'))
    model_iteration = len(model_files)
    model_path = os.path.abspath(os.path.join(model_dir, '%6.6d.h5' % model_iteration))
    return model_iteration, model_path
def train_and_eval(model):
    _train_epochs = train_conf['train_epochs']
    _train_data = train_conf['train_data']
    _eval_data = train_conf['eval_data']
    _test_data = train_conf['test_data']
    _batch_size = train_conf['batch_size']
    _epochs_per_eval = train_conf['epochs_per_eval']

    for n in range(_train_epochs):
        tf.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) + '=' * 30 + '\n')
        train_data_list = list_files(_train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.logging.info('<EPOCH {}>: Start training {}'.format(n + 1, f))
            model.train(
                input_fn=lambda: input_fn(f, ModeKeys.TRAIN, _batch_size),
                hooks=None,
                steps=None,
                max_steps=None,
                saving_listeners=None)
            tf.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))
            print('-' * 80)

            tf.logging.info('<EPOCH {}>: Start evaluating {}'.format(n + 1, _eval_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(_eval_data, ModeKeys.EVAL, _batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.logging.info(
                '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                    n + 1, _eval_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))

        # every epochs_per_eval, test the model (use larger test dataset)
        if (n + 1) % _epochs_per_eval == 0:
            tf.logging.info('<EPOCH {}>: Start testing {}'.format(n + 1, _test_data))
            results = model.evaluate(
                input_fn=lambda: input_fn(_test_data, ModeKeys.EVAL, _batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # If None, the latest checkpoint in model_dir is used.
                name=None)
            tf.logging.info(
                '<EPOCH {}>: Finish testing {}, take {} mins'.format(
                    n + 1, _test_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
def is_first_model(model_dir):
    model_files = list(list_files(model_dir, '.h5'))
    return len(model_files) == 0
# Load OpenCV's Caffe-based deep learning face detector model
print("[EXEC] Loading face detector model....")
detector = cv2.dnn.readNetFromCaffe(
    "face_detection_model/deploy.prototxt",
    "face_detection_model/res10_300x300_ssd_iter_140000.caffemodel")

# Load the embedder model to extract a 128-D facial embedding vector
# It contains the OpenCV deep learning Torch embedding model.
print("[EXEC] Loading face recognizer model....")
embedder = cv2.dnn.readNetFromTorch("openface_nn4.small2.v1.t7")

print("[EXEC] Reading Image Paths.....")
# Collect every image path into a list
imagePaths = list(list_files(rootPath="my_dataset"))
print(imagePaths)

knownEmbeddings = []
knownNames = []
total = 0

# Iterate over every single image
for (i, imagePath) in enumerate(imagePaths):
    print("Processing image {} of {}".format(i + 1, len(imagePaths)))
    # Extract the person's name from the image path
    name = imagePath.split(os.path.sep)[-2]

    image = cv2.imread(imagePath)
    image = cv2.resize(image, dsize=(750, 600))  # Width and Height
def main():
    md_files = list_files("www", is_markdown_file, recur=True)
    for md_file in md_files:
        html_file = md_file[:-2] + "html"
        md_to_html(md_file, html_file)
def test_list_files():
    ret = '\n'
    ret += './=' + str(util.list_files('./dir2')) + '\n'
    ret += './=' + str(util.list_files('./dir2', pattern=r'\d.+')) + '\n'
    ret += './=' + str(util.list_files('./dir2', pattern=r'lang_.*')) + '\n'
    return ret
def _get_files(self, folder):
    if os.path.isdir(folder):
        return sorted(list_files(folder, valid_exts=self.IMG_EXTS))
    else:
        raise RuntimeError('No folder named "{}" found.'.format(folder))
def inputs(files, decode_fn, batch_size=64,
           num_epochs=None, num_threads=12,
           shuffle_files=True, batch_join=True, shuffle_batch=True,
           min_after_dequeue=None, seed=None, enqueue_many=False,
           fix_random=False, no_random=False, fix_sequence=False,
           allow_smaller_final_batch=False,
           num_prefetch_batches=None,
           dynamic_pad=False,
           bucket_boundaries=None,
           length_index=None,
           length_fn=None,
           name='input'):
    """Reads input data num_epochs times.

    For sparse input this will:
    1. read serialized_example
    2. shuffle serialized_examples
    3. decode batch_serialized_examples

    Notice read_sparse.inputs can also be used for dense inputs, but if you only
    need to decode part of the serialized_example, then read.inputs will be
    better, since less is put into the shuffle queue.

    #-------- decode example, can refer to libsvm-decode.py
    # def decode(batch_serialized_examples):
    #     features = tf.parse_example(
    #         batch_serialized_examples,
    #         features={
    #             'label': tf.FixedLenFeature([], tf.int64),
    #             'index': tf.VarLenFeature(tf.int64),
    #             'value': tf.VarLenFeature(tf.float32),
    #         })
    #     label = features['label']
    #     index = features['index']
    #     value = features['value']
    #     return label, index, value

    # string_input_producer will shuffle files
    # shuffle will read file by file and shuffle within file (in shuffle queue)
    # shuffle_batch_join will read multiple files and shuffle in shuffle queue (from many files)

    To get a fixed sequence, use shuffle=False; this way the sequence follows your
    data input order, unchanged. Alternatively use shuffle=True with seed=1024 set and
    batch_join=False; this way you have fixed randomness, so you get the same result.
    NOTICE shuffle=True, seed=1024, batch_join=True will not give the same result, and
    neither will shuffle=False, seed=1024, batch_join=True; so with batch_join the seed
    seems to only control in-queue randomness and cannot give a fixed result.

    For no randomness -> fixed result: set shuffle=False, which will force batch_join=False, then use batch.
    For fixed randomness: shuffle=True with seed set, or fix_random=True.

    read-records.py shows the above is ok, but train-evaluate.py shows it is not; only
    shuffle=False gives a fixed result.. @FIXME strange
    For train-evaluate.py it looks like you can set shuffle in string_input_producer to True,
    but then you must use batch; batch_join and shuffle_batch_join are both not fixed even
    with the seed set, maybe due to the trainset reading from two inputs?
    For read-records.py batch_join will be fixed, shuffle_batch_join not.

    Default params will give the most randomness.

    Args:
      decode: user defined decode
      min_after_dequeue: set to >20000 for production training; a suggested value is
        0.4 * num_instances, but NOTICE do not exceed memory
      #-- default params will make the most randomness
      shuffle_files: whether to shuffle files
      shuffle_batch: batch or shuffle_batch
      batch_join: whether to use multiple readers, or one reader with multiple threads
      fix_random: if True, make things as random as possible while still fixing the random result
      allow_smaller_final_batch: set True, useful if you want to verify on a small dataset
    """
    if isinstance(files, str):
        files = util.list_files(files)
    assert len(files) > 0

    if not min_after_dequeue:
        min_after_dequeue = 100000
    if not num_epochs:
        num_epochs = None

    if fix_random:
        if seed is None:
            seed = 1024
        shuffle_files = True
        batch_join = False  # check: can be True?
        # to get fix_random:
        # shuffle_batch = True and num_threads = 1 ok
        # shuffle_batch = False and num_threads >= 1 ok
        # from models/image-text-sim/read_records shuffle_batch = True will be quick, even single thread
        # and strangely num_threads = 1 will be quicker than 12
        shuffle_batch = True
        num_threads = 1
        # shuffle_batch = False

    if fix_sequence:
        no_random = True
        allow_smaller_final_batch = True
        num_threads = 1

    if no_random:
        shuffle_files = False
        batch_join = False
        shuffle_batch = False

    if dynamic_pad:
        # use tf.batch
        shuffle_batch = False

    # shuffle=True
    # batch_join = True  # setting to False can get fixed result
    # seed = 1024

    with tf.name_scope(name):
        filename_queue = tf.train.string_input_producer(
            files,
            num_epochs=num_epochs,
            shuffle=shuffle_files,
            seed=seed)

        # min_after_dequeue defines how big a buffer we will randomly sample
        # from -- bigger means better shuffling but slower start up and more
        # memory used.
        # capacity must be larger than min_after_dequeue and the amount larger
        # determines the maximum we will prefetch. Recommendation:
        # min_after_dequeue + (num_threads + a small safety margin) * batch_size
        # @TODO cifar10 always uses num_prefetch_batches = 3, 3 * batch_size; check which is better
        if not num_prefetch_batches:
            num_prefetch_batches = num_threads + 3
        capacity = min_after_dequeue + num_prefetch_batches * batch_size

        # @TODO diff between tf.batch_join and tf.batch; batch_join below means shuffle_batch_join.. TODO
        if batch_join:
            batch_list = [_read(filename_queue) for _ in xrange(num_threads)]
            # print batch_list
            if shuffle_batch:
                batch_serialized_examples = tf.train.shuffle_batch_join(
                    batch_list,
                    batch_size=batch_size,
                    capacity=capacity,
                    min_after_dequeue=min_after_dequeue,
                    seed=seed,
                    enqueue_many=enqueue_many,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name='shuffle_batch_join_queue')
            else:
                batch_serialized_examples = tf.train.batch_join(
                    batch_list,
                    batch_size=batch_size,
                    capacity=capacity,
                    enqueue_many=enqueue_many,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    dynamic_pad=dynamic_pad,
                    name='batch_join_queue')
        else:
            serialized_example = list(_read(filename_queue))
            # @FIXME... for this bug it now cannot be more random; if you want fixed randomness see
            # D:\mine\tensorflow-exp\models\image-text-sim\train-evaluate-fixrandom.py
            if shuffle_batch:
                batch_serialized_examples = tf.train.shuffle_batch(
                    serialized_example,
                    batch_size=batch_size,
                    num_threads=num_threads,
                    capacity=capacity,
                    min_after_dequeue=min_after_dequeue,
                    seed=seed,
                    enqueue_many=enqueue_many,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name='shuffle_batch_queue')
            else:
                batch_serialized_examples = tf.train.batch(
                    serialized_example,
                    batch_size=batch_size,
                    # @TODO to make a really fixed result use num_threads=1; maybe shuffle_batch will fix the randomness?
                    num_threads=num_threads,
                    capacity=capacity,
                    enqueue_many=enqueue_many,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    dynamic_pad=dynamic_pad,
                    name='batch_queue')

        # print name
        # print decode_fn
        # print decode_fn(batch_serialized_examples)
        return decode_fn(batch_serialized_examples) if decode_fn is not None else batch_serialized_examples
def latest_model_path(model_dir):
    model_files = list(sorted(list_files(model_dir, '.h5')))
    model_iteration = len(model_files)
    model_path = os.path.abspath(os.path.join(model_dir, model_files[-1]))
    return model_iteration - 1, model_path
def list_posts():
    posts = [Post.constructFromPath(pf) for pf in list_files(conf.POST_PATH)]
    posts = posts[::-1]
    return json.dumps([p.postStringMap() for p in posts])