def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
    """Build a k-NN searcher over the collection's features plus its tag store.

    tpp is the tag pre-processing suffix (e.g. 'lemm') used to select the
    TextData tag file.
    """
    # Layout: <rootpath>/<collection>/FeatureData/<feature>/ holds
    # id.txt (image ids), feature.bin (raw features) and shape.txt.
    feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
    id_file = os.path.join(feat_dir, "id.txt")
    feat_file = os.path.join(feat_dir, "feature.bin")
    # First line of shape.txt: "<nr_of_images> <ndims>"
    nr_of_images, ndims = map( int, open(os.path.join(feat_dir, 'shape.txt')).readline().split())
    self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
    self.searcher.set_distance(distance)
    tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
    self.textstore = RecordStore(tagfile)
    # Defaults; adjustable later via set_nr_neighbors / set_nr_autotags.
    self.nr_neighbors = 1000
    self.nr_newtags = 100
    printStatus( INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
def printSummary(self): print '\n' if self.args.command in ['status', 'st']: terminal.blue('---SUMMARY---') print '' if len(Dolly.not_cloned) > 0: terminal.warning('The following repositories were not cloned') for repo in Dolly.not_cloned: util.printStatus(repo, False) print '' if len(Dolly.unpushed) > 0: terminal.warning('The following repositories contain unpushed commits') for repo in Dolly.unpushed: util.printStatus(repo, False) else: terminal.ok('No unpushed commits') print '' if len(Dolly.changes) > 0: terminal.warning('The following repositories contain uncomitted changes') for change in Dolly.changes: print '[{0}] {1}'.format(change['repo']['name'], change['change']) else: terminal.ok('No uncomitted changes') print '' if len(Dolly.warnings) > 0: terminal.warning('Some errors occured') for warning in Dolly.warnings: print warning
def select_and_move():
    """Pick the oldest mp3 files from the trim folder and move them to Listening.

    Moves at most cfg.maxFilesToCopy files; each destination name is
    prefixed with the date extracted from the source file name.
    """
    u.printStep( 'Find files to copy' )
    desiredFiles = []
    # find mp3 files in Processed
    fileNames = [f for f in os.listdir(cfg.trimCastFolder) if f.endswith('.mp3')]
    # Map creation time -> file name.
    # NOTE(review): two files sharing a ctime overwrite each other here;
    # preserved from the original, but worth confirming it cannot happen.
    fData = {}
    for f in fileNames:
        p = path.join(cfg.trimCastFolder, f)
        statinfo = os.stat(p)
        fData[statinfo.st_ctime] = f
    # find oldest files: iterate creation times in ascending order.
    # (The original walked dict keys in arbitrary order, so it did not
    # actually select the oldest files; sorting fixes that.)
    n = cfg.maxFilesToCopy
    for ctime in sorted(fData):
        desiredFiles.append(fData[ctime])
        n -= 1
        if n == 0:  # '==' instead of 'is': identity checks on ints are fragile
            break
    u.printStatus('Found %d files to copy' % len(desiredFiles))
    # move to Listening folder
    for f in desiredFiles:
        src = path.join(cfg.trimCastFolder, f)
        dst = path.join(cfg.listeningFolder, namemanip.find_date(f) + '_' + f)
        moveFile(src, dst)
def GC(seq):
    """Calculate GC ratio in given sequence.

    Returns a float in [0, 1], or None (with a warning) for an empty
    sequence. Case-insensitive.
    """
    # Check that sequence is non-empty
    seqlen = len(seq)
    if seqlen == 0:
        # Fixed function name in the message (was "GCinterval()").
        util.printStatus("WARNING in GC(): sequence is an "
                         "empty string")
        return None
    seq = seq.lower()
    # float() guards against Python 2 integer (floor) division, which
    # silently returned 0 for any sequence that is not entirely G/C.
    return float(seq.count("g") + seq.count("c")) / seqlen
def copy_to_ipod():
    ###
    # Copy files from Listening folder to iPod
    u.printStep('Begin copy')
    # reserve some space
    desiredFiles = os.listdir(cfg.listeningFolder)
    u.printStatus( 'Making buffer space' )
    try:
        # Copy one file to the sacrificial 'magic' buffer location; if even
        # this copy fails there is no room on the device at all.
        copyFile(path.join(cfg.listeningFolder, desiredFiles[0]), cfg.freeSpaceMagic)
    except IOError, ex:
        # Out of space: warn and propagate so the caller can abort the sync.
        u.printWarning("No space on device. Cannot copy any files (%s)" % ex)
        raise ex
def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
    """Set up the k-NN feature searcher and the tag record store."""
    feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
    # First line of shape.txt: "<number of images> <feature dimensionality>"
    shape_line = open(os.path.join(feat_dir, 'shape.txt')).readline()
    nr_of_images, ndims = [int(tok) for tok in shape_line.split()]
    searcher = simpleknn.load_model(os.path.join(feat_dir, "feature.bin"),
                                    ndims, nr_of_images,
                                    os.path.join(feat_dir, "id.txt"))
    searcher.set_distance(distance)
    self.searcher = searcher
    self.textstore = RecordStore(os.path.join(rootpath, collection, "TextData",
                                              "id.userid.%stags.txt" % tpp))
    # Defaults; adjustable through the setter methods.
    self.nr_neighbors = 1000
    self.nr_newtags = 100
    printStatus(INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
def __init__(self, tagfile):
    """Load photoid -> (userid, lowercased tags) records from *tagfile*.

    Each line is tab-separated: photoid, userid, space-delimited tags.
    Also builds a tag -> frequency table over all records.
    """
    printStatus('textstore.RecordStore', 'read from %s' % tagfile)
    self.mapping = {}
    self.tag2freq = {}
    for line in open(tagfile):
        # (Removed a debug-leftover 'print line.strip()' that echoed the
        # entire input file to stdout on every load.)
        [photoid, userid, tags] = line.strip().split('\t')
        self.mapping[photoid] = (userid, tags.lower())
        # NOTE(review): frequencies are counted on the original-case tags
        # while mapping stores the lowercased form -- preserved as-is.
        for tag in set(tags.split()):
            self.tag2freq[tag] = self.tag2freq.get(tag, 0) + 1
    self.nr_images = len(self.mapping)
    self.nr_tags = len(self.tag2freq)
    print ("-> %d images, %d unique tags" % (self.nr_images, self.nr_tags))
def visit(self, host):
    """Process every repository of *host* in parallel, then run the
    host's post_update command (if any)."""
    # 5 worker processes; init_worker is the pool initializer --
    # presumably it installs signal handling; TODO confirm.
    pool = Pool(5, init_worker)
    # Submit one async task per repository.
    def pr(repo): return pool.apply_async(process_repo, (self, repo))
    # Kick off all repositories first, then collect results in order.
    results = zip(host.tree, map(pr, host.tree))
    for r in results:
        repo, result = r
        project.Project.currentProj += 1
        util.printStatus(repo)
        # Workaround to Python issue 8296 where a SIGINT will
        # lock up the process when no wait time is given.
        result.wait(9999999)
    if host.post_update:
        util.executeCommand(host.post_update)
def QualToInt(qual, phred=33):
    """Convert a quality string to a list of integer Phred scores."""
    # An empty quality string has nothing to decode.
    if len(qual) == 0:
        util.printStatus("WARNING in QualToInt(): quality is an "
                         "empty string")
        return None
    # Only the Phred+33 and Phred+64 encodings are expected.
    if phred != 33 and phred != 64:
        util.printStatus("WARNING in QualToInt(): phred value is not the "
                         "expected 33 or 64")
    return [ord(ch) - phred for ch in qual]
def QualToInt_interval(qual, phred=33, interval=10):
    """Convert a quality string to mean integer Phred scores per window
    of *interval* characters."""
    # Guard against nonsensical window sizes.
    if interval < 1:
        util.printStatus("WARNING in QualToInt_interval(): cannot use an "
                         "interval less than 1. Defaulting to interval = 10")
        interval = 10
    qlen = len(qual)
    if qlen == 0:
        util.printStatus("WARNING in QualToInt_interval(): quality is an "
                         "empty string")
        return None
    # Only the Phred+33 and Phred+64 encodings are expected.
    if phred != 33 and phred != 64:
        util.printStatus("WARNING in QualToInt_interval(): phred value is not "
                         "the expected 33 or 64")
    # One mean score per window (the last window may be shorter).
    return [np.mean([ord(ch) - phred for ch in qual[start: start + interval]])
            for start in range(0, qlen, interval)]
def GC_interval(seq, interval=10):
    """Calculate GC ratio in given sequence per interval.

    Returns a list of floats (one per window of *interval* bases; the
    last window may be shorter), or None for an empty sequence.
    """
    # Check that interval is positive
    if interval < 1:
        util.printStatus("WARNING in GC_interval(): cannot use an interval "
                         "less than 1. Defaulting to interval = 10")
        interval = 10
    # Check that sequence is non-empty
    seqlen = len(seq)
    if seqlen == 0:
        util.printStatus("WARNING in GC_interval(): sequence is an "
                         "empty string")
        return None
    seq = seq.lower()
    gcs = []
    for i in range(0, seqlen, interval):
        currSeq = seq[i: i + interval]
        cslen = len(currSeq)
        # float() guards against Python 2 integer (floor) division, which
        # silently produced 0 for any window that is not entirely G/C.
        gcs.append(float(currSeq.count("g") + currSeq.count("c")) / cslen)
    return gcs
def visit(self, host):
    """Report status for each repository in the host's tree, one at a time."""
    for repository in host.tree:
        # Bump the global progress counter before reporting.
        project.Project.currentProj += 1
        util.printStatus(repository)
        self.status(repository)
def process(options, trainCollection, feature, testCollection):
    """Compute tag relevance (or auto-tags) for every test image.

    Writes one "imageid tag vote tag vote ..." line per image to the
    result file. Returns 1 on completion, 0 if the result file already
    exists and overwrite is off.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    distance = options.distance
    k = options.k
    r = options.r
    donefile = options.donefile
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    if options.testset is None:
        testset = testCollection

    test_tag_file = os.path.join(rootpath, testCollection, "TextData",
                                 "id.userid.%stags.txt" % tpp)
    try:
        testStore = RecordStore(test_tag_file)
        resultName = "tagrel"
    except Exception:
        # No usable tag file for the test set: fall back to plain
        # auto-tagging. (Was a bare 'except:', which also swallowed
        # KeyboardInterrupt/SystemExit.)
        testStore = None
        printStatus(
            INFO,
            "Failed to load %s, will do image auto-tagging" % test_tag_file)
        resultName = "autotagging"

    nnName = distance + "knn"
    resultfile = os.path.join(rootpath, testCollection, resultName, testset,
                              trainCollection,
                              "%s,%s,%d,%s" % (feature, nnName, k, tpp),
                              "id.tagvotes.txt")
    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    # Images finished in a previous partial run are skipped; the last
    # line of donefile may be incomplete, hence the [:-1].
    if donefile:
        doneset = set([x.split()[0] for x in open(donefile).readlines()[:-1]])
    else:
        doneset = set()
    printStatus(
        INFO,
        "%d images have been done already, and they will be ignored"
        % len(doneset))

    test_imset = readImageSet(testCollection, testset, rootpath)
    test_imset = [x for x in test_imset if x not in doneset]
    # Round-robin partition of the remaining work across numjobs jobs
    # (job ids are 1-based).
    test_imset = [test_imset[i] for i in range(len(test_imset))
                  if (i % numjobs + 1) == job]

    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData',
                                 feature)
    test_feat_file = BigFile(test_feat_dir)

    learner = TagrelLearner(trainCollection, feature, distance,
                            tpp=tpp, rootpath=rootpath)
    learner.set_nr_neighbors(k)
    learner.set_nr_autotags(r)

    printStatus(
        INFO,
        "working on %d-%d, %d test images -> %s"
        % (numjobs, job, len(test_imset), resultfile))

    done = 0
    makedirsforfile(resultfile)
    fw = open(resultfile, "w")
    read_time = 0
    test_time = 0
    start = 0

    # Process the test images in blocks of `blocksize`.
    while start < len(test_imset):
        end = min(len(test_imset), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(test_imset[start:end])
        read_time += time.time() - s_time
        nr_images = len(renamed)
        # some images may have no visual features available, so nr_images
        # can be smaller than the requested block.

        s_time = time.time()
        output = [None] * nr_images
        for i in range(nr_images):
            if testStore:
                (qry_userid, qry_tags) = testStore.lookup(renamed[i])
            else:
                qry_userid = None
                qry_tags = None
            tagvotes = learner.estimate(vectors[i], qry_tags, qry_userid)
            output[i] = '%s %s\n' % (renamed[i], " ".join([
                "%s %s" % (tag, niceNumber(vote, 8))
                for (tag, vote) in tagvotes
            ]))
        test_time += time.time() - s_time
        start = end
        fw.write(''.join(output))
        fw.flush()
        done += len(output)

    # done
    printStatus(
        INFO,
        "%d done. read time %g seconds, test_time %g seconds"
        % (done, read_time, test_time))
    fw.close()
    return 1
def set_nr_neighbors(self, k):
    """Set how many visual neighbors are consulted per query image."""
    self.nr_neighbors = k
    message = "setting nr_neighbors to %d" % k
    printStatus(INFO, message)
u.printStep('Begin copy') # reserve some space desiredFiles = os.listdir(cfg.listeningFolder) u.printStatus( 'Making buffer space' ) try: copyFile(path.join(cfg.listeningFolder, desiredFiles[0]), cfg.freeSpaceMagic) except IOError, ex: u.printWarning("No space on device. Cannot copy any files (%s)" % ex) raise ex except KeyboardInterrupt, ex: u.printWarning('Interrupt caught, skipping copying step') return ####### Early Return for f in desiredFiles: u.printStatus( 'Copying: %s' % f ) src = path.join(cfg.listeningFolder, f) dst = path.join(cfg.iPodCastFolder, f) try: # move out of listening folder to ipod # hopefully, the move will only occur if there's space moveFile(src, dst) except IOError, ex: u.printWarning( "Warning: Out of space on device (%s)" % ex ) # failure means it will stay in listening folder for the next iPod sync except KeyboardInterrupt, ex: u.printWarning('Interrupt caught, not copying any more files') # free up junk space u.printStatus( 'Clearing buffer space' ) removeFile(cfg.freeSpaceMagic)
def set_nr_autotags(self, k):
    """Set how many tags are produced when auto-tagging an image."""
    self.nr_newtags = k
    message = "setting nr_autotags to %d" % k
    printStatus(INFO, message)
help="File is in FASTA format") parser.add_argument("--gzip", action="store_true", help="File is compressed with GZIP") parser.add_argument("--gc", action="store_true", help="Calculate GC") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") args = parser.parse_args() seqFile = args.fastq outdir = args.outdir if args.outdir.endswith("/") else args.outdir + "/" # Check file exists if not os.path.isfile(seqFile): util.printStatus("Sequence input file '" + fn + "' does not exist.") util.exitScript() # Check output directory exists if not os.path.isdir(outdir): util.printStatus("Output directory '" + outdir + "' does not exist.") util.exitScript() # Give warning on file extension sf_lc = seqFile.lower() if args.gzip and not sf_lc.endswith(".gz"): util.printStatus("WARNING: file does not end in '.gz' - may not " "be a GZIP file") elif args.fasta and (not sf_lc.endswith(".fasta") and not sf_lc.endswith(".fna") and not sf_lc.endswith(".fa")):
def process(options, trainCollection, feature, testCollection):
    """Compute tag relevance (or auto-tags) for every test image.

    Writes one "imageid tag vote tag vote ..." line per image to the
    result file. Returns 1 on completion, 0 if the result file already
    exists and overwrite is off.
    """
    rootpath = options.rootpath
    tpp = options.tpp
    distance = options.distance
    k = options.k
    r = options.r
    donefile = options.donefile
    overwrite = options.overwrite
    numjobs = options.numjobs
    job = options.job
    blocksize = options.blocksize

    if options.testset is None:
        testset = testCollection

    test_tag_file = os.path.join(rootpath, testCollection, "TextData",
                                 "id.userid.%stags.txt" % tpp)
    try:
        testStore = RecordStore(test_tag_file)
        resultName = "tagrel"
    except Exception:
        # No usable tag file for the test set: fall back to plain
        # auto-tagging. (Was a bare 'except:', which also swallowed
        # KeyboardInterrupt/SystemExit.)
        testStore = None
        printStatus(INFO,
                    "Failed to load %s, will do image auto-tagging" % test_tag_file)
        resultName = "autotagging"

    nnName = distance + "knn"
    resultfile = os.path.join(rootpath, testCollection, resultName, testset,
                              trainCollection,
                              "%s,%s,%d,%s" % (feature, nnName, k, tpp),
                              "id.tagvotes.txt")
    if numjobs > 1:
        resultfile += ".%d.%d" % (numjobs, job)
    if checkToSkip(resultfile, overwrite):
        return 0

    # Images finished in a previous partial run are skipped; the last
    # line of donefile may be incomplete, hence the [:-1].
    if donefile:
        doneset = set([x.split()[0] for x in open(donefile).readlines()[:-1]])
    else:
        doneset = set()
    printStatus(INFO,
                "%d images have been done already, and they will be ignored"
                % len(doneset))

    test_imset = readImageSet(testCollection, testset, rootpath)
    test_imset = [x for x in test_imset if x not in doneset]
    # Round-robin partition of the remaining work across numjobs jobs
    # (job ids are 1-based).
    test_imset = [test_imset[i] for i in range(len(test_imset))
                  if (i % numjobs + 1) == job]

    test_feat_dir = os.path.join(rootpath, testCollection, 'FeatureData',
                                 feature)
    test_feat_file = BigFile(test_feat_dir)

    learner = TagrelLearner(trainCollection, feature, distance,
                            tpp=tpp, rootpath=rootpath)
    learner.set_nr_neighbors(k)
    learner.set_nr_autotags(r)

    printStatus(INFO, "working on %d-%d, %d test images -> %s"
                % (numjobs, job, len(test_imset), resultfile))

    done = 0
    makedirsforfile(resultfile)
    fw = open(resultfile, "w")
    read_time = 0
    test_time = 0
    start = 0

    # Process the test images in blocks of `blocksize`.
    while start < len(test_imset):
        end = min(len(test_imset), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        s_time = time.time()
        renamed, vectors = test_feat_file.read(test_imset[start:end])
        read_time += time.time() - s_time
        nr_images = len(renamed)
        # some images may have no visual features available, so nr_images
        # can be smaller than the requested block.

        s_time = time.time()
        output = [None] * nr_images
        for i in range(nr_images):
            if testStore:
                (qry_userid, qry_tags) = testStore.lookup(renamed[i])
            else:
                qry_userid = None
                qry_tags = None
            tagvotes = learner.estimate(vectors[i], qry_tags, qry_userid)
            output[i] = '%s %s\n' % (renamed[i],
                                     " ".join(["%s %s" % (tag, niceNumber(vote, 8))
                                               for (tag, vote) in tagvotes]))
        test_time += time.time() - s_time
        start = end
        fw.write(''.join(output))
        fw.flush()
        done += len(output)

    # done
    printStatus(INFO, "%d done. read time %g seconds, test_time %g seconds"
                % (done, read_time, test_time))
    fw.close()
    return 1