def _unassigned_fpfiles(negative):
    """Return every FPFile with the given ``negative`` flag that has no Testfile row.

    The outer join plus the ``Testfile.id == None`` filter keeps only files
    that are not referenced by any Testfile.
    """
    return (
        db.session.query(db.FPFile)
        # '==' against False/None is deliberate: these are SQLAlchemy column
        # expressions, not Python comparisons.
        .filter(db.FPFile.negative == negative)
        .outerjoin(evaluation.Testfile)
        .filter(evaluation.Testfile.id == None)
        .all()
    )


def _fill_with_long_files(todo, candidates, target_len, show_duration=False):
    """Append candidates lasting at least 60 seconds to ``todo`` until it holds ``target_len`` items.

    Args:
        todo: list that is extended in place.
        candidates: already-shuffled list of files to draw from, in order.
        target_len: total length ``todo`` should reach (not the number to add).
        show_duration: when true, print the duration of every candidate probed.

    Raises:
        IndexError: if the candidates run out before ``todo`` is full. The
            original ``list.pop(0)`` raised the same exception type, just
            without a useful message.
    """
    remaining = iter(candidates)
    while len(todo) < target_len:
        try:
            candidate = next(remaining)
        except StopIteration:
            raise IndexError(
                "ran out of candidate files: have %s, need %s"
                % (len(todo), target_len)
            )
        with audioread.audio_open(candidate.path.encode("utf-8")) as f:
            duration = f.duration
        if show_duration:
            print(duration)
        if duration >= 60.0:
            todo.append(candidate)


def add_missing():
    """Top up the testset with FPFiles that are not yet used as testfiles.

    Picks ``num_pos`` positive and ``num_neg`` negative files at random from
    the files that have no Testfile row, keeping only those that are at least
    60 seconds long, and adds them to the testset ``testset_id``.

    NOTE(review): ``num_pos``, ``num_neg`` and ``testset_id`` are not defined
    in this function; they are assumed to come from an enclosing/module scope
    (the original comment mentions topping the testset up to 10,000 files, but
    that computation is not visible here) - confirm.

    Raises:
        IndexError: if there are not enough long-enough candidates.
    """
    pos = _unassigned_fpfiles(False)
    print("need %s candidate pos files. got %s to choose from" % (num_pos, len(pos)))
    todo = []
    random.shuffle(pos)
    _fill_with_long_files(todo, pos, num_pos, show_duration=True)

    neg = _unassigned_fpfiles(True)
    print("need %s candidate neg files. got %s to choose from" % (num_neg, len(neg)))
    random.shuffle(neg)
    # The target is the combined total, exactly as in the original loop condition.
    _fill_with_long_files(todo, neg, num_pos + num_neg)

    print("adding %s files" % len(todo))
    testset = db.session.query(evaluation.Testset).get(testset_id)
    for fpfile in todo:
        db.session.add(evaluation.Testfile(testset, fpfile))
    # Single commit so the new testfiles are added together.
    db.session.commit()
def short(testset_id=4, min_duration=60.0):
    """Actually delete bad testfiles (and all their results).

    A testfile is "bad" when audioread reports a duration below
    ``min_duration``. For each such file its Result rows are deleted first,
    then the Testfile row itself, and the change is committed immediately.

    Args:
        testset_id: id of the testset to clean up (defaults to 4, the value
            that used to be hard-coded).
        min_duration: minimum acceptable duration; files shorter than this
            are removed (defaults to 60.0, as before).
    """
    countneg = 0
    countpos = 0
    testfiles = db.session.query(evaluation.Testfile).filter(
        evaluation.Testfile.testset_id == testset_id
    )
    print("Number testfiles: %s" % testfiles.count())
    for i, tf in enumerate(testfiles):
        if i % 100 == 0:
            # Progress indicator.
            print(i)
        path = tf.file.path.encode("utf-8")
        with audioread.audio_open(path) as f:
            duration = f.duration
        if duration < min_duration:
            if tf.file.negative:
                countneg += 1
            else:
                countpos += 1
            print("Removing short duration file: %s (%s)" % (path, duration))
            # Results are removed before the testfile they reference.
            cur = db.session.query(evaluation.Result).filter(
                evaluation.Result.testfile_id == tf.id
            )
            print("%d results to remove" % cur.count())
            cur.delete()
            db.session.query(evaluation.Testfile).filter(
                evaluation.Testfile.id == tf.id
            ).delete()
            db.session.commit()
    # Re-query so the count reflects the deletions.
    testfiles = db.session.query(evaluation.Testfile).filter(
        evaluation.Testfile.testset_id == testset_id
    )
    print("New number testfiles: %s" % testfiles.count())
    print("deleted negative: %s" % countneg)
    print("deleted positive: %s" % countpos)
def getExecCommand(self, fromfile, tofile):
    """Build the sox command line(s) that mix ``self.mixfile`` into ``fromfile``.

    Returns a single argument list when the input can be mixed directly: its
    samplerate is 44100, or it could not be decoded to find out. Otherwise
    returns a ``(c1, c2)`` tuple of two argument lists: the first trims and
    resamples the input to 44100 writing to ``-``, the second mixes ``-`` with
    the mixfile into ``tofile``. How the two commands are connected is up to
    the caller (presumably a pipe - confirm against the caller).
    """
    from chromaprint_support import audioread

    try:
        with audioread.audio_open(fromfile) as audio:
            mix_directly = audio.samplerate == 44100
    except audioread.DecodeError:
        # Undecodable input: fall back to the plain mix command.
        mix_directly = True

    if mix_directly:
        return ["sox", "-m", fromfile, self.mixfile, tofile, "trim", "0", "35"]

    resample_cmd = [
        "sox", fromfile, "-t", "sox", "-",
        "trim", "0", "35", "rate", "44100",
    ]
    mix_cmd = ["sox", "-m", "-t", "sox", "-", self.mixfile, tofile]
    return (resample_cmd, mix_cmd)
def getExecCommand(self, fromfile, tofile):
    """Return the sox invocation(s) needed to mix ``self.mixfile`` with ``fromfile``.

    The input's samplerate is probed with audioread:

    * samplerate is 44100, or the file cannot be decoded: one argument list
      that mixes and trims in a single sox call;
    * any other samplerate: a 2-tuple of argument lists, the first converting
      the trimmed input to 44100 on ``-``, the second mixing ``-`` with the
      mixfile into ``tofile``.
    """
    from chromaprint_support import audioread

    needs_resample = False
    try:
        with audioread.audio_open(fromfile) as src:
            needs_resample = not (src.samplerate == 44100)
    except audioread.DecodeError:
        # Could not read the file's properties; use the single-command form.
        needs_resample = False

    if needs_resample:
        to_44k = [
            "sox", fromfile, "-t", "sox", "-",
            "trim", "0", "35", "rate", "44100",
        ]
        mixdown = ["sox", "-m", "-t", "sox", "-", self.mixfile, tofile]
        return (to_44k, mixdown)

    return ["sox", "-m", fromfile, self.mixfile, tofile, "trim", "0", "35"]
def rate(testset_id=4, expected_rate=44100, delete_results=False):
    """Report testfiles whose samplerate differs from ``expected_rate`` so they can be re-done.

    For every testfile in the testset the samplerate is read with audioread;
    each mismatch is printed together with the number of Result rows it has.
    By default nothing is deleted (the delete call was commented out in the
    original); pass ``delete_results=True`` to actually remove those results.

    Args:
        testset_id: id of the testset to inspect (defaults to 4, the value
            that used to be hard-coded).
        expected_rate: samplerate considered correct (defaults to 44100).
        delete_results: when true, delete the Result rows of every
            mismatching testfile. Defaults to False, i.e. report only.
    """
    c = 0
    testfiles = db.session.query(evaluation.Testfile).filter(
        evaluation.Testfile.testset_id == testset_id
    )
    print("Number testfiles: %s" % testfiles.count())
    for i, tf in enumerate(testfiles):
        if i % 100 == 0:
            # Progress indicator.
            print(i)
        path = tf.file.path.encode("utf-8")
        with audioread.audio_open(path) as f:
            samplerate = f.samplerate
        if samplerate != expected_rate:
            c += 1
            print("Unexpected samplerate: %s (%s)" % (path, samplerate))
            cur = db.session.query(evaluation.Result).filter(
                evaluation.Result.testfile_id == tf.id
            )
            print("%d results to remove" % cur.count())
            if delete_results:
                cur.delete()
            # Kept from the original; nothing is pending unless
            # delete_results is set.
            db.session.commit()
    testfiles = db.session.query(evaluation.Testfile).filter(
        evaluation.Testfile.testset_id == testset_id
    )
    print("to change %s" % c)
    # Testfile rows are never deleted here, only (optionally) their results.
    print("New number testfiles: %s" % testfiles.count())
def pre_lookup(self, file):
    """Open ``file`` with audioread and return its duration as ``{"duration": <value>}``."""
    with audioread.audio_open(file) as audio:
        return {"duration": audio.duration}