def process_highlevel(corpus, filepath, chop):
    """
        Create high level segments (> 5 seconds) for ``filepath`` from the
        given ``corpus``, analyse each segment and store its constituent
        units in a gaia dataset saved alongside the segment audio.

        corpus   -- corpus object providing audio units and segment dirs
        filepath -- audio file (within the corpus) to process
        chop     -- name of the chop whose units are assembled

    """
    units = corpus.list_audio_units(audio_filename=filepath, chop=chop)
    new_segments = []
    m = Mosaic()
    analyser = EssentiaAnalyser()
    for u in units:
        # Units without a .yaml analysis file are assumed silent and skipped.
        if not os.path.isfile(switch_ext(u, '.yaml')):
            log.error("Cannot find analysis, assume that this file is silent: '%s'" % u)
            continue
        m.add_unit(MosaicUnit(u))
        log.debug("Number of units in mosaic is: %d, length of mosaic is %d" % (len(m.units), m.length))
        # This AND clause prevents a Gaia Dataset of only 1 Point being created.
        # The problem is that if the dataset only has one point then all
        # descriptors get removed during 'cleaner' analysis
        if m.length > 5 and len(m.units) > 1:
            log.debug("Current length of mosaic is %f, adding to list" % m.length)
            new_segments.append(m)
            m = Mosaic()
    if len(new_segments) == 0:
        log.warn("Retrieved only 1 high level mosaic unit of length: %f" % m.length)
        new_segments.append(m)
    # Merge any leftover short mosaic into the last full segment so that no
    # trailing audio is dropped.
    if m != new_segments[-1]:
        log.debug("The final mosaic is of length %f only" % new_segments[-1].length)
        log.debug("The last mosaic has a length of: %f" % m.length)
        m.merge_mosaics(new_segments[-1])
        new_segments[-1] = m
    log.debug("Finished assembling units into segments of > 5s")
    log.debug("There are %d segments in total to be processed for %s" % (len(new_segments), os.path.basename(filepath)))
    highlevel_dir = corpus._make_segments_dir(filepath, 'highlevel_%s' % chop)
    for index, seg in enumerate(new_segments):
        path = os.path.join(highlevel_dir, '%05d.wav' % index)
        seg.export(path)
        log.debug("Analysing audio: %s" % path)
        analyser.analyse_audio(path)
        log.debug("Segment has %d units" % len(seg.units))
        # gaia_transform only needs the set of analysis file paths; map each
        # unit's .yaml path to itself (same key and value as the original).
        yaml_paths = [switch_ext(unit.filepath, '.yaml') for unit in seg.units]
        unit_dict = dict(zip(yaml_paths, yaml_paths))
        tu_ds = gaia_transform(unit_dict)
        tu_ds.save(os.path.join(highlevel_dir, '%05d.db' % index))
# --- Example #2 (scraped snippet separator) ---
 def write_aubio_onsets(self, onset_list, filepath):
     """
         Write a copy of ``filepath`` with an audible beep marking each
         onset time listed in ``onset_list``.

     """
     print ("Onsets are :%s" % onset_list)
     samples = MonoLoader(filename=filepath)()
     onset_marker = AudioOnsetsMarker(onsets = onset_list, type = 'beep')
     beeped_audio = onset_marker(samples)
     out_name = switch_ext(os.path.basename(filepath), 'AUBIOONSETS.wav')
     wavwrite(beeped_audio, out_name, 44100)
# --- Example #3 (scraped snippet separator) ---
    def analyse_audio(self, audio_filepath):
        """
            Run the essentia streaming extractor binary on
            ``audio_filepath``, writing the analysis next to the audio
            as a .yaml file.

            Raises EssentiaError if the extractor output contains an
            ERROR line. Returns ``audio_filepath`` (note: NOT the
            analysis filepath).

        """
        # The streaming extractor only works when invoked from its own
        # directory, so temporarily chdir there. The try/finally guarantees
        # the previous working directory is restored even if Popen or
        # communicate raises.
        current_dir = os.getcwd()
        os.chdir(settings.ESSENTIA_BIN_DIR)
        try:
            command = [self.ESSENTIA_BIN, audio_filepath,
                switch_ext(audio_filepath, '.yaml')]
            process = subprocess.Popen(command, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            (stdout, stderr) = process.communicate()
            log.debug('%s \n %s' % (stdout, stderr))
            pat = re.compile('ERROR.+$')
            match = pat.search(stdout)
        finally:
            os.chdir(current_dir)

        if match:
            raise EssentiaError(match.group())
        # TODO: change this to return the analysis filepath!!
        return audio_filepath
# --- Example #4 (scraped snippet separator) ---
def execute_flac_convert():
    """
    Cycle through the current directory, converting all flac files to wav.

    Output file names are cleaned of spaces and problem characters
    (via ``strip_all``) before use.
    """
    # glob already returns a list; no need to copy it with a comprehension.
    for af in glob('*.flac'):
        log.debug("Found a flac file: '%s'" % af)
        # flacread returns a tuple; element 0 is the sample data.
        samples = flacread(af)[0]
        n = switch_ext(strip_all(af), '.wav')
        print ("Converting '%s' to: '%s'" % (af, n))
        wavwrite(samples, n, 44100)
# --- Example #5 (scraped snippet separator) ---
def execute_mp3_convert():
    """
    Cycle through the current directory, converting all mp3 files to wav.

    File names are cleaned of spaces and problem characters before
    conversion, as the shell command is a little fussy about them.

    **WARNING** - This routine uses ffmpeg to convert the mp3s.
    It will fail if ffmpeg is not installed *or* if ffmpeg is installed
    without mp3 support.

    """
    # glob already returns a list; no need to copy it with a comprehension.
    for af in glob('*.mp3'):
        log.debug("Found an mp3 file: '%s'" % af)
        # Initial step is rename as shell command is a little fussy
        nf = strip_all(af)
        os.rename(af, nf)
        n = switch_ext(nf, '.wav')
        log.info("Converting '%s' to: '%s'" % (nf, n))
        # NOTE(review): this builds a shell string from file names; it is
        # safe only if strip_all removes shell metacharacters - confirm,
        # or prefer subprocess.call(['ffmpeg', '-i', nf, n]).
        os.system("ffmpeg -i %s %s" % (nf, n))
def highlevel_mosaic(target, tcorpus, scorpus, scope=5):
    """
        This will be used to test the highlevel mosaicing process.
        The scope variable controls the number of results which are returned 
        for each target unit which is sought.

        target  -- target audio filename in ``tcorpus``
        tcorpus -- target corpus (provides the highlevel units)
        scorpus -- source corpus (provides the gaia unit database)
        scope   -- number of nearest-neighbour results per target unit

        NOTE(review): this function appears incomplete/broken as written --
        see the inline NOTE(review) comments below.
    """
    # Create a temporary file for the mosaic audio
    filepath = os.path.join(os.getcwd(), 'temp_mosaic.wav')
    if os.path.isfile(filepath):
        os.remove(filepath)
    mosaic = Mosaic(filepath)
    cost = RepeatUnitCost()
    context = Context()
    gridder = Gridder()
    units = tcorpus.list_audio_units(audio_filename=target, chop='highlevel')
    # NOTE(review): 'self' is not defined in this module-level function, so
    # this line raises NameError at runtime -- 'self.chop' presumably needs
    # to be a parameter or a fixed chop name; confirm against callers.
    hdb = scorpus.get_gaia_unit_db(chop='highlevel_%s' % self.chop)
    distance = get_mood_distance(hdb)
    v = View(hdb, distance)
    results = {}
    # For each target segment, find its 'scope' nearest neighbours in the
    # source database.
    for f in units:
        p = Point()
        p.load(switch_ext(f, '.yaml'))
        unit_name = switch_ext(os.path.basename(f), '')
        p.setName(unit_name)
        p_m = hdb.history().mapPoint(p)
        results.update({f:v.nnSearch(p_m).get(scope)})
    log.debug("Ok, now we have a dict with each target segment, along with its corresponding nearest matches in source db")
    log.debug("Check to see that we have every second of target audio accounted for - I think not!") 
    #return results
    #new_results = results.copy()
    # Replace each segment's raw match list with a gaia DataSet holding all
    # constituent units of the matched source segments.
    ds = DataSet()
    for r in results:
        units = []
        for u in results[r]:
            ds.load(switch_ext(u[0], '.db'))
            for n in ds.pointNames():
                units.append(n)
        new_ds = gaia_transform(dict(zip(units, units)))
        results.update({r:new_ds})
    #return results
    # Very important - target units must be in correct order
    index = 0
    # NOTE(review): index_skip is never used below.
    index_skip = 0
    for r in sorted(results.keys()):
        tds = DataSet()
        tds.load(switch_ext(r, '.db'))
        #return tds, results
        sds = results[r]
        # Align the descriptor layouts of the source and target DataSets by
        # removing descriptors present in only one of them.
        source_set = set(sds.layout().descriptorNames())
        target_set = set(tds.layout().descriptorNames())
        remove_from_source = source_set.difference(target_set)
        remove_from_target = target_set.difference(source_set)
        if len(remove_from_source) > 0:
            log.debug("Will try to remove %s from the source DataSet" % remove_from_source)
            try:
                sds = transform(results[r], 'remove', {'descriptorNames':list(remove_from_source)})
            except Exception, e:
                log.error("Failed to remove %s from source DataSet" % list(remove_from_source))
                return results[r], tds
        if len(remove_from_target) > 0:
            log.debug("Will try to remove %s from the target DataSet" % remove_from_source)
            try:
                tds = transform(tds, 'remove', {'descriptorNames':list(remove_from_target)})
            except Exception, e:
                log.error("Failed to remove %s from target DataSet" % list(remove_from_target))
                return results[r], tds
            # NOTE(review): 'pname' and 'sv' are never defined -- an inner
            # loop over the target DataSet's points (and a View over 'sds')
            # appears to be missing here, and this whole section is nested
            # under the remove_from_target branch, so it is skipped when the
            # layouts already match. Confirm against the original source.
            p = Point()
            p.load(pname)
            p_m = sds.history().mapPoint(p)
            unit_results = sv.nnSearch(p_m).get(scope)
            log.debug("For %s, the closest matching points are: %s" % (pname, unit_results))
            log.debug("Applying repition cost")
            unit_results = cost.get_results(unit_results)
            log.debug("Results are now: %s" % str(unit_results))
            log.debug("Applying Context cost")
            unit_results = context.get_results(unit_results)
            log.debug("Results are now: %s" % str(unit_results))
            # Take the best remaining match and fit it to the target unit's
            # length before appending it to the output mosaic.
            path = unit_results[0][0]
            
            log.debug("Choosing this unit: %s" % path)
            filepath = switch_ext(path, '.wav')
            su = MosaicUnit(filepath)
            tl = float(p['length'])
            log.debug("Length of target unit is %f, length of chosen source unit is %f" % (tl, su.length))
            selected = gridder.fit(su, tl)  
            mosaic.add_unit(selected)
            context.append(path)
            index += 1
       
    return mosaic
        

   # Overall architecture of this mode should support low-level and/or high-level options.
   # The loudness and the beat skip are both high-level features.
   # Two sets of constraints - high-level + low-level.
   # Track continuity may also be supported - maybe not. Conditions (formerly 'context') - just a small subset to begin with.