def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
    '''
    Pre-process the metadata file and load it into a CAS table

    The metadata is cleaned with ``DataClean``, uploaded as a CAS table
    under a randomly generated name, and then re-partitioned in place
    with an extra computed ``_fName_`` column copied from ``_filename_``.

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    path : string
        Location to the input metadata file.
    audio_path : string
        Value forwarded to ``DataClean.process_contents`` as ``audio_path``.
        NOTE(review): presumably the location of the audio files rather
        than a delimiter -- confirm against ``DataClean``.

    Returns
    -------
    :class:`CASTable`
        A table object referring to the partitioned metadata table.

    '''
    target_name = random_name('AudioTable_Metadata', 6)

    cleaner = DataClean(conn=conn, contents_as_path=path)
    cleaned = cleaner.process_contents(audio_path=audio_path)
    table_name = cleaner.create_castable(
        cleaned['results'],
        target_name,
        replace=True,
        promote=False,
        col_names=cleaned['col_names'],
    )

    # SAS program that materializes _fName_ as a fixed-length character copy
    # of _filename_ when the table is partitioned below.
    fname_program = ('length _fName_ $1000; '
                     '_fName_ = _filename_; ')
    with_fname = CASTable(table_name,
                          computedvars=['_fName_'],
                          computedvarsprogram=fname_program)

    # Overwrite the uploaded table with the version carrying the computed column.
    conn.table.partition(table=with_fname,
                         casout=dict(name=table_name, replace=True))

    return CASTable(table_name)
def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
    '''
    Pre-process the metadata file and load it into a CAS table

    The metadata is cleaned with ``DataClean``, uploaded as a CAS table
    under a randomly generated name, and then re-partitioned in place
    with an extra computed ``_fName_`` column copied from ``_filename_``.

    Parameters
    ----------
    conn : CAS
        A connection object to the current session. When None, the
        connection is looked up through ``cls.get_connection()``.
    path : string
        Location to the input metadata file.
    audio_path : string
        Value forwarded to ``DataClean.process_contents`` as ``audio_path``.
        NOTE(review): presumably the location of the audio files rather
        than a delimiter -- confirm against ``DataClean``.

    Returns
    -------
    :class:`CASTable`
        A table object referring to the partitioned metadata table.

    Raises
    ------
    DLPyError
        If no connection is given and none can be obtained.

    Examples
    --------
    >>> import swat
    >>> from dlpy.audio import AudioTable
    >>> s=swat.CAS("cloud.example.com", 5570)
    >>> aud_tbl = AudioTable.load_audio_metadata_speechrecognition(s, path="/path/to/metadata/file.txt", audio_path="/path/to/audio/file.txt")
    >>> aud_tbl.set_connection(s)

    '''
    if conn is None:
        # NOTE(review): get_connection is invoked on cls, not on an
        # instance -- confirm this works as a class-level call.
        conn = cls.get_connection()
        if conn is None:
            raise DLPyError('cannot get a connection object to the current session.')

    target_name = random_name('AudioTable_Metadata', 6)

    cleaner = DataClean(conn=conn, contents_as_path=path)
    cleaned = cleaner.process_contents(audio_path=audio_path)
    table_name = cleaner.create_castable(
        cleaned['results'],
        target_name,
        replace=True,
        promote=False,
        col_names=cleaned['col_names'],
    )

    # SAS program that materializes _fName_ as a varchar copy of _filename_
    # when the table is partitioned below.
    fname_program = ('length _fName_ varchar(*); '
                     '_fName_ = _filename_; ')
    with_fname = CASTable(table_name,
                          computedvars=['_fName_'],
                          computedvarsprogram=fname_program)

    # Overwrite the uploaded table with the version carrying the computed column.
    conn.table.partition(table=with_fname,
                         casout=dict(name=table_name, replace=True))

    return CASTable(table_name)
def __init__(self, name, **table_params):
    '''
    Create the table object and start it at patch level zero

    Parameters
    ----------
    name : string
        Table name, passed through to ``CASTable.__init__``.
    **table_params : keyword-arguments, optional
        Additional table parameters, passed through to
        ``CASTable.__init__``.

    '''
    # Base-class setup runs first; patch_level is tracked by this class itself.
    CASTable.__init__(
        self,
        name,
        **table_params
    )
    self.patch_level = 0
def __deepcopy__(self, memo):
    '''
    Deep-copy the table, carrying over ``patch_level``

    Parameters
    ----------
    memo : dict
        Memo dictionary supplied by :func:`copy.deepcopy`; passed through
        to ``CASTable.__deepcopy__``.

    Returns
    -------
    The object produced by ``CASTable.__deepcopy__`` with ``patch_level``
    set to this table's value.

    '''
    duplicate = CASTable.__deepcopy__(self, memo)
    # patch_level is assigned explicitly on the copy after the base-class copy.
    duplicate.patch_level = self.patch_level
    return duplicate
def __copy__(self):
    '''
    Shallow-copy the table, carrying over ``patch_level``

    Returns
    -------
    The object produced by ``CASTable.__copy__`` with ``patch_level``
    set to this table's value.

    '''
    duplicate = CASTable.__copy__(self)
    # patch_level is assigned explicitly on the copy after the base-class copy.
    duplicate.patch_level = self.patch_level
    return duplicate
def __extract_audio_features(conn, table, frame_shift=10, frame_length=25, n_bins=40, n_ceps=40,
                             feature_scaling_method='STANDARDIZATION', n_output_frames=500,
                             casout=None, label_level=0, random_shuffle=True, **kwargs):
    '''
    Compute audio features and wrap the result in an AudioTable

    Loads the ``audio`` action set, runs ``audio.computefeatures`` on
    ``table`` (dither is always set to 0.0), adds a computed ``_fName_``
    column (and optionally ``_label_``) derived from ``_path_``, and
    rewrites the output table either shuffled or partitioned.

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    table : AudioTable or CASTable
        Table handed to ``audio.computefeatures``. Any other type makes
        the function return None.
    frame_shift : int, optional
        Passed as ``frameshift`` in the frame extraction options.
        Default: 10
    frame_length : int, optional
        Passed as ``framelength`` in the frame extraction options.
        Default: 25
    n_bins : int, optional
        Passed as ``nbins`` in the mel banks options.
        Default: 40
    n_ceps : int, optional
        Passed as ``nceps`` in the MFCC options; also stored as
        ``feature_size`` on the returned table.
        Default: 40
    feature_scaling_method : string, optional
        Passed as ``featureScalingMethod``.
        Default: 'STANDARDIZATION'
    n_output_frames : int, optional
        Passed as ``nOutputFrames``.
        Default: 500
    casout : dict or string or CASTable, optional
        Output table. A string is treated as the output table name.
        When None, a random table name is generated.
    label_level : int, optional
        When non-zero, a ``_label_`` column is computed as
        ``scan(_path_, label_level, <path separator>)``.
        Default: 0
    random_shuffle : bool, optional
        When True the output table is rewritten with ``table.shuffle``,
        otherwise with ``table.partition``.
        Default: True
    kwargs : keyword-arguments, optional
        Additional parameters forwarded to ``audio.computefeatures``.

    Returns
    -------
    :class:`AudioTable`
        On success, with the connection, ``feature_size``,
        ``num_of_frames_col`` and ``label_col`` set.
    None
        If ``table`` is not an AudioTable/CASTable, or if the action
        reports a severity greater than 1 (its messages are printed).

    '''
    conn.loadactionset('audio', _messagelevel='error')

    if not isinstance(table, (AudioTable, CASTable)):
        return None

    if casout is None:
        casout = dict(name=random_name('AudioTable', 6))
    elif isinstance(casout, (CASTable, AudioTable)):
        casout = casout.to_outtable_params()
    elif isinstance(casout, str):
        # A bare table name must become a dict: casout['name'] is read below,
        # and indexing a string with 'name' would raise TypeError.
        casout = dict(name=casout)

    # always use dither with 0 to turn it off
    rt = conn.retrieve('audio.computefeatures', _messagelevel='error', table=table,
                       frameExtractionOptions=dict(frameshift=frame_shift,
                                                   framelength=frame_length,
                                                   dither=0.0),
                       melBanksOptions=dict(nbins=n_bins),
                       mfccOptions=dict(nceps=n_ceps),
                       featureScalingMethod=feature_scaling_method,
                       nOutputFrames=n_output_frames,
                       casout=casout,
                       **kwargs)
    if rt.severity > 1:
        for msg in rt.messages:
            print(msg)
        return None

    # The path separator depends on the host type of the CAS server.
    server_type = get_cas_host_type(conn).lower()
    fs = "/" if server_type.startswith(("lin", "osx")) else "\\"

    # SAS code: locate the last separator in _path_ and keep what follows it
    # as _fName_; optionally pick the label_level-th path token as _label_.
    find_stmt = "i=find(_path_,'{0}',-length(_path_)); ".format(fs)
    fname_stmt = "_fName_=substr(_path_, i+length('{0}'), length(_path_)-i);".format(fs)
    if label_level:
        scode = (find_stmt
                 + "length _fName_ varchar(*); length _label_ varchar(*); "
                 + fname_stmt
                 + "_label_=scan(_path_,{},'{}');".format(label_level, fs))
        computed_vars = ['_fName_', '_label_']
    else:
        scode = find_stmt + "length _fName_ varchar(*); " + fname_stmt
        computed_vars = ['_fName_']

    ctable = CASTable(casout['name'], computedvars=computed_vars, computedvarsprogram=scode)

    # Rewrite the output table in place so the computed columns are materialized.
    rewrite_target = dict(name=casout['name'], replace=True)
    if random_shuffle:
        conn.table.shuffle(table=ctable, casout=rewrite_target)
    else:
        conn.table.partition(table=ctable, casout=rewrite_target)

    out = AudioTable(casout['name'])
    out.set_connection(connection=conn)
    out.feature_size = n_ceps
    out.num_of_frames_col = '_num_frames_'
    out.label_col = '_label_' if label_level else None

    return out
def extract_audio_features(cls, conn, table, frame_shift=10, frame_length=25, n_bins=40, n_ceps=40,
                           feature_scaling_method='STANDARDIZATION', n_output_frames=500,
                           casout=None, **kwargs):
    '''
    Extracts audio features from the audio files

    Runs ``audio.computefeatures`` on ``table``, then rewrites the output
    table with ``table.partition`` so that it carries a computed
    ``_fName_`` column holding the part of ``_path_`` after the last
    path separator.

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    table : AudioTable
        An audio table containing the audio files.
    frame_shift : int, optional
        Specifies the time difference (in milliseconds) between the
        beginnings of consecutive frames.
        Default: 10
    frame_length : int, optional
        Specifies the length of a frame (in milliseconds).
        Default: 25
    n_bins : int, optional
        Specifies the number of triangular mel-frequency bins.
        Default: 40
    n_ceps : int, optional
        Specifies the number of cepstral coefficients in each MFCC
        feature frame (including C0).
        Default: 40
    feature_scaling_method : string, optional
        Specifies the feature scaling method to apply to the computed
        feature vectors.
        Default: 'STANDARDIZATION'
    n_output_frames : int, optional
        Specifies the exact number of frames to include in the output
        table (extra frames are dropped and missing frames are padded
        with zeros).
        Default: 500
    casout : dict or string or CASTable, optional
        CAS Output table. A string is treated as the output table name.
        When None, a random table name is generated.
    kwargs : keyword-arguments, optional
        Additional parameter for feature extraction.

    Returns
    -------
    :class:`AudioTable`
        If the features were computed.
    None
        If ``table`` is not an AudioTable/CASTable, or if the action
        reports a severity greater than 1 (its messages are printed).

    '''
    if not isinstance(table, (AudioTable, CASTable)):
        return None

    if casout is None:
        casout = dict(name=random_name('AudioTable', 6))
    elif isinstance(casout, (CASTable, AudioTable)):
        casout = casout.to_outtable_params()
    elif isinstance(casout, str):
        # A bare table name must become a dict: casout['name'] is read below,
        # and indexing a string with 'name' would raise TypeError.
        casout = dict(name=casout)

    rt = conn.retrieve('audio.computefeatures', _messagelevel='error', table=table,
                       frameExtractionOptions=dict(frameshift=frame_shift,
                                                   framelength=frame_length),
                       melBanksOptions=dict(nbins=n_bins),
                       mfccOptions=dict(nceps=n_ceps),
                       featureScalingMethod=feature_scaling_method,
                       nOutputFrames=n_output_frames,
                       casout=casout,
                       **kwargs)
    if rt.severity > 1:
        for msg in rt.messages:
            print(msg)
        return None

    # The path separator depends on the host type of the CAS server.
    server_type = get_cas_host_type(conn).lower()
    fs = "/" if server_type.startswith(("lin", "osx")) else "\\"

    # SAS code: locate the last separator in _path_ and keep what follows it
    # as the fixed-length character column _fName_.
    scode = "i=find(_path_,'{0}',-length(_path_)); ".format(fs)
    scode += "length _fName_ $1000; "
    scode += "_fName_=substr(_path_, i+length('{0}'), length(_path_)-i);".format(fs)

    ctable = CASTable(casout['name'], computedvars=['_fName_'], computedvarsprogram=scode)

    # Rewrite the output table in place so the computed column is materialized.
    conn.table.partition(table=ctable, casout=dict(name=casout['name'], replace=True))

    return AudioTable(casout['name'])