class readLabeledData():
    """Reads labeled training sentences from a CSV file.

    Each CSV row is expected to carry the columns 'labeledSentence',
    'refTime', 'beginTime' and 'endTime' (grounded in the row[...] reads
    below).  Duplicate sentences are kept only once, first occurrence wins.
    """

    logger = utils.getLogger()

    def __init__(self, filename):
        # Path of the CSV file to read; consumed lazily by getLabledTuples().
        self.__filename = filename

    def getLabledTuples(self):
        """Return {processedSentence: (refTime, beginTime, endTime)}.

        On an unreadable file the error is logged and an empty dict is
        returned (best-effort behavior preserved from the original).
        """
        sentenceTimeTuples = {}
        try:
            # NOTE(review): 'rb' with csv.DictReader is a Python-2 idiom;
            # under Python 3 the reader needs a text-mode handle.
            with open(self.__filename, 'rb') as csvtrainfile:
                datareader = csv.DictReader(csvtrainfile)
                for row in datareader:
                    # utils.process() normalizes the raw sentence text
                    # (semantics defined in the project utils module).
                    sentence = utils.process(row['labeledSentence'])
                    reftime = row['refTime']
                    begintime = row['beginTime']
                    endtime = row['endTime']
                    # Deduplicate: only the first occurrence of a sentence
                    # contributes its time tuple.
                    if sentence not in sentenceTimeTuples:
                        sentenceTimeTuples[sentence] = (reftime, begintime, endtime)
        except IOError as e:
            # Fixed: original logged through readUnLabeledData.logger
            # (copy-paste from the sibling class); use this class's logger.
            readLabeledData.logger.error("Can not open file:{}, {}".format(self.__filename, e))
        return sentenceTimeTuples
def apply_metric(self, metric_func):
    """Score every test condition with *metric_func* and persist results.

    For each condition directory under ``self.est_path`` this reads the
    ``*_mix.wav`` / ``*_sph.wav`` / ``*_sph_est.wav`` triples, applies
    ``metric_func(reference, estimate)`` to the mixture and to the enhanced
    signal, writes a per-utterance text report plus summary statistics,
    saves the raw score arrays as ``.npy`` files, and logs a one-line
    summary per condition.

    Args:
        metric_func: callable(sph, x) -> scalar score (e.g. PESQ/STOI
            wrapper); exact semantics are the caller's.
    """
    logger = getLogger(os.path.join(self.ckpt_dir, self.metric + '_scores.log'),
                       log_file=True)
    all_scores_dir = os.path.join(self.ckpt_dir, 'all_scores')
    scores_arrays_dir = os.path.join(all_scores_dir, 'scores_arrays')
    # exist_ok replaces the original isdir-then-makedirs checks (race-free).
    os.makedirs(all_scores_dir, exist_ok=True)
    os.makedirs(scores_arrays_dir, exist_ok=True)

    if self.tt_list == '':
        conditions = os.listdir(self.est_path)
    else:
        with open(self.tt_list, 'r') as f:
            conditions = [t.strip().split('/')[-1].replace('.ex', '')
                          for t in f.readlines()]

    for condition in conditions:
        mix_scores_array = []
        est_scores_array = []
        score_name = condition + '_' + self.metric
        cond_dir = os.path.join(self.est_path, condition)
        count = 0
        # Fixed: the original used a bare open() with a trailing close(),
        # leaking the handle if sf.read or metric_func raised; the
        # with-block guarantees closure on every path.
        with open(os.path.join(all_scores_dir, score_name + '.txt'), 'w') as f:
            for filename in os.listdir(cond_dir):
                if not filename.endswith('_mix.wav'):
                    continue
                count += 1
                mix, _ = sf.read(os.path.join(cond_dir, filename),
                                 dtype=np.float32)
                sph, _ = sf.read(os.path.join(cond_dir, filename.replace('_mix', '_sph')),
                                 dtype=np.float32)
                sph_est, _ = sf.read(os.path.join(cond_dir, filename.replace('_mix', '_sph_est')),
                                     dtype=np.float32)
                mix_score = metric_func(sph, mix)
                est_score = metric_func(sph, sph_est)
                f.write('utt {}: mix {:.4f}, est {:.4f}\n'.format(
                    filename, mix_score, est_score))
                f.flush()
                mix_scores_array.append(mix_score)
                est_scores_array.append(est_score)
            mix_scores_array = np.array(mix_scores_array, dtype=np.float32)
            est_scores_array = np.array(est_scores_array, dtype=np.float32)
            f.write('========================================\n')
            f.write('{} results: ({} utts)\n'.format(self.metric, count))
            f.write('mix : {:.4f} +- {:.4f}\n'.format(np.mean(mix_scores_array),
                                                      np.std(mix_scores_array)))
            f.write('est : {:.4f} +- {:.4f}\n'.format(np.mean(est_scores_array),
                                                      np.std(est_scores_array)))
        np.save(os.path.join(scores_arrays_dir, score_name + '_mix.npy'),
                mix_scores_array)
        np.save(os.path.join(scores_arrays_dir, score_name + '_est.npy'),
                est_scores_array)
        message = 'Evaluating {}: {} utts: '.format(condition, count) + \
            '{} [ mix: {:.4f}, est: {:.4f} | delta: {:.4f} ]'.format(
                self.metric, np.mean(mix_scores_array),
                np.mean(est_scores_array),
                np.mean(est_scores_array) - np.mean(mix_scores_array))
        logger.info(message)
class readUnLabeledData():
    """Reads unlabeled test sentences from a CSV file and normalizes them.

    Rows are expected to carry the columns 'sentence' and 'refTime'
    (grounded in the row[...] reads below).  Each sentence is normalized
    (integers -> "DIGIT", lowercased), wrapped with sentence-boundary
    tags, and mapped both to its reference time and back to its original
    surface form.
    """

    logger = utils.getLogger()

    def __init__(self, filename, numTerminalTags):
        # Path of the CSV file; parsed eagerly in the constructor.
        self.__filename = filename
        # numTerminalTags: number of <~start~>/<~end~> wrapper pairs to add
        # around each processed sentence.
        (self.__processedSentenceTimeTuples, self.__processedSentenceOriginalSentenceMap) = self.__returnProcessedTuples(numTerminalTags)
        #utils.print_ds(self.__processedSentenceTimeTuples)

    def __returnProcessedTuples(self, numTerminalTags):
        """Parse the CSV into (sentence->refTime, sentence->original) maps.

        On an unreadable file the error is logged and whatever was parsed
        so far (possibly empty dicts) is returned.
        """
        uniqueTimeSentences = []
        sentenceTimeTuples = {}
        processedOldSentenceMap = {}
        try:
            # NOTE(review): 'rb' with csv.DictReader is a Python-2 idiom;
            # under Python 3 the reader needs a text-mode handle.
            with open(self.__filename, 'rb') as csvtestfile:
                datareader = csv.DictReader(csvtestfile)
                for row in datareader:
                    #print row
                    # utils.process() is the project-wide text normalizer.
                    originalSentence = utils.process(row['sentence'])
                    processedSentence = self._process(originalSentence)
                    # Wrap the sentence numTerminalTags times with terminal
                    # word_tag markers used by the downstream tagger.
                    for i in range(numTerminalTags):
                        processedSentence = "<~start~>_<~START~> " + processedSentence + " <~end~>_<~END~>"
                    reftime = row['refTime']
                    # First occurrence of a sentence wins (dedup).
                    if processedSentence not in uniqueTimeSentences:
                        uniqueTimeSentences.append(processedSentence)
                        # NOTE(review): (reftime) is just reftime, not a
                        # 1-tuple — confirm callers expect a bare value.
                        sentenceTimeTuples[processedSentence] = (reftime)
                        processedOldSentenceMap[processedSentence] = originalSentence
        except IOError as e:
            readUnLabeledData.logger.error("Can not open file :{}, {}".format(self.__filename, e))
        return (sentenceTimeTuples, processedOldSentenceMap)

    def getProcessedSentenceTuples(self):
        """Return the {processedSentence: refTime} map built at init."""
        return self.__processedSentenceTimeTuples

    def getOrignialSentence(self, processedSentence):
        # NOTE(review): method name misspells "Original"; kept because
        # external callers may rely on it.
        """Return the original sentence for *processedSentence*, or None."""
        return utils.getDictValue(self.__processedSentenceOriginalSentenceMap, processedSentence, None)

    def _process(self, sentence):
        """Normalize: replace integer tokens with "DIGIT", lowercase the rest."""
        sentence = sentence.strip()
        words = sentence.split()
        processedSentence = []
        for word in words:
            try:
                int(word)
                processedSentence.append("DIGIT")
            except ValueError:
                processedSentence.append(word.lower())
        return " ".join(processedSentence)
class BeginEndTime:
    """Derives begin/end timestamps from (label, word) tagger output.

    The constructor folds the tagged words into ``self.tagMap``
    (label -> list of words, via utils.addKeyValue), collects duration
    (prefix, suffix) pairs under the synthetic key "TIME_PERIOD",
    validates numeric tag values, and computes ``self.beginTime`` /
    ``self.endTime`` as dicts with keys 'year','month','day','hour',
    'minute','second' (and 'weekday' where set).

    Relies on module-level helpers visible in this code: utils,
    SpecialDaysCalendar, DefaultTime, timeStringAlias,
    timeStringToNumberAlias, timeDurationOffsetAliases, tagTimeAlias,
    AllowedValues.  Python 2 code (print statements, dict.has_key).
    """

    logger = utils.getLogger()

    def __init__(self, wordLabelTuples, referenceDate):
        self.tagMap = {}
        durationTuples = []
        prefix = ""
        for t in wordLabelTuples:
            (label, word) = (t[0], t[1])
            #Remove the angled Braces around the label
            label = label[1:len(label) - 1]
            #Get all TDPS and TDSS (prefix,suffix) tuples
            if label == "TDSS":
                # A duration suffix closes a pending (prefix, suffix) pair.
                if prefix != "":
                    durationTuple = (prefix, word)
                    durationTuples.append(durationTuple)
                    prefix = ""
                    continue
                else:
                    BeginEndTime.logger.debug(
                        "Duration suffix without a prefix :{}".format(t))
            if label == "TDPN" or label == "TDPS":
                # A new prefix flushes any pending prefix with empty suffix.
                if prefix != "":
                    durationTuple = (prefix, '')
                    durationTuples.append(durationTuple)
                prefix = word
                continue
            self.tagMap = utils.addKeyValue(self.tagMap, label, word)
        if prefix != "":
            durationTuples.append((prefix, ''))
        if len(durationTuples) != 0:
            self.tagMap["TIME_PERIOD"] = durationTuples
        # NOTE(review): debug prints left in; consider logger.debug instead.
        print "printing tagmap..."
        print self.tagMap
        self.tagMap = self.__validate(self.tagMap)
        self.beginTime = self.__getBeginTime(self.tagMap, referenceDate)
        self.endTime = self.__getEndTime(self.beginTime, self.tagMap)

    # Convert hours to 24 hour format
    def _convertTo24(self, tagMap, hourkey, ampmKey):
        """Rewrite tagMap[hourkey] values as 24h ints using tagMap[ampmKey]."""
        newValues = []
        if hourkey in tagMap.keys():
            values = tagMap[hourkey]
            for val in values:
                val = int(val)
                if ampmKey in tagMap.keys():
                    ampm = 0  # 0 for am, 1 for pm
                    if "am" in tagMap[ampmKey]:
                        ampm = 0
                    else:
                        if "pm" in tagMap[ampmKey]:
                            ampm = 1
                    if ampm == 1:
                        if (val < 12):
                            val += 12
                newValues.append(val)
            tagMap[hourkey] = newValues
        return tagMap

    #Convert year digit to a standard 4 digit format
    def _convertTo4digitYear(self, tagMap, yearKey):
        """Expand 2-digit year strings to 4-digit ints in the current century."""
        if yearKey not in tagMap.keys():
            return tagMap
        newValues = []
        t = date.today()
        #Using the current century as the base
        yr_2 = str(t.year)[:2]
        values = tagMap[yearKey]
        for val in values:
            newVal = val
            # NOTE(review): assumes val is a string here (len()); 4-digit
            # years pass through unconverted — confirm downstream int() use.
            if len(val) == 2:
                year = yr_2 + val
                newVal = int(year)
            newValues.append(newVal)
        tagMap[yearKey] = newValues
        return tagMap

    # calculate time stamp for (prefix,suffix) tuples
    def __calculateDurationOffset(self, timeTuple, endTime):
        """Shift *endTime* (dict, mutated in place) by a duration tuple.

        Special-day tuples are resolved via SpecialDaysCalendar first;
        otherwise the prefix is converted to an int (via alias table or
        int()) and the suffix selects a timedelta unit.
        """
        offsetTime = SpecialDaysCalendar.getTimeStruct(timeTuple, endTime)
        if offsetTime != None:
            return offsetTime
        prefixstring = timeTuple[0]
        suffixString = timeTuple[1]
        prefix = None
        if timeDurationOffsetAliases.has_key(timeTuple):
            aliasTimeTuple = timeDurationOffsetAliases[timeTuple]
            prefixstring = aliasTimeTuple[0]
            suffixString = aliasTimeTuple[1]
        try:
            if prefixstring != "":
                if prefixstring in timeStringToNumberAlias.keys():
                    prefix = timeStringToNumberAlias[prefixstring]
                else:
                    prefix = int(prefixstring)
            else:
                prefix = int(timeTuple[0])
        except ValueError as v:
            # NOTE(review): format string has no {} placeholder, so v is
            # silently dropped from the message.
            BeginEndTime.logger.debug(
                "TDP can not be converted to integer".format(v))
            return endTime
        else:
            if suffixString in timeStringAlias.keys():
                suffixString = timeStringAlias[suffixString] + "s"
                offset = None
                if suffixString == "days":
                    #Including current day as well
                    offset = timedelta(days=(prefix - 1))
                if suffixString == "hours":
                    offset = timedelta(hours=prefix)
                if suffixString == "seconds":
                    offset = timedelta(seconds=prefix)
                if suffixString == "months":
                    # NOTE(review): BUG — datetime.timedelta has no 'months'
                    # keyword; this raises TypeError when a month duration
                    # reaches here.
                    offset = timedelta(months=prefix)
                if suffixString == "minutes":
                    offset = timedelta(minutes=prefix)
                # NOTE(review): if no unit matched, offset is None and the
                # addition below raises TypeError.
                stTime = datetime.datetime(endTime['year'], endTime['month'],
                                           endTime['day'], endTime['hour'],
                                           endTime['minute'], endTime['second'])
                newDate = stTime + offset
                endTime['year'] = newDate.year
                endTime['month'] = newDate.month
                endTime['day'] = newDate.day
                endTime['hour'] = newDate.hour
                endTime['minute'] = newDate.minute
                endTime['second'] = newDate.second
            else:
                BeginEndTime.logger.debug(
                    "Invalid suffix:{}".format(suffixString))
        return endTime

    def __calculateDayOffset(self, beginTime, daystring, isbeginDay=True):
        """Move *beginTime* (mutated) to the day named by *daystring*.

        isbeginDay=False marks an end-of-range day: the time is pushed to
        23:59:59.
        """
        if daystring == "today" or daystring == "eod" or daystring == "midnight" or daystring == "day":
            if isbeginDay:
                return beginTime
            else:
                beginTime['hour'] = 23
                beginTime['minute'] = 59
                beginTime['second'] = 59
                return beginTime
        if daystring == "now":
            return beginTime
        if daystring == "weekend":
            if isbeginDay == True:
                daystring = "friday"
            else:
                daystring = "sunday"
        # NOTE(review): unreachable duplicate — the branch above already
        # rewrote daystring, so this second "weekend" check never fires.
        if daystring == "weekend":
            if isbeginDay == True:
                daystring = "monday"
            else:
                daystring = "friday"
        days_ahead = 0
        if daystring == "tomorrow":
            days_ahead = 1
        else:
            try:
                weekday = timeStringToNumberAlias[daystring]
                days_ahead = weekday - beginTime["weekday"]
                # Always move forward to the next occurrence of the weekday.
                if days_ahead <= 0:
                    days_ahead += 7
            except KeyError:
                BeginEndTime.logger.debug(
                    "Invalid daystring:{}".format(daystring))
                if isbeginDay == False:
                    #Default offset of one day
                    days_ahead = 1
                else:
                    return beginTime
        d = datetime.datetime(beginTime['year'], beginTime['month'],
                              beginTime['day'], beginTime['hour'],
                              beginTime['minute'], beginTime['second'])
        newDate = d + datetime.timedelta(days_ahead)
        beginTime['year'] = newDate.year
        beginTime['month'] = newDate.month
        beginTime['day'] = newDate.day
        beginTime['hour'] = newDate.hour
        beginTime['minute'] = newDate.minute
        beginTime['second'] = newDate.second
        beginTime['weekday'] = newDate.weekday()
        if isbeginDay == False:
            beginTime["hour"] = 23
            beginTime["minute"] = 59
            beginTime["second"] = 59
        return beginTime

    def __calculateHourOffset(self, beginTime, hourstring):
        """Apply a named hour ('now'/'tonight'/'midnight'/'noon') to beginTime."""
        if hourstring == "now":
            return beginTime
        if hourstring == "tonight" or hourstring == "midnight":
            beginTime['hour'] = 23
            beginTime['minute'] = 59
            beginTime['second'] = 59
        if hourstring == "noon":
            beginTime['hour'] = 12
            beginTime['minute'] = 0
            beginTime['second'] = 0
        return beginTime

    def __validate(self, tagMap):
        """Drop numeric tag values outside the ranges in AllowedValues.

        Only keys ending in 'N' (numeric tags) are range-checked; all other
        values pass through.  Invalid entries are logged and removed.
        """
        for key in tagMap.keys():
            validValues = []
            for value in tagMap[key]:
                if key[len(key) - 1] == "N":
                    try:
                        intvalue = int(value)
                        if key == 'BTPMN' or key == "ETPMN":
                            if intvalue > AllowedValues['maxMonth'] or intvalue < AllowedValues['minMonth']:
                                raise ValueError("Invalid Month :{}".format(value))
                            else:
                                validValues.append(value)
                        elif key == 'BTPmN' or key == 'ETPmN':
                            if intvalue > AllowedValues['maxMinute'] or intvalue < AllowedValues['minMinute']:
                                raise ValueError("Invalid Minute :{}".format(value))
                            else:
                                validValues.append(value)
                        elif key == 'BTPHN' or key == 'ETPHN':
                            if intvalue > AllowedValues['maxHour'] or intvalue < AllowedValues['minHour']:
                                raise ValueError("Invalid Hour :{}".format(value))
                            else:
                                validValues.append(value)
                        elif key == 'BTPDN' or key == 'ETPDN':
                            if intvalue > AllowedValues['maxDay'] or intvalue < AllowedValues['minDay']:
                                raise ValueError("Invalid Day {}".format(value))
                            else:
                                validValues.append(value)
                        elif key == 'BTPYN' or key == 'ETPYN':
                            # 2-digit and 4-digit years use separate bounds.
                            if len(value) == 2:
                                if intvalue > AllowedValues['smaxYear'] or intvalue < AllowedValues['sminYear']:
                                    raise ValueError("Invalid Year {}".format(value))
                                else:
                                    validValues.append(value)
                            elif len(value) == 4:
                                if intvalue > AllowedValues['maxYear'] or intvalue < AllowedValues['minYear']:
                                    raise ValueError("Invalid Year {}".format(value))
                                else:
                                    validValues.append(value)
                            else:
                                raise ValueError("Invalid Year {}".format(value))
                        else:
                            validValues.append(value)
                    except ValueError as v:
                        # Both int() failures and the explicit raises above
                        # land here: the value is simply dropped.
                        BeginEndTime.logger.debug(
                            "Invalid Tagmap entries {}".format(v))
                else:
                    validValues.append(value)
            tagMap[key] = validValues
        return tagMap

    def __calculateRepeat(self, beginTime, repeat, repeatFrequency=30):
        """Expand a repeating time into repeatFrequency copies ('daily' only)."""
        beginTimes = []
        d = datetime.datetime(beginTime['year'], beginTime['month'],
                              beginTime['day'], beginTime['hour'],
                              beginTime['minute'], beginTime['second'])
        for i in range(repeatFrequency):
            days_ahead = 0
            if repeat == "daily":
                days_ahead = i
            newDate = d + datetime.timedelta(days_ahead)
            beginTimenew = copy.deepcopy(beginTime)
            beginTimenew['year'] = newDate.year
            beginTimenew['month'] = newDate.month
            beginTimenew['day'] = newDate.day
            beginTimenew['hour'] = newDate.hour
            beginTimenew['minute'] = newDate.minute
            beginTimenew['second'] = newDate.second
            beginTimes.append(beginTimenew)
        return beginTimes

    def __validateDayMonthHour(self, beginTime):
        """True if the dict's fields form a constructible datetime."""
        try:
            # NOTE(review): strftime('%s') is a platform-specific (glibc)
            # extension, not portable.
            timestring = int(
                datetime.datetime(beginTime['year'], beginTime['month'],
                                  beginTime['day'], beginTime['hour'],
                                  beginTime['minute']).strftime('%s'))
            return True
        except:
            return False

    # if time1 and time2 are equal it returns 0
    # returns -1 if time1 is greater than time2 , returns 1 otherwise
    # Compares to the minute level
    # if one of them is none then it returns the other timestamp
    # If one of them is contains invalid combination of day month year then
    #it returns 1 if time2 is valid and -1 if time1 is valid
    #if both are invalid raise an exception
    def __compareTimes(self, time1, time2):
        # NOTE(review): the comparison reads year/month/hour/minute but
        # never 'day' — days are ignored, which looks unintended.
        # NOTE(review): the time2 == None guard sits after
        # __validateDayMonthHour(time2), which would raise first.
        if time1 == None and time2 == None:
            return 0
        if time1 == None:
            return 1
        validTime1 = self.__validateDayMonthHour(time1)
        validTime2 = self.__validateDayMonthHour(time2)
        if validTime1 == False and validTime2 == False:
            raise ValueError("invalid time1 and time2 given for comparisons")
        if validTime1 == False:
            return 1
        if validTime2 == False:
            return -1
        if time2 == None:
            return -1
        if time1["year"] == time2["year"] and time1["month"] == time2["month"] and time1["hour"] == time2["hour"] and time1["minute"] == time2["minute"]:
            return 0
        if time1["year"] > time2["year"]:
            return -1
        else:
            if time1['year'] == time2['year']:
                if time1['month'] > time2['month']:
                    return -1
                else:
                    if time1['month'] == time2['month']:
                        if time1['hour'] > time2['hour']:
                            return -1
                        else:
                            if time1['hour'] == time2['hour']:
                                if time1['minute'] > time2['minute']:
                                    return -1
                                else:
                                    return 1
                            else:
                                return 1
                    else:
                        return 1
            else:
                return 1

    def __getBeginTime(self, tagMap, refDate):
        """Build the begin-time dict from B* tags, starting from refDate.

        String month/day/hour tags are resolved through the alias tables;
        numeric B* tags (via tagTimeAlias) take the minimum of their values.
        """
        beginTime = DefaultTime.getFromDateTime(refDate)
        tagMap = self._convertTo24(tagMap, 'BTPHN', 'BTPAP')
        tagMap = self._convertTo4digitYear(tagMap, 'BTPYN')
        if 'BTPMS' in tagMap.keys():
            monthstrings = tagMap['BTPMS']
            months = []
            for monthstring in monthstrings:
                index = 0
                try:
                    if monthstring not in timeStringAlias.keys():
                        BeginEndTime.logger.debug(
                            "Invalid month string, not in standard form:{}".format(monthstring))
                        continue
                    stdmonthstring = timeStringAlias[monthstring]
                    mon = timeStringToNumberAlias[stdmonthstring]
                    months.append(mon)
                    #if index == 0:
                    #    months = [mon]
                    #    index += 1
                    #else:
                    #    months.append(mon)
                except KeyError:
                    BeginEndTime.logger.debug(
                        "Invalid monthstring {}".format(monthstring))
            # Earliest named month wins for the begin time.
            if len(months) > 0:
                minMonth = min(months)
                beginTime['month'] = minMonth
        if 'BTPDS' in tagMap.keys():
            index = 0
            for daystring in tagMap['BTPDS']:
                if daystring not in timeStringAlias.keys():
                    BeginEndTime.logger.debug(
                        "Invalid day string, not in standard form:{}".format(daystring))
                    continue
                stddaystring = timeStringAlias[daystring]
                newBeginTime = self.__calculateDayOffset(beginTime, stddaystring, True)
                if index == 0:
                    beginTime = newBeginTime
                    index += 1
                    continue
                # Keep the earlier of the candidate begin times.
                if self.__compareTimes(newBeginTime, beginTime) == 1:
                    beginTime = newBeginTime
        if 'BTPHS' in tagMap.keys():
            index = 0
            for hourstring in tagMap['BTPHS']:
                if hourstring not in timeStringAlias.keys():
                    BeginEndTime.logger.debug(
                        "Invalid hour string, not in standard form:{}".format(hourstring))
                    continue
                stdhourstring = timeStringAlias[hourstring]
                newBeginTime = self.__calculateHourOffset(beginTime, stdhourstring)
                if index == 0:
                    beginTime = newBeginTime
                    index += 1
                    continue
                if self.__compareTimes(newBeginTime, beginTime) == 1:
                    beginTime = newBeginTime
        for key in tagMap.keys():
            if key in tagTimeAlias.keys():
                if key[0] == 'B':
                    vals = []
                    for val in tagMap[key]:
                        try:
                            intval = int(val)
                            vals.append(intval)
                        except ValueError as v:
                            # NOTE(review): 3 args for 2 placeholders — v is
                            # never rendered in the message.
                            BeginEndTime.logger.debug(
                                "ERROR Converting {} for key {}, error:".format(tagMap[key], key, v))
                    if len(set(vals)) > 1:
                        BeginEndTime.logger.debug(
                            "More than one string for month/year/day detected key:{} value:{}".format(key, vals))
                    if len(vals) > 0:
                        minval = min(vals)
                        beginTime[tagTimeAlias[key]] = minval
        # Handling repetitions
        #if 'BTPR' in tagMap.keys():
        #    repeatString = tagMap['BTPR']
        #    beginTime = self.__calculateRepeat(beginTime,repeatString)
        return beginTime

    def __getEndTime(self, beginTime, tagMap):
        """Build the end-time dict from E* tags and TIME_PERIOD durations.

        Starts from a deep copy of beginTime; numeric E* tags take the
        maximum of their values; end times are clamped forward so the range
        is never empty (one-day default when end < begin).
        """
        endTime = copy.deepcopy(beginTime)
        tagMap = self._convertTo24(tagMap, 'ETPHN', 'ETPAP')
        tagMap = self._convertTo4digitYear(tagMap, 'ETPYN')
        if 'TIME_PERIOD' in tagMap.keys():
            for timeTuple in tagMap['TIME_PERIOD']:
                newEndTime = self.__calculateDurationOffset(timeTuple, endTime)
                if self.__compareTimes(endTime, newEndTime) == 1:
                    endTime = newEndTime
        if 'ETPMS' in tagMap.keys():
            index = 0
            for monthstring in tagMap['ETPMS']:
                try:
                    if monthstring not in timeStringAlias.keys():
                        BeginEndTime.logger.debug(
                            "Invalid month string, not in standard form:{}".format(monthstring))
                        continue
                    stdmonthstring = timeStringAlias[monthstring]
                    newMon = timeStringToNumberAlias[stdmonthstring]
                    if index == 0:
                        endTime['month'] = newMon
                    if index > 0:
                        # Latest named month wins for the end time.
                        if newMon > endTime['month']:
                            endTime['month'] = newMon
                    # Without an explicit end day or a duration, extend to
                    # the last day of the month.
                    if tagMap.has_key('ETPDS') == False and tagMap.has_key('TIME_PERIOD') == False:
                        (weekday, maxdays) = calendar.monthrange(endTime['year'], endTime['month'])
                        endTime['day'] = maxdays
                    index += 1
                except KeyError:
                    BeginEndTime.logger.debug(
                        "Invalid monthstring:{}".format(monthstring))
        if 'ETPDS' in tagMap.keys():
            for daystring in tagMap['ETPDS']:
                if daystring not in timeStringAlias.keys():
                    BeginEndTime.logger.debug(
                        "Invalid day string, not in standard form:{}".format(daystring))
                    continue
                stddaystring = timeStringAlias[daystring]
                newEndTime = self.__calculateDayOffset(endTime, stddaystring, False)
                if self.__compareTimes(newEndTime, endTime) == -1:
                    endTime = newEndTime
        if 'ETPHS' in tagMap.keys():
            for hourstring in tagMap['ETPHS']:
                if hourstring not in timeStringAlias.keys():
                    BeginEndTime.logger.debug(
                        "Invalid hour string, not in standard form:{}".format(hourstring))
                    continue
                # NOTE(review): BUG — the alias table is applied twice here
                # (hourstring reassigned, then looked up again); unless the
                # table is idempotent this raises KeyError or yields the
                # wrong hour string.
                hourstring = timeStringAlias[hourstring]
                stdhourstring = timeStringAlias[hourstring]
                newEndTime = self.__calculateHourOffset(endTime, stdhourstring)
                if self.__compareTimes(newEndTime, endTime) == -1:
                    endTime = newEndTime
        for key in tagMap.keys():
            if key in tagTimeAlias.keys():
                if key[0] == 'E':
                    vals = []
                    for val in tagMap[key]:
                        try:
                            intval = int(val)
                            vals.append(intval)
                        except ValueError as v:
                            BeginEndTime.logger.debug(
                                "ERROR Converting {} for key {}, error:".format(tagMap[key], key, v))
                    if (len(set(vals)) > 1):
                        BeginEndTime.logger.debug(
                            "More than one value for month/day/year key:{} value:{}".format(key, vals))
                    if len(vals) > 0:
                        maxval = max(vals)
                        endTime[tagTimeAlias[key]] = maxval
        # NOTE(review): BUG — 'minute' is tested twice; the third clause was
        # almost certainly meant to be endTime["second"] == 0.
        if endTime["hour"] == 0 and endTime["minute"] == 0 and endTime["minute"] == 0:
            endTime['hour'] = 23
            endTime['minute'] = 59
            endTime['second'] = 59
        if self.__compareTimes(endTime, beginTime) == 1:
            BeginEndTime.logger.debug("EndTime less than Start Time")
            #Default One day offset
            endTime = copy.deepcopy(beginTime)
            endTime = self.__calculateDayOffset(endTime, "day", False)
        if (endTime['year'] - beginTime['year']) > 1:
            BeginEndTime.logger.debug(
                "Endtime and beginTime are off by more than 1 year")
            #endTime['year'] = beginTime['year']
        return endTime

    def getBeginTime(self):
        """Return the computed begin-time dict."""
        return self.beginTime

    def getEndTime(self):
        """Return the computed end-time dict."""
        return self.endTime
class labeledSentencesProcessor():
    """Turns labeled sentences ("word_LABEL ...") into tagger training data.

    Each input line is tokenized; tokens near a base (time) label within a
    window of numValidTags positions keep/receive labels, integer tokens
    become "DIGIT", and every processed line is wrapped numTerminalTags
    times with <~start~>/<~end~> terminal tags.
    """

    logger = utils.getLogger()

    def __init__(self, sentences, numTerminalTags=1, numValidTags=2):
        # Labels whose neighborhood makes surrounding words label-worthy.
        self._baseLabels = [
            "<ETPYN>", "<ETPDS>", "<ETPDN>", "<ETPMS>", "<ETPMN>",
            "<ETPHS>", "<ETPHN>", "<ETPmN>", "<ETPmS>", "<ETPAP>", "<ETPZ>",
            "<BTPYS>", "<BTPYN>", "<BTPDS>", "<BTPDN>", "<BTPMS>", "<BTPMN>",
            "<BTPHS>", "<BTPHN>", "<BTPmN>", "<BTPmS>", "<BTPAP>", "<BTPZ>",
            "<TDPS>", "<TDPN>", "<TDSS>", "<TDSN>", "<TPRD>"
        ]
        # Fixed: original never initialized this, so getUnProcessedLines()
        # raised AttributeError.
        self._unprocessedLines = []
        (self._labelWords, self._filesentences) = self.__parseLabeledData(
            sentences, numTerminalTags, numValidTags)

    def _isbaseLabel(self, word_label):
        """True if *word_label* contains any base label (without <>)."""
        for label in self._baseLabels:
            #Remove the <> while comparing because training data does not have <>
            label = label[1:len(label) - 1]
            if label in word_label:
                return True
        return False

    def _returnWordLabel(self, word, addlabel, label):
        """Normalize one token into a (word, label) pair.

        Integer tokens become "DIGIT"; others are lowercased.  When
        addlabel is False the label collapses to "<NA>".
        """
        try:
            int(word)
            if addlabel == True:
                if label is None:
                    label = "<DIGIT>"
                return ("DIGIT", label)
            else:
                return ("DIGIT", "<NA>")
        except ValueError:
            if addlabel == True:
                if label is None:
                    label = "<" + word.upper() + ">"
            else:
                return (word.lower(), "<NA>")
            return (word.lower(), label)

    def _process(self, word_label, index, numValidTags, words):
        """Split "word_LABEL" and decide whether the label is kept.

        A label is kept when numValidTags == -1 (always) or when any token
        within numValidTags positions of *index* carries a base label.
        Raises KeyError on a malformed token with an empty word or label.
        """
        addlabel = False
        if numValidTags == -1:
            addlabel = True
        else:
            for i in range(index - (numValidTags), index + numValidTags + 1, 1):
                if i >= 0 and i < len(words):
                    if self._isbaseLabel(words[i]) == True:
                        addlabel = True
        if "_" in word_label:
            (word, label) = word_label.split("_")
            label = "<" + label + ">"
            # A bare "_" token is mapped to a placeholder.
            if word == '' and label == '<>':
                return ("<UDERSCORE>", "<NA>")
            else:
                if word == '' or label == '':
                    raise KeyError
                return self._returnWordLabel(word, addlabel, label)
        else:
            return self._returnWordLabel(word_label, addlabel, None)

    def __parseLabeledData(self, insentences, numTerminalTags, numValidTags):
        """Process all lines; return (label->words map, processed lines)."""
        sentences = []
        labelWords = {}
        for line in insentences:
            line = line.strip()
            words = line.split()
            tokens = []
            index = 0
            for fullword in words:
                (word, label) = self._process(fullword, index, numValidTags, words)
                labelWords = utils.addKeyValue(labelWords, label, word)
                token = word + "_" + label
                tokens.append(token)
                index = index + 1
            processedline = " ".join(tokens)
            for i in range(numTerminalTags):
                processedline = "<~start~>_<~START~> " + processedline + " <~end~>_<~END~>"
            sentences.append(processedline)
        return (labelWords, sentences)

    def gettrainLabels(self):
        """Return the distinct labels seen in the training data."""
        # Fixed: original called self.getLabelWords(), which does not exist
        # (AttributeError); the accessor is getLabelWordsMap().
        return self.getLabelWordsMap().keys()

    def getSentences(self):
        return self._filesentences

    def getLabelWordsMap(self):
        return self._labelWords

    def getBaseLabels(self):
        return self._baseLabels

    def getUnProcessedLines(self):
        return self._unprocessedLines
def train(self, args):
    """Train the network from the configuration in *args*.

    Reads the training file list, copies hyperparameters from args onto
    self, builds data loaders / model / optimizer / scheduler, optionally
    resumes from a checkpoint, then runs the epoch loop with periodic
    cross-validation and checkpointing.

    Assumes self.sample_rate, self.in_norm, self.win_size, self.hop_size
    were set before this call (read but never assigned here) — TODO confirm
    against the enclosing class's __init__.
    """
    with open(args.tr_list, 'r') as f:
        self.tr_list = [line.strip() for line in f.readlines()]
    self.tr_size = len(self.tr_list)
    self.cv_file = args.cv_file
    self.ckpt_dir = args.ckpt_dir
    self.logging_period = args.logging_period
    self.resume_model = args.resume_model
    self.time_log = args.time_log
    self.lr = args.lr
    self.lr_decay_factor = args.lr_decay_factor
    self.lr_decay_period = args.lr_decay_period
    self.clip_norm = args.clip_norm
    self.max_n_epochs = args.max_n_epochs
    self.batch_size = args.batch_size
    self.buffer_size = args.buffer_size
    self.loss_log = args.loss_log
    self.unit = args.unit
    self.segment_size = args.segment_size
    self.segment_shift = args.segment_shift
    # gpu_ids is a comma-separated string; '-1' alone selects CPU.
    self.gpu_ids = tuple(map(int, args.gpu_ids.split(',')))
    if len(self.gpu_ids) == 1 and self.gpu_ids[0] == -1:
        # cpu only
        self.device = torch.device('cpu')
    else:
        # gpu
        self.device = torch.device('cuda:{}'.format(self.gpu_ids[0]))
    if not os.path.isdir(self.ckpt_dir):
        os.makedirs(self.ckpt_dir)
    logger = getLogger(os.path.join(self.ckpt_dir, 'train.log'), log_file=True)

    # create data loaders for training and cross validation
    tr_loader = AudioLoader(self.tr_list, self.sample_rate, self.unit,
                            self.segment_size, self.segment_shift,
                            self.batch_size, self.buffer_size,
                            self.in_norm, mode='train')
    cv_loader = AudioLoader(self.cv_file, self.sample_rate, unit='utt',
                            segment_size=None, segment_shift=None,
                            batch_size=1, buffer_size=10,
                            in_norm=self.in_norm, mode='eval')

    # create a network
    net = Net()
    logger.info('Model summary:\n{}'.format(net))
    net = net.to(self.device)
    if len(self.gpu_ids) > 1:
        net = DataParallel(net, device_ids=self.gpu_ids)

    # calculate model size (assumes float32 params: 32 bits each)
    param_count = numParams(net)
    logger.info('Trainable parameter count: {:,d} -> {:.2f} MB\n'.format(
        param_count, param_count * 32 / 8 / (2**20)))

    # net feeder
    feeder = NetFeeder(self.device, self.win_size, self.hop_size)

    # training criterion and optimizer
    criterion = LossFunction()
    optimizer = Adam(net.parameters(), lr=self.lr, amsgrad=False)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=self.lr_decay_period,
                                    gamma=self.lr_decay_factor)

    # resume model if needed
    if self.resume_model:
        logger.info('Resuming model from {}'.format(self.resume_model))
        ckpt = CheckPoint()
        ckpt.load(self.resume_model, self.device)
        state_dict = {}
        # DataParallel prefixes parameter names with 'module.'; rebuild the
        # state dict to match the current wrapping.
        for key in ckpt.net_state_dict:
            if len(self.gpu_ids) > 1:
                state_dict['module.' + key] = ckpt.net_state_dict[key]
            else:
                state_dict[key] = ckpt.net_state_dict[key]
        net.load_state_dict(state_dict)
        optimizer.load_state_dict(ckpt.optim_state_dict)
        ckpt_info = ckpt.ckpt_info
        logger.info('model info: epoch {}, iter {}, cv_loss - {:.4f}\n'.format(
            ckpt.ckpt_info['cur_epoch'] + 1,
            ckpt.ckpt_info['cur_iter'] + 1,
            ckpt.ckpt_info['cv_loss']))
    else:
        logger.info('Training from scratch...\n')
        ckpt_info = {
            'cur_epoch': 0,
            'cur_iter': 0,
            'tr_loss': None,
            'cv_loss': None,
            'best_loss': float('inf')
        }
    start_iter = 0

    # train model
    while ckpt_info['cur_epoch'] < self.max_n_epochs:
        accu_tr_loss = 0.
        accu_n_frames = 0
        net.train()
        for n_iter, egs in enumerate(tr_loader):
            # offset so logged iteration numbers continue across a resume
            n_iter += start_iter
            mix = egs['mix']
            sph = egs['sph']
            n_samples = egs['n_samples']
            mix = mix.to(self.device)
            sph = sph.to(self.device)
            n_samples = n_samples.to(self.device)
            n_frames = countFrames(n_samples, self.win_size, self.hop_size)

            start_time = timeit.default_timer()

            # prepare features and labels
            feat, lbl = feeder(mix, sph)
            loss_mask = lossMask(shape=lbl.shape, n_frames=n_frames,
                                 device=self.device)
            # forward + backward + optimize
            optimizer.zero_grad()
            with torch.enable_grad():
                est = net(feat)
            loss = criterion(est, lbl, loss_mask, n_frames)
            loss.backward()
            if self.clip_norm >= 0.0:
                clip_grad_norm_(net.parameters(), self.clip_norm)
            optimizer.step()

            # calculate loss (frame-weighted running average)
            running_loss = loss.data.item()
            accu_tr_loss += running_loss * sum(n_frames)
            accu_n_frames += sum(n_frames)

            end_time = timeit.default_timer()
            batch_time = end_time - start_time
            if self.time_log:
                with open(self.time_log, 'a+') as f:
                    print('Epoch [{}/{}], Iter [{}], tr_loss = {:.4f} / {:.4f}, batch_time (s) = {:.4f}'
                          .format(ckpt_info['cur_epoch'] + 1,
                                  self.max_n_epochs, n_iter, running_loss,
                                  accu_tr_loss / accu_n_frames,
                                  batch_time), file=f)
                    f.flush()
            else:
                print('Epoch [{}/{}], Iter [{}], tr_loss = {:.4f} / {:.4f}, batch_time (s) = {:.4f}'
                      .format(ckpt_info['cur_epoch'] + 1,
                              self.max_n_epochs, n_iter, running_loss,
                              accu_tr_loss / accu_n_frames,
                              batch_time), flush=True)

            if (n_iter + 1) % self.logging_period == 0:
                # periodic cross-validation + checkpointing
                avg_tr_loss = accu_tr_loss / accu_n_frames
                avg_cv_loss = self.validate(net, cv_loader, criterion, feeder)
                net.train()  # validate() presumably switches to eval mode
                ckpt_info['cur_iter'] = n_iter
                is_best = True if avg_cv_loss < ckpt_info['best_loss'] else False
                ckpt_info['best_loss'] = avg_cv_loss if is_best else ckpt_info['best_loss']
                latest_model = 'latest.pt'
                best_model = 'best.pt'
                ckpt_info['tr_loss'] = avg_tr_loss
                ckpt_info['cv_loss'] = avg_cv_loss
                # Unwrap DataParallel so the saved keys have no 'module.' prefix.
                if len(self.gpu_ids) > 1:
                    ckpt = CheckPoint(ckpt_info, net.module.state_dict(),
                                      optimizer.state_dict())
                else:
                    ckpt = CheckPoint(ckpt_info, net.state_dict(),
                                      optimizer.state_dict())
                logger.info('Saving checkpoint into {}'.format(
                    os.path.join(self.ckpt_dir, latest_model)))
                if is_best:
                    logger.info('Saving checkpoint into {}'.format(
                        os.path.join(self.ckpt_dir, best_model)))
                logger.info('Epoch [{}/{}], ( tr_loss: {:.4f} | cv_loss: {:.4f} )\n'
                            .format(ckpt_info['cur_epoch'] + 1,
                                    self.max_n_epochs, avg_tr_loss,
                                    avg_cv_loss))
                model_path = os.path.join(self.ckpt_dir, 'models')
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                ckpt.save(os.path.join(model_path, latest_model), is_best,
                          os.path.join(model_path, best_model))
                lossLog(os.path.join(self.ckpt_dir, self.loss_log), ckpt,
                        self.logging_period)
                # reset the running-loss window after each logging period
                accu_tr_loss = 0.
                accu_n_frames = 0

            # end of one full pass over the training list
            if n_iter + 1 == self.tr_size // self.batch_size:
                start_iter = 0
                ckpt_info['cur_iter'] = 0
                break

        ckpt_info['cur_epoch'] += 1
        scheduler.step()  # learning rate decay
    return
def test(self, args):
    """Run the trained model over every test list and write enhanced wavs.

    Loads the checkpoint in args.model_file, evaluates each list in
    args.tt_list, and writes per-utterance mixture / clean / estimated
    (and optionally ideal) wav files under args.est_path, logging the
    average loss per list.

    Assumes self.sample_rate, self.in_norm, self.win_size, self.hop_size
    were set before this call (read but never assigned here) — TODO confirm
    against the enclosing class's __init__.
    """
    with open(args.tt_list, 'r') as f:
        self.tt_list = [line.strip() for line in f.readlines()]
    self.model_file = args.model_file
    self.ckpt_dir = args.ckpt_dir
    self.est_path = args.est_path
    self.write_ideal = args.write_ideal
    # gpu_ids is a comma-separated string; '-1' alone selects CPU.
    self.gpu_ids = tuple(map(int, args.gpu_ids.split(',')))
    if len(self.gpu_ids) == 1 and self.gpu_ids[0] == -1:
        # cpu only
        self.device = torch.device('cpu')
    else:
        # gpu
        self.device = torch.device('cuda:{}'.format(self.gpu_ids[0]))
    if not os.path.isdir(self.ckpt_dir):
        os.makedirs(self.ckpt_dir)
    logger = getLogger(os.path.join(self.ckpt_dir, 'test.log'), log_file=True)

    # create a network
    net = Net()
    logger.info('Model summary:\n{}'.format(net))
    net = net.to(self.device)

    # calculate model size (assumes float32 params: 32 bits each)
    param_count = numParams(net)
    logger.info('Trainable parameter count: {:,d} -> {:.2f} MB\n'.format(
        param_count, param_count * 32 / 8 / (2**20)))

    # training criterion and optimizer
    criterion = LossFunction()

    # net feeder
    feeder = NetFeeder(self.device, self.win_size, self.hop_size)

    # resynthesizer (inverse transform back to waveforms)
    resynthesizer = Resynthesizer(self.device, self.win_size, self.hop_size)

    # load model
    logger.info('Loading model from {}'.format(self.model_file))
    ckpt = CheckPoint()
    ckpt.load(self.model_file, self.device)
    net.load_state_dict(ckpt.net_state_dict)
    logger.info('model info: epoch {}, iter {}, cv_loss - {:.4f}\n'.format(
        ckpt.ckpt_info['cur_epoch'] + 1,
        ckpt.ckpt_info['cur_iter'] + 1,
        ckpt.ckpt_info['cv_loss']))

    net.eval()
    for i in range(len(self.tt_list)):
        # create a data loader for testing
        tt_loader = AudioLoader(self.tt_list[i], self.sample_rate,
                                unit='utt', segment_size=None,
                                segment_shift=None, batch_size=1,
                                buffer_size=10, in_norm=self.in_norm,
                                mode='eval')
        logger.info('[{}/{}] Estimating on {}'.format(
            i + 1, len(self.tt_list), self.tt_list[i]))

        # one output subdirectory per test list, named after the list file
        est_subdir = os.path.join(
            self.est_path,
            self.tt_list[i].split('/')[-1].replace('.ex', ''))
        if not os.path.isdir(est_subdir):
            os.makedirs(est_subdir)

        accu_tt_loss = 0.
        accu_n_frames = 0
        for k, egs in enumerate(tt_loader):
            mix = egs['mix']
            sph = egs['sph']
            n_samples = egs['n_samples']
            n_frames = countFrames(n_samples, self.win_size, self.hop_size)
            mix = mix.to(self.device)
            sph = sph.to(self.device)
            feat, lbl = feeder(mix, sph)
            with torch.no_grad():
                loss_mask = lossMask(shape=lbl.shape, n_frames=n_frames,
                                     device=self.device)
                est = net(feat)
                loss = criterion(est, lbl, loss_mask, n_frames)
            accu_tt_loss += loss.data.item() * sum(n_frames)
            accu_n_frames += sum(n_frames)

            # resynthesize waveforms from labels (ideal) and estimates
            sph_idl = resynthesizer(lbl, mix)
            sph_est = resynthesizer(est, mix)

            # save estimates (batch_size is 1, hence the [0] indexing)
            mix = mix[0].cpu().numpy()
            sph = sph[0].cpu().numpy()
            sph_est = sph_est[0].cpu().numpy()
            sph_idl = sph_idl[0].cpu().numpy()
            mix, sph, sph_est, sph_idl = wavNormalize(mix, sph, sph_est, sph_idl)
            sf.write(os.path.join(est_subdir, '{}_mix.wav'.format(k)),
                     mix, self.sample_rate)
            sf.write(os.path.join(est_subdir, '{}_sph.wav'.format(k)),
                     sph, self.sample_rate)
            sf.write(os.path.join(est_subdir, '{}_sph_est.wav'.format(k)),
                     sph_est, self.sample_rate)
            if self.write_ideal:
                sf.write(os.path.join(est_subdir, '{}_sph_idl.wav'.format(k)),
                         sph_idl, self.sample_rate)
        avg_tt_loss = accu_tt_loss / accu_n_frames
        logger.info('loss: {:.4f}'.format(avg_tt_loss))
    return
import argparse import pprint import torch from utils.models import Model from utils.utils import getLogger logger = getLogger(__name__) def main(): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True # parse the configuarations parser = argparse.ArgumentParser( description='Additioal configurations for testing', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( '--gpu_ids', type=str, default='-1', help= 'IDs of GPUs to use (please use `,` to split multiple IDs); -1 means CPU only' ) parser.add_argument('--tt_list', type=str, required=True, help='Path to the list of testing files') parser.add_argument('--ckpt_dir',