Example #1
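# Run every loaded model over the frames of one data file: frames whose
# category is ignored or does not match the model name are skipped, the rest
# are prepared by preprocessing() and scored with model.predict() (falling
# back to 0 on a ValueError), and RFR results are post-processed at the end.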
def predict(file):
  data = _G.load_data(file)
  print(f"Predicting {file}")
  for mod_name, model in Model.items():
    print(f"=== {mod_name} ===")
    result = defaultdict(list)
    for frame in data:
      for category, values in frame.items():
        if category in _G.IgnoredCategories:
          continue
        elif category.upper() not in mod_name:
          continue
        
        if "RFR" in mod_name:
          train_n = model.best_estimator_.n_features_
        else:
          print("WARNING: Unknown estimator", mod_name)
          break
        
        train = preprocessing(mod_name, train_n, values)
        
        try:
          result[mod_name].append(model.predict(train)[0])
        except ValueError:
          result[mod_name].append(0)
    # end for frame in data
    for mod_name in result:
      if "RFR" in mod_name:
        process_rfr_result(result[mod_name])
Example #2
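# Build (or simply load, if the file already exists) a playback archive: one
# PlotPlaybackRecord per plot file, stamped with a timestamp that advances by
# _G.TimeWindowSize, then serialized to disk with _G.dump_data().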
def build_playback_archive():
  filename = _G.PlotPlaybackFilename
  if os.path.exists(filename):
    print(f"Archive {filename} already exists")
    return _G.load_data(filename)
  print(f"Building archive playback for '{filename}'")
  files  = sorted(glob_plots(filename))
  cur_timestamp = 0
  data   = []
  _len   = len(files)
  for i, file in enumerate(files):
    print(f"Processing {i}/{_len}")
    dat = PlotPlaybackRecord(cur_timestamp)
    dat.sx, dat.ex = find_plot_window_length(file)
    data.append(dat)
    cur_timestamp += _G.TimeWindowSize
  _G.dump_data(data, filename)
  print("Archived dumped")
  return data
Example #3

data = _G.all_data_files()
x_train = []
y_train = []
base = 0
incom_idx = []

max_nsize = 0

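# Assemble the training label vector: frame indices listed in the
# positive-label map get y = 1, all other frames get y = 0, and frames
# containing infinite feature values are reported before they are used.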
for file in data:
    parts = re.split(r"\\|\/", file)
    if len(parts[2].split('_')[-1]) < 3:
        continue
    labels = load_postive_label(parts)
    dat = _G.load_data(file)
    twlen = len(dat)
    for key in labels:
        tmp_y = [1 if i in labels[key] else 0 for i in range(twlen)]
        base = len(y_train)
        y_train.extend(tmp_y)
        for idx, frame in enumerate(dat):
            # if frame[Category].shape != dat[0][Category].shape:
            #   print(f"Frame#{base+idx} is incomplete, discard")
            #   incom_idx.append(base+idx)
            #   continue

            infidx = np.where(np.isinf(frame[Category]))
            if len(infidx[0]) > 0:
                print(
                    f"WARNING: INF value in frame#{idx} of {Category} in {file}"
Example #4
import json
import re
from collections import defaultdict
from pprint import pprint
from threading import Thread

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import (GridSearchCV, StratifiedKFold,
                                     cross_val_score)

import _G
import argv_parse

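# Pre-trained models keyed as "<estimator>_<feature>"; each entry is a fitted
# search object deserialized from disk with _G.load_data().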
Model = {
  #"RFR_MFCC": _G.load_data("rfr_mfcc.mod"),
  "RFR_ROLLOFF": _G.load_data("rfr_rolloff.mod"),
}

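# Convert a frame index into its [start, end] time span, in units of
# _G.TimeWindowSize.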
def getframe_timestamp_period(idx):
  return [idx * _G.TimeWindowSize, (idx+1) * _G.TimeWindowSize]

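# Print the per-frame scores sorted from highest to lowest, keeping the
# original frame index next to each score.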
def process_rfr_result(result):
  score_dict = {}
  for i, v in enumerate(result):
    score_dict[i] = v
  score_dict = {k: v for k, v in sorted(score_dict.items(), key=lambda p: p[1], reverse=True)}
  print("Sorted result:")
  for i,v in score_dict.items():
    print(i, v)

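# The body shown here reads labeled sample segments (slug, start_t, duration)
# from a JSON file handle `fp` and maps each segment to the frame indices it
# covers; `fp` and `ret` are expected to come from surrounding code that is
# not included in this snippet.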
def preprocessing(category, train_n, values):
  sam_datas = json.load(fp)
  for sdata in sam_datas:
    slug = sdata['slug']
    st   = sdata['start_t']
    dur  = sdata['duration']
    ed = (st + int(dur)) // _G.TimeWindowSize
    st = int(st) // _G.TimeWindowSize
    ret[slug] = []
    for i in range(st, ed + 1):
      ret[slug].append(i)
  return ret[next(iter(ret))]

data = _G.all_data_files()

models = {
  "SVM": _G.load_data("svm_rolloff.mod"),
  "KNN": _G.load_data("knn_rolloff.mod"),
  "RFR": _G.load_data("rfr_rolloff.mod")
}

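# Evaluate each loaded model file by file; ok_cnt, nonok_cnt, total_frame and
# real_ok_cnt accumulate per-model prediction statistics across all files.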
for mod_name, model in models.items():
  print(f"=== {mod_name} ===")
  ok_cnt = 0
  nonok_cnt = 0
  total_frame = 0
  real_ok_cnt = 0
  for file in data:
    parts   = re.split(r"\\|\/",file)
    if len(parts[2].split('_')[-1]) < 3:
      continue
    labels  = load_postive_label(parts)
Example #6
        for sdata in sam_datas:
            slug = sdata['slug']
            st = sdata['start_t']
            dur = sdata['duration']
            ed = (st + int(dur)) // _G.TimeWindowSize
            st = int(st) // _G.TimeWindowSize
            ret[slug] = []
            for i in range(st, ed + 1):
                ret[slug].append(i)
    return ret[next(iter(ret))]


data = _G.all_data_files()

models = {
    "SVM": _G.load_data("svm_zcr.mod"),
    #"KNN": _G.load_data("knn_zcr.mod"),
    "RFR": _G.load_data("rfr_zcr.mod")
}

for mod_name, model in models.items():
    print(f"=== {mod_name} ===")
    ok_cnt = 0
    nonok_cnt = 0
    total_frame = 0
    real_ok_cnt = 0
    for file in data:
        parts = re.split(r"\\|\/", file)
        if len(parts[2].split('_')[-1]) < 3:
            continue
        labels = load_postive_label(parts)
Example #7
            st = sdata['start_t']
            dur = sdata['duration']
            ed = (st + int(dur)) // _G.TimeWindowSize
            st = int(st) // _G.TimeWindowSize
            ret[slug] = []
            for i in range(st, ed + 1):
                ret[slug].append(i)
    return ret[next(iter(ret))]


data = _G.all_data_files()

models = {
    # "SVM": _G.load_data("svm_mfcc.mod"),
    # "KNN": _G.load_data("knn_mfcc.mod")
    "RFR": _G.load_data("rfr_mfcc.mod")
}

for mod_name, model in models.items():
    print(f"=== {mod_name} ===")
    ok_cnt = 0
    nonok_cnt = 0
    total_frame = 0
    real_ok_cnt = 0
    for file in data:
        parts = re.split(r"\\|\/", file)
        if len(parts[2].split('_')[-1]) < 3:
            continue
        labels = load_postive_label(parts)
        dat = _G.load_data(file)
        twlen = len(dat)
Example #8

data = _G.all_data_files()
x_train = []
y_train = []
base = 0
incom_idx = []

max_nsize = 0
cnt = 0
for file in data:
    parts = re.split(r"\\|\/", file)
    if len(parts[2].split('_')[-1]) < 3:
        continue
    labels = load_postive_label(parts)
    dat = _G.load_data(file)
    frame_len = len(dat)
    print(frame_len, file)
    for key in labels:
        tmp_y = [1 if i in labels[key] else 0 for i in range(frame_len)]
        base = len(y_train)
        y_train.extend(tmp_y)
        for idx, frame in enumerate(dat):
            cnt += 1
        for idx, frame in enumerate(dat):
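            # guard against infinite feature values before the frame is used for training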
            infidx = np.where(np.isinf(frame[Category]))
            if len(infidx[0]) > 0:
                print(
                    f"WARNING: INF value in frame#{idx} of {Category} in {file}"
                )
                print("INF val idx: ", infidx)

from mpl_toolkits.axes_grid1 import make_axes_locatable

import numpy as np
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
from collections import defaultdict
from pprint import pprint
import os

import _G
import argv_parse

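# Visualize the trained random forest: load the serialized search result, take
# its best estimator, export decision trees to .dot files and convert each to
# PNG with Graphviz's `dot` command (this assumes the forest has at least 50
# estimators and that `dot` is available on PATH).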
if __name__ == "__main__":
    model = _G.load_data("rfr_rolloff.mod")
    model = model.best_estimator_
    print(model)
    print(model.min_samples_split)
    print(model.max_samples)
    #export_graphviz(model.estimators_[2], out_file='tree.dot')
    for i in range(50):
        export_graphviz(model.estimators_[i],
                        out_file=f"visualization/tree_{i}.dot")
        os.system(
            f"dot -Tpng visualization/tree_{i}.dot -o visualization/tree_{i}.png"
        )