Example #1
def main():
    metadata = util.get_metadata()
    
    # Extract mel spectrum features
    F = audioFeatureExtraction.dirWavFeatureExtraction("songs", 1.0, 1.0, 0.025, 0.025)
    with open("F", "wb") as f:
        pickle.dump(F, f)
Example #2
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]
    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)
    
    mfcc = dict(
        zip([metadata[i][0] for i in range(1, len(metadata))],
            util.load_features((dataset + "_features") if dataset else None)))

    # Load pyAudioAnalysis features
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))
    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False
    X2, X3 = [], []
    if full_dataset:
        X3 = [np.concatenate((F[item[0]], mfcc[item[0]]), axis=0) for item in metadata[1:]]
        X2 = [F[item[0]] for item in metadata[1:]]
    X1 = [mfcc[item[0]] for item in metadata[1:]]

    for X in [X1, X2] if full_dataset else [X1]:
        NUM_RUNS = 50
        Y = util.load_labels((dataset + "_metadata") if dataset else None)
        samples = range(len(X))
        samps = samples
        x = [X[i] for i in samps]
        y = [Y[i] for i in samples]
        N_ESTIMATORS = 20
        avg_mat = None 

        for run in range(NUM_RUNS): 
            clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, max_features=20, oob_score=True).fit(X, Y)
            similarity = dict()
            for dt in clf.estimators_:
                leaves = dt.apply(X)
                for i in samps:
                    for j in samps:
                        if leaves[i] == leaves[j]:
                            similarity[(i,j)] = similarity.get((i,j), 0) + 1

            mat = np.array([[(1.0 - similarity.get((i,j), 0)/N_ESTIMATORS)**2 for j in samples] for i in samples])
            mat = squareform(mat)
            if avg_mat is None:
                avg_mat = mat
            else:
                avg_mat = np.add(avg_mat, mat)  
        avg_mat = avg_mat / NUM_RUNS
        linkage_matrix = linkage(avg_mat, "single")
        matplotlib.rcParams['lines.linewidth'] = 2.5
        dendrogram(linkage_matrix, color_threshold=0.8, labels=y, show_leaf_counts=True)
        plt.xlabel("label")
        plt.ylabel("distance")
        plt.show()
Example #3
def metadata() -> Response:
    # Validate path and construct an absolute path
    try:
        path = get_complete_path(request.args.get("path", "/"), root)
    except Exception as e:
        msg = "Error in get_complete_path: {}".format(str(e))
        app.logger.warning(msg)
        return error("Could not browse")

    try:
        result = json.dumps(get_metadata(path))
        return Response(result, mimetype='application/json')
    except Exception as e:
        msg = "Error getting metadata: {}".format(str(e))
        app.logger.error(msg)
        return error("Could not retrieve metadata")
Example #4
    def __init__(self, path, parent=None):
        self.path = path
        if parent:
            self.parent = parent
        if util.is_supported(root_join(path)):
            self.supported = True
            tags = util.get_metadata(root_join(path))
            self.artist = tags['artist']
            self.album_artist = tags['performer']
            if not self.album_artist:
                self.album_artist = self.artist
            self.album = tags['album']
            self.track = tags['title']
        else:
            self.supported = False
        self.size = os.path.getsize(root_join(self.path))

        self.rel_path = root_join(self.path)
        self.change_alerted = False
Example #5
    def __init__(self, name, versionrange='', pkfile=None, search=True):
        metadata = None

        self.pkfile = pkfile or cf['pkfile']
        self.name, self.version = util.separate_fullname(name)

        if not self.version and search:
            if versionrange:
                match = util.get_match_version(
                            self.name, versionrange, self.pkfile
                        )
            else:
                match = util.get_latest_version(self.name, self.pkfile)
        else:
            match = name

        self.name, self.version = util.separate_fullname(match)
        self.fullname = util.format_pk_name(self.name, self.version)
        self.metadata = util.get_metadata(self.fullname)
        self.shortname = self.name

        self.base_path = join(cf['home'], self.fullname)
        self.install_path = join(self.base_path, "install")
        self.build_path = join(self.base_path, "build")
        self.log_path = join(self.base_path, "log")
        self.log = join(self.log_path, "chip.log")

        self.pkgfile = join(self.base_path, 'package.json')
        self.pkgpy = join(self.build_path, 'package.py')

        self.ptype = self.metadata.get('type')
        self.url = self.metadata.get('url')
        self.requirements = self.metadata.get("requires")
        self.data = self.metadata.get('data')

        self.env = {}
        self.activated = False
        self.deps = None
Example #6
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]
    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)
    
    mfcc = dict(
        zip([metadata[i][0] for i in range(1, len(metadata))],
            util.load_features((dataset + "_features") if dataset else None)))
    feats, files = None, None
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))
    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False
    X2, X3 = [], [] 
    if full_dataset:
        X3 = [np.concatenate((F[item[0]], mfcc[item[0]]), axis=0) for item in metadata[1:]]
        X2 = [F[item[0]] for item in metadata[1:]] 
    X1 = [mfcc[item[0]] for item in metadata[1:]] 
    Y = util.load_labels((dataset + "_metadata") if dataset else None)

    for X in [X1, X2] if full_dataset else [X1,]:
        print("------")
       
        classifiers = [ RandomForestClassifier(n_estimators=50, max_features=15, oob_score=True),
            KNeighborsClassifier(3),
            svm.SVC(kernel='linear', C=1),
            svm.SVC(gamma=2, C=1),
            GaussianNB()
        ]
        for clf in classifiers:
            scores = cross_val_score(clf, X, Y, cv=5)
            score = sum(scores)/len(scores)
            print(type(clf).__name__, "\t", score)
Example #7
def main():
    threads = []
    metadata = util.get_metadata()
    q = Queue(maxsize=7)  # bounded queue acts as a semaphore capping concurrent workers
    oq = Queue()          # output queue collecting (item_id, feature) results
    X = []
    for item in metadata[1:]:
        q.put(1)
        time.sleep(0.1)
        t = threading.Thread(target=process, args=(item[0], q, oq))
        t.start()
        threads.append(t)
        print("{}/{}".format(oq.qsize(),
                             len(metadata) - 1),
              end='\r',
              flush=True)

    for thread in threads:
        print("{}/{}".format(oq.qsize(),
                             len(metadata) - 1),
              end='\r',
              flush=True)
        thread.join()

    features = dict()
    while not oq.empty():
        item_id, feature = oq.get()
        features[item_id] = feature
    for item in metadata[1:]:
        if item[0] in features and features[item[0]] is not None:
            X.append(features[item[0]])
        else:
            # Note: Items with missing features will need to be removed from metadata.csv to preserve (X,Y) order
            print("Error getting features for {}".format(item[0]))
    with open("features", "wb") as f:
        pickle.dump(X, f)
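
A minimal follow-up sketch (not part of the original project) of the cleanup step the note above describes: dropping metadata rows whose features are missing so that X and the labels later loaded from metadata stay index-aligned. The helper name filter_metadata and the output file name metadata_clean.csv are assumptions for illustration.

import csv

def filter_metadata(metadata, features, out_path="metadata_clean.csv"):
    # Hypothetical helper: keep the header row plus only the rows whose id
    # has a non-None entry in the features dict, so X and Y remain aligned.
    header, rows = metadata[0], metadata[1:]
    kept = [row for row in rows
            if row[0] in features and features[row[0]] is not None]
    with open(out_path, "w", newline="") as f:
        csv.writer(f).writerows([header] + kept)
    return kept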
Example #8
    try:
        client = DropboxClient(session['access_token'])
    except ErrorResponse as e:
        abort(401)

    account = client.account_info()
    session['username'] = account['display_name']
    quota = float(account['quota_info']['quota'])
    shared = float(account['quota_info']['shared'])
    normal = float(account['quota_info']['normal'])
    total_bytes = int(normal + shared)
    session['used'] = human_readable(normal + shared)
    session['quota'] = human_readable(quota)

    job = q.enqueue(walk, client, get_metadata(client, '/'), 0, total_bytes)
    job.meta['access_token'] = session['access_token']
    job.save()
    update_progress(job, 0, "/")
    session['job'] = job.key

    return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used'])

@app.route('/display_result')
def display_result():
    if 'job' not in session:
        return jsonify(ready=False, progress=0)

    job = get_job_from_key(session['job'], conn)
    if job is None:
        abort(400)
Example #9
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]
    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)

    mfcc = dict(
        zip([metadata[i][0] for i in range(1, len(metadata))],
            util.load_features((dataset + "_features") if dataset else None)))

    # Load pyAudioAnalysis features
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))
    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False
    X2, X3 = [], []
    if full_dataset:
        X3 = [
            np.concatenate((F[item[0]], mfcc[item[0]]), axis=0)
            for item in metadata[1:]
        ]
        X2 = [F[item[0]] for item in metadata[1:]]
    X1 = [mfcc[item[0]] for item in metadata[1:]]

    #X = util.load_features((dataset + "_features") if dataset else None)
    for X in [X1, X2] if full_dataset else [X1]:
        labels = []
        avg_mat = None
        all_sims = dict()
        Y = util.load_labels((dataset + "_metadata") if dataset else None)
        samples = range(len(X))
        samps = range(len(X))
        x = [X[i] for i in samps]
        y = [Y[i] for i in samples]

        N_ESTIMATORS = 80
        NUM_RUNS = 5

        for run in range(NUM_RUNS):
            clf = RandomForestClassifier(n_estimators=N_ESTIMATORS,
                                         max_features=25,
                                         oob_score=True).fit(X, Y)
            similarity = dict()
            for dt in clf.estimators_:
                leaves = dt.apply(X)
                for i in samps:
                    for j in samps:
                        if leaves[i] == leaves[j]:
                            similarity[(i, j)] = similarity.get(
                                (i, j), 0) + (1 / N_ESTIMATORS)

            species_similarity = dict()
            for i in samps:
                for j in samps:
                    species_similarity[(Y[i], Y[j])] = species_similarity.get(
                        (Y[i], Y[j]), 0) + similarity.get(
                            (i, j), 0)**2 / (Y.count(Y[i]) * Y.count(Y[j]))

            for k in species_similarity:
                species_similarity[k] = species_similarity[k]**(0.5)

            labels = clf.classes_
            for i in range(len(labels)):
                normal = species_similarity[(labels[i], labels[i])]
                for j in range(i, len(labels)):
                    k = labels[i], labels[j]
                    species_similarity[k] /= normal
                    species_similarity[(k[1], k[0])] = species_similarity[k]
                    all_sims[k] = all_sims.get(
                        k, 0) + species_similarity[k] / NUM_RUNS

            mat = np.array([[(1.0 - species_similarity.get((i, j), 0))**2
                             for j in labels] for i in labels])
            print(mat)
            mat = squareform(mat)
            if avg_mat is None:
                avg_mat = mat
            else:
                avg_mat = np.add(avg_mat, mat)
        avg_mat = avg_mat / NUM_RUNS
        print(avg_mat)
        for k in all_sims:
            if k[0] != k[1] and all_sims[k] > 0.1:
                print("{}\t{}\t{}".format(k[0], k[1], all_sims[k]))
        linkage_matrix = linkage(avg_mat, "single")
        matplotlib.rcParams['lines.linewidth'] = 2.5
        dendrogram(linkage_matrix,
                   color_threshold=0.65,
                   labels=labels,
                   show_leaf_counts=True)
        plt.xlabel("label")
        plt.ylabel("distance")
        plt.show()
Example #10
def publish_initiator_pair(candidate_pair,
                           publish_job_data,
                           orbit_data,
                           aoi_id,
                           wuid=None,
                           job_num=None):

    logger.info("\nPUBLISH CANDIDATE PAIR : %s" % candidate_pair)
    master_ids_str = ""
    slave_ids_str = ""
    job_priority = 0

    master_acquisitions = candidate_pair["master_acqs"]
    slave_acquisitions = candidate_pair["slave_acqs"]
    union_geojson = candidate_pair["intersect_geojson"]
    starttime = candidate_pair["starttime"]
    endtime = candidate_pair["endtime"]
    orbitNumber = candidate_pair['orbitNumber']
    direction = candidate_pair['direction']
    platform = orbit_data['platform']
    logger.info("publish_data : orbitNumber : %s, direction : %s" %
                (orbitNumber, direction))

    project = publish_job_data["project"]
    '''
    spyddder_extract_version = job_data["spyddder_extract_version"] 
    standard_product_ifg_version = job_data["standard_product_ifg_version"] 
    acquisition_localizer_version = job_data["acquisition_localizer_version"]
    standard_product_localizer_version = job_data["standard_product_localizer_version"] 
    '''
    #job_data["job_type"] = job_type
    #job_data["job_version"] = job_version
    job_priority = publish_job_data["job_priority"]

    logger.info("MASTER : %s " % master_acquisitions)
    logger.info("SLAVE : %s" % slave_acquisitions)
    logger.info("project: %s" % project)

    #version = get_version()
    version = "v2.0.0"

    # set job type and disk space reqs
    disk_usage = "300GB"

    # query doc
    uu = UrlUtils()
    es_url = uu.rest_url

    grq_index_prefix = "grq"
    rest_url = es_url[:-1] if es_url.endswith('/') else es_url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, grq_index_prefix)

    # get metadata
    master_md = {
        i: util.get_metadata(i, rest_url, url)
        for i in master_acquisitions
    }
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = {
        i: util.get_metadata(i, rest_url, url)
        for i in slave_acquisitions
    }
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = util.get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = util.get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError(
            "Slave track {} doesn't match master track {}.".format(
                slave_track, track))

    ref_scene = master_md
    if len(master_acquisitions) == 1:
        ref_scene = master_md
    elif len(slave_acquisitions) == 1:
        ref_scene = slave_md
    elif len(master_acquisitions) > 1 and len(slave_acquisitions) > 1:
        raise RuntimeError("Single Scene Reference Required.")

    # get dem_type
    dem_type = util.get_dem_type(master_md)
    logger.info("master_dem_type: {}".format(dem_type))
    slave_dem_type = util.get_dem_type(slave_md)
    logger.info("slave_dem_type: {}".format(slave_dem_type))
    if dem_type != slave_dem_type:
        dem_type = "SRTM+v3"

    job_queue = "%s-job_worker-large" % project
    logger.info("submit_localize_job : Queue : %s" % job_queue)

    #localizer_job_type = "job-standard_product_localizer:%s" % standard_product_localizer_version

    logger.info("master acq type : %s of length %s" %
                (type(master_acquisitions), len(master_acquisitions)))
    logger.info("slave acq type : %s of length %s" %
                (type(slave_acquisitions), len(master_acquisitions)))

    if type(project) is list:
        project = project[0]

    for acq in sorted(master_acquisitions):
        #logger.info("master acq : %s" %acq)
        if master_ids_str == "":
            master_ids_str = acq
        else:
            master_ids_str += " " + acq

    for acq in sorted(slave_acquisitions):
        #logger.info("slave acq : %s" %acq)
        if slave_ids_str == "":
            slave_ids_str = acq
        else:
            slave_ids_str += " " + acq

    list_master_dt, list_slave_dt = util.get_scene_dates_from_metadata(
        master_md, slave_md)

    list_master_dt_str = list_master_dt.strftime('%Y%m%dT%H%M%S')
    list_slave_dt_str = list_slave_dt.strftime('%Y%m%dT%H%M%S')
    #ACQ_LIST_ID_TMPL = "acq_list-R{}_M{:d}S{:d}_TN{:03d}_{:%Y%m%dT%H%M%S}-{:%Y%m%dT%H%M%S}-{}-{}"

    id_hash = hashlib.md5(
        json.dumps([job_priority, master_ids_str, slave_ids_str,
                    dem_type]).encode("utf8")).hexdigest()
    '''
    id_hash = hashlib.md5(json.dumps([
        ACQ_LIST_ID_TMPL,
        m,
        master_orbit_urls[-1],
        slave_zip_urls[-1],
        slave_orbit_urls[-1],
        projects[-1],
        filter_strength,
	dem_type
    ]).encode("utf8")).hexdigest()
    '''

    orbit_type = 'poeorb'
    aoi_id = aoi_id.strip().replace(' ', '_')

    id = ACQ_LIST_ID_TMPL.format('M', len(master_acquisitions),
                                 len(slave_acquisitions), track,
                                 list_master_dt, list_slave_dt, orbit_type,
                                 id_hash[0:4], aoi_id)
    #id = "acq-list-%s" %id_hash[0:4]
    prod_dir = id
    os.makedirs(prod_dir, 0o755)

    met_file = os.path.join(prod_dir, "{}.met.json".format(id))
    ds_file = os.path.join(prod_dir, "{}.dataset.json".format(id))

    logger.info("\n\nPUBLISHING %s : " % id)
    #with open(met_file) as f: md = json.load(f)
    md = {}
    md['id'] = id
    md['project'] = project
    md['master_acquisitions'] = master_ids_str
    md['slave_acquisitions'] = slave_ids_str
    '''
    md['spyddder_extract_version'] = spyddder_extract_version
    md['acquisition_localizer_version'] = acquisition_localizer_version
    md['standard_product_ifg_version'] = standard_product_ifg_version
    '''
    md['job_priority'] = job_priority
    md['_disk_usage'] = disk_usage
    md['soft_time_limit'] = 86400
    md['time_limit'] = 86700
    md['dem_type'] = dem_type
    md['track_number'] = track
    md['starttime'] = "%sZ" % starttime
    md['endtime'] = "%sZ" % endtime
    md['union_geojson'] = union_geojson
    md['master_scenes'] = master_acquisitions
    md['slave_scenes'] = slave_acquisitions
    md['orbitNumber'] = orbitNumber
    md['direction'] = direction
    md['platform'] = platform
    md['list_master_dt'] = list_master_dt_str
    md['list_slave_dt'] = list_slave_dt_str
    md['tags'] = aoi_id

    try:
        geom = ogr.CreateGeometryFromJson(json.dumps(union_geojson))
        env = geom.GetEnvelope()
        bbox = [
            [env[3], env[0]],
            [env[3], env[1]],
            [env[2], env[1]],
            [env[2], env[0]],
        ]
        md['bbox'] = bbox
    except Exception as e:
        logger.warn("Got exception creating bbox : {}".format(str(e)))
        traceback.print_exc()
        #logger.warn("Traceback: {}".format(traceback.format_exc()))

    with open(met_file, 'w') as f:
        json.dump(md, f, indent=2)

    print("creating dataset file : %s" % ds_file)
    util.create_dataset_json(id, version, met_file, ds_file)