def process_single_day(day, data_dir):
    day_t = arrow.get(day)
    fpath = os.path.join(data_dir, 'vehicle_positions', day + '.csv')
    stops = get_metadata(day_t, 'stops', data_dir)
    schedule = get_metadata(day_t, 'schedule', data_dir)
    print('Processing file:', fpath)
    return process_day(pd.read_csv(fpath), stops, schedule)
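A minimal driver for the function above (a sketch; the CLI argument order and output filename are assumptions, not part of the original):

if __name__ == '__main__':
    import sys
    # e.g. python process.py 2019-05-01 ./data
    df = process_single_day(sys.argv[1], sys.argv[2])
    df.to_csv(sys.argv[1] + '_processed.csv', index=False)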
Example #2
def fetch_image_meta(paths=None):
    if paths:
        meta = get_metadata(f={'path': {'$in': paths}}, master_db=True)
    else:
        meta = get_metadata(master_db=True)

    meta = {m['imageName']: 1 for m in meta}
    return meta
Example #3
    def put(self):
        '''
        Add a new analytic via file upload. This is a security risk.
        '''
        try:
            time = datetime.now()
            # make the id more meaningful
            file = request.files['file']
            filename = secure_filename(file.filename)
            name, ext = splitext(filename)
            if ext not in ALLOWED_EXTENSIONS:
                return 'This filetype is not supported.', 415

            # save the file
            analytic_id = name + str(time.year) + str(time.month) + str(
                time.day) + str(time.hour) + str(time.minute) + str(
                    time.second)
            filepath = ANALYTICS_OPALS + analytic_id + '.py'
            file.save(filepath)

            # get the metadata from the file
            metadata = utils.get_metadata(analytic_id)
            metadata['analytic_id'] = analytic_id

            _, col = analytics_collection()
            col.insert(metadata)
            meta = drop_id_key(metadata)
        except Exception:
            tb = traceback.format_exc()
            return tb, 406

        return meta, 201
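Note that the hand-concatenated id above is not zero-padded, so distinct timestamps can collide (month 1, day 12 and month 11, day 2 both contribute '112'). One safer alternative (a sketch, not what the original does; `time` holds a datetime instance):

analytic_id = name + time.strftime('%Y%m%d%H%M%S')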
Example #4
    def track_changed(self):
        if not self.on:
            return
        if not bool(lib.SpPlaybackIsActiveDevice()):
            return
        self.pause()
        # Scrobble the last song only if it has played for more than half
        # its duration or for more than 4 minutes
        if self.metadata and self.play_cumul > min(
                self.metadata["duration"] / 2000, 240):
            self.lastfm_network.scrobble(
                artist=self.metadata["artist_name"],
                title=self.metadata["track_name"],
                timestamp=int(self.metadata["time_on"]),
                album=self.metadata["album_name"],
                duration=(self.metadata["duration"] / 1000))
            print "LastFM: scrobbled track " + self.metadata[
                "track_name"] + " - " + self.metadata["artist_name"]

        # Update now playing song
        self.play_cumul = 0
        self.play()
        self.metadata = get_metadata()
        self.metadata["time_on"] = time.time()
        self.lastfm_network.update_now_playing(
            artist=self.metadata["artist_name"],
            title=self.metadata["track_name"],
            album=self.metadata["album_name"],
            duration=int(self.metadata["duration"] / 1000))
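The scrobble threshold is easy to misread because self.metadata["duration"] is in milliseconds, so duration / 2000 is half the track length in seconds. A small sketch of the rule under that assumption:

def should_scrobble(play_seconds, duration_ms):
    # Last.fm-style rule: scrobble after half the track or 4 minutes,
    # whichever comes first; duration_ms / 2000 is half the length in seconds.
    return play_seconds > min(duration_ms / 2000, 240)

# A 3-minute track (180000 ms) scrobbles after 90 s of playback;
# a 10-minute track (600000 ms) scrobbles after 240 s.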
Example #5
    def get_data_matrix(cls,
                        feature,
                        label=None,
                        unlabelled=False,
                        ignore_metadata=False):
        min_max_scaler = MinMaxScaler()

        f = {}
        if label:
            label_images = utils.filter_images(label)
            f = {'path': {'$in': label_images}}

        # Build and scale feature matrix
        images, feature_space = utils.get_all_vectors(feature,
                                                      f=f,
                                                      unlabelled_db=unlabelled)
        feature_space = min_max_scaler.fit_transform(feature_space)
        # Not including metadata boosts accuracy of Set 2
        # Including metadata boosts accuracy of Set 1
        if ignore_metadata:
            meta = utils.get_metadata(unlabelled_db=unlabelled)
            # Mapping between image file path name and the metadata
            meta = {m['path']: m for m in meta}
            return images, meta, feature_space

        # Build and scale metadata matrix
        meta, metadata_space = cls.get_metadata_space(images,
                                                      unlabelled_db=unlabelled)
        metadata_space = min_max_scaler.fit_transform(metadata_space)

        # Column stack them
        data_matrix = np.c_[feature_space, metadata_space]

        return images, meta, data_matrix
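np.c_ column-stacks the two scaled matrices row by row (one row per image), so each image keeps its feature vector followed by its metadata vector. A quick illustration:

import numpy as np

features = np.array([[1, 2], [3, 4]])  # 2 images x 2 visual features
metadata = np.array([[5], [6]])        # 2 images x 1 metadata attribute
print(np.c_[features, metadata])       # [[1 2 5]
                                       #  [3 4 6]]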
Example #6
def metadata():
    ip = args.elasticIp
    print(
        f'The elastic server ip address is {args.elasticIp} and the port is {args.elasticPort}'
    )
    metadata = get_metadata(args.elasticIp, args.elasticPort)
    # print(metadata)
    return jsonify(metadata)
Example #7
def get_unlabelled_data(feature):
    u_images, u_vectors = utils.get_all_vectors(feature, unlabelled_db=True)

    # Get metadata
    meta = utils.get_metadata(unlabelled_db=True)
    meta = {m['path']: m for m in meta}

    return u_images, meta, u_vectors
Example #8
def get_filename(path, ext):
    with open(path, 'r') as f:
        line = f.readlines()[100]
        md5 = hashlib.md5(line.encode('utf-8')).hexdigest()  # md5 needs bytes, not str
        f.seek(0)
        created = get_metadata(f)['created']
        timestamp = int(time.mktime(created.timetuple()))
        filename = '%s-%s' % (timestamp, md5) + '.' + ext
    return filename, created
Example #9
def get_labelled_data(feature):
    # Get labelled images
    l_images, feature_space = utils.get_all_vectors(feature)

    # Get metadata
    meta = utils.get_metadata()
    meta = {m['path']: m for m in meta}

    return l_images, meta, feature_space
Example #10
def process_range(start, end, data_dir):
    dates = date_range(arrow.get(start), arrow.get(end))
    print('Processing dates from {} to {}'.format(start, end))

    path = os.path.join(data_dir, 'vehicle_positions') + '/{}.csv'
    paths = map(lambda day: (path.format(day), arrow.get(day)), dates)

    results = []
    for fpath, day in paths:
        stops = get_metadata(day, 'stops', data_dir)
        schedule = get_metadata(day, 'schedule', data_dir)
        now = arrow.now()
        print('Processing file:', fpath)
        df = process_day(pd.read_csv(fpath), stops, schedule)
        results.append(df)
        print('Processed {} in {}s'.format(day, (arrow.now() - now).seconds))

    combined = pd.concat(results)
    combined.to_csv('{}_{}.csv'.format(start, end), index=False)
Example #11
File: narps.py  Project: rotemb9/narps-2
    def __init__(self,
                 basedir,
                 metadata_file=None,
                 verbose=False,
                 overwrite=False,
                 dataurl=None,
                 testing=False):
        self.basedir = basedir
        self.dirs = NarpsDirs(basedir, dataurl=dataurl, testing=testing)
        self.verbose = verbose
        self.teams = {}
        self.overwrite = overwrite
        self.started_at = datetime.datetime.now()
        self.testing = testing

        # create the full mask image if it doesn't already exist
        if not os.path.exists(self.dirs.full_mask_img):
            print('making full image mask')
            self.mk_full_mask_img(self.dirs)
        assert os.path.exists(self.dirs.full_mask_img)

        # get input dirs for orig data
        self.image_jsons = None
        self.input_dirs = self.get_input_dirs(self.dirs)

        # check images for each team
        self.complete_image_sets = {}
        self.get_orig_images(self.dirs)
        for imgtype in ['thresh', 'unthresh']:
            log_to_file(
                self.dirs.logfile,
                'found %d teams with complete original %s datasets' %
                (len(self.complete_image_sets[imgtype]), imgtype))

        # set up metadata
        if metadata_file is None:
            self.metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'analysis_pipelines_for_analysis.xlsx')
        else:
            self.metadata_file = metadata_file

        self.metadata = get_metadata(self.metadata_file)

        self.hypothesis_metadata = pandas.DataFrame(
            columns=['teamID', 'hyp', 'n_na', 'n_zero'])

        self.all_maps = {
            'thresh': {
                'resampled': None
            },
            'unthresh': {
                'resampled': None
            }
        }
        self.rectified_list = []
Example #12
    def upload_album(self):
        album_data = get_metadata(
            True) if self.args.metadata else self.metadata
        album = self.client.create_album(album_data)
        print('Created album named "{}"'.format(album_data.get('title')))
        self.log_upload(album)

        album_id = album['id'] if self.client.auth else album['deletehash']

        # get all images in the folder with approved file extensions
        files = [glob(os.path.join(self.args.path, '*' + ext))
                 for ext in file_extensions]
        files = sum(files, [])  # ugly way to flatten list

        for f in files:
            print('Uploading {}'.format(os.path.basename(f)))
            img_data = get_metadata() if self.args.metadata else dict()
            self.upload_pic(f, img_data, album_id)

        return album['id']  # return album if more data is needed
Example #13
def get_metadata_space(images):
    meta = get_metadata(master_db=True)
    # Mapping between image file path name and the metadata
    meta = {m['path']: m for m in meta}
    space = np.array([[
        meta[i]['age'], mapping[meta[i]['gender']],
        mapping[meta[i]['skinColor']], mapping[meta[i]["accessories"]],
        meta[i]["nailPolish"], meta[i]["irregularities"]
    ] for i in images])

    return meta, space
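This snippet assumes a module-level mapping dict that turns the categorical metadata values into numbers; its actual contents never appear in this listing. A purely hypothetical version, only to make the shape of the lookup concrete:

# Hypothetical values -- the project's real encoding is not shown here.
mapping = {
    'male': 0, 'female': 1,    # gender
    'fair': 0, 'dark': 1,      # skinColor
    True: 1, False: 0,         # accessories
}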
Example #14
def ranking_item():
    """
    Rank the items based on their predicted ratings

    Outputs:
    -------
    : predicted ratings, inference time, ordered item ids, item metadata
    """

    if request.method == 'POST':
        ids = request.json
        user_id = int(ids['uid'])
        item_ids = ids['iids']

        user_ids = np.full(100, user_id)
        item_ids = item_ids[1:-1].split(',')
        item_ids = np.array(item_ids).astype(int)

        texts_u = []
        texts_i = []
        for i in user_ids:
            texts_u.append(u_text[i].tolist())
        for j in item_ids:
            texts_i.append(i_text[j].tolist())

        user_ids = user_ids.reshape(-1, 1)
        item_ids = item_ids.reshape(-1, 1)

        # Feed the inputs to the Tensorflow Serving model
        res, time_dif = tf_serving(texts_u, texts_i, user_ids, item_ids)

        # Get the ranking results
        rating = np.array(res['final_rating/add_1:0']).reshape(-1)
        order = np.argsort(rating)[::-1]
        item_ids_new = item_ids.reshape(-1)[order]
        rating_new = rating[order]

        # Prepare the metadata for 10 suggested items
        des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
            df_meta, item_ids_new, num_top=10)

        return json.dumps({
            'rating': rating_new.tolist(),
            'infertime': time_dif.total_seconds(),
            'item_ids': item_ids_new.tolist(),
            'des_meta': des_meta,
            'title_meta': title_meta,
            'price_meta': price_meta,
            'imurl_meta': imurl_meta,
            'categ_meta': categ_meta
        })

    else:
        return render_template('candidate.html')
Example #15
def read_metadata(pdf_path, document_uuid, document_name):
    try:
        metadata_dict = get_metadata(pdf_path)
        return dict(
            original_document=pdf_path,
            metadata=metadata_dict,
            document_uuid=document_uuid,
            document_name=document_name,
        )
    except Exception as e:
        sentry_client(e)
Example #16
def load_data(num_chord_comp=5, num_grain_comp=5):
    # Grain data
    grain_pca = np.load(stats_pca_path()+'grain_grain_pca_scores.npy')
    # load chord data
    chords = load_chords(cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True)
    # load labels
    metadata, class_map, subclass_map = get_metadata(stats_files())
    classes = np.array([int(x['class_num']) for x in metadata])
    # subclasses = np.array([x['subclass_num'] for x in metadata])
    #         xs          x         y
    return (grain_pca[:, :num_grain_comp], chords_pca[:, :num_chord_comp], classes)
Example #17
    def get_metadata_space(cls, images, unlabelled_db=False):
        meta = utils.get_metadata(unlabelled_db=unlabelled_db)
        # Mapping between image file path name and the metadata
        meta = {m['path']: m for m in meta}
        space = np.array([[
            meta[i]['age'], cls.mapping[meta[i]['gender']],
            cls.mapping[meta[i]['skinColor']],
            cls.mapping[meta[i]["accessories"]], meta[i]["nailPolish"],
            meta[i]["irregularities"]
        ] for i in images])

        return meta, space
Example #18
def get_articles(input):
    articles = utils.get_metadata(
        input, utils.image_ext, lambda:
        [Article(id=-1, chain_id='', filename='', title='', images=[])],
        lambda x: x[0].images,
        lambda id, chain_id, filename: ArticleImage(id=id,
                                                    chain_id=str(uuid.uuid4()),
                                                    filename=filename,
                                                    regions=[],
                                                    title='',
                                                    page=0,
                                                    idx_on_page=0))
    # ItJim: ^ this call didn't work because it was lacking parameters.
    return articles
Example #19
def get_full_matrix(feature, unlabelled=False, master=False):
    # Get labelled images
    images, data = get_all_vectors(feature,
                                   unlabelled_db=unlabelled,
                                   master_db=master)

    # Get metadata
    meta = get_metadata(unlabelled_db=unlabelled, master_db=master)
    meta = {m['path']: m for m in meta}
    meta_space = np.array([[
        meta[i]['age'], mapping[meta[i]['gender']],
        mapping[meta[i]['skinColor']], mapping[meta[i]["accessories"]],
        meta[i]["nailPolish"], meta[i]["irregularities"]
    ] for i in images])

    return images, meta, np.c_[data, meta_space]
Example #20
def function_create():
    with utils.AtomicRequest() as atomic:

        function_id = uuid.uuid4().hex

        atomic.driver_endpoint = driver_endpoint

        user, tenant = utils.get_headers(request)

        zip_file = utils.get_zip(request)
        zip_url = utils.upload_zip(function_id, zip_file)

        if not zip_url:
            atomic.errors = True
            return critical_error('Not able to store zip.')

        atomic.zip_url = zip_url

        metadata = utils.get_metadata(request)

        if not utils.validate_json(utils.build_schema, metadata):
            atomic.errors = True
            return bad_request("Error validating json.")

        tag = "{0}_{1}_{2}".format(tenant, user, metadata.get('name'))
        payload = {
            "memory": metadata.get('memory'),
            "tags": [tag],
            "runtime": metadata.get('runtime'),
            "zip_location": zip_url,
            "name": metadata.get('name')
        }

        image_id = utils.create_image(driver_endpoint, payload)
        atomic.image_id = image_id

        function = utils.create_function(tenant, user, function_id, image_id,
                                         zip_url, tag, metadata)

        if not function:
            atomic.errors = True
            return critical_error('Error building the function.')

        return Response(function_id, status=201)
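utils.AtomicRequest is not shown in this listing; judging from how atomic.errors and the stored attributes are used, it is presumably a context manager that rolls back uploaded resources when a step fails. A hypothetical sketch of that idea, not the project's actual implementation:

class AtomicRequest:
    # Hypothetical: handlers stash resource ids on the instance and set
    # .errors on failure; __exit__ then undoes whatever was stored.
    def __init__(self):
        self.errors = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if self.errors or exc_type is not None:
            self._rollback()
        return False  # never swallow exceptions

    def _rollback(self):
        # e.g. delete the uploaded zip / created image via stored attributes
        pass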
Example #21
    def post(self):
        drive_url = json.loads(self.request.body)['driveurl']
        logging.info("Received the drive url: %s", drive_url)
        drive_id = self.parse_url(drive_url)

        presentation = \
            Presentation.query(Presentation.drive_id == drive_id).get()
        if presentation is None:
            presentation = Presentation(drive_id=drive_id)

        slides = get_metadata(drive_id)
        slides_str = json.dumps(slides)
        logging.info(slides_str)
        presentation.slides = slides_str
        presentation_id = presentation.put().id()

        self.response.write(json.dumps({
            'presentation_id': str(presentation_id),
        }))
Example #22
def play(url, nid):
	utils.log('play: ' + urllib.quote(url))

	if nid == 'live':
		meta = utils.get_metadata(nid)

		# this usually means the live stream isn't currently active
		if 'error_msg' in meta:
			utils.log('cannot play stream: %s, %s' % (url, meta['error_msg']))
			utils.dialog_error(meta['error_msg'])
			return

	"""
	# XXX disabled as not currently working?

	# permission dance. if we're already logged in (have a valid cookie), no need to log in again
	perms = utils.get_perms(nid)

	if not perms:
		# login and recheck video permissions
		if not utils.wsbk_login():
			return

		perms = utils.get_perms(nid)
		if not perms:
			# we really mustn't have permission
			utils.log('no permission for video %s' % nid)
			utils.dialog_error('No permission to access this video. Check login details in plugin settings.')
			return
	"""

	(stream_url, meta) = utils.get_stream_url(nid)
	listitem = xbmcgui.ListItem(label=meta['title'], iconImage=meta['thumbnail_url'], thumbnailImage=meta['thumbnail_url'])

	utils.log("Playing stream: %s" % stream_url)

	try:
		xbmc.Player().play(stream_url, listitem)
	except:
		utils.dialog_error("Cannot play video")
Example #23
def plot_y():
    fig, axes = plt.subplots(3, 4, sharex=True)
    fig_2, axes_2 = plt.subplots(3, figsize=(9, 10))

    for i, num_train_episodes in enumerate([500, 1000, 3000]):
        _, y = utils.get_metadata(num_train_episodes=num_train_episodes,
                                  artificial=False)

        for j in np.arange(y.shape[1]):
            ax = axes[i][j]
            ax.set_title(num_train_episodes)
            y_cur = y.iloc[:, j]
            sns.histplot(y_cur, ax=ax, stat="density", bins=8, palette="deep")
            ax.set_ylabel("Densidade")

        aux = y.values.T.flatten()
        aux = pd.DataFrame.from_dict({
            "Algoritmo":
            np.repeat(y.columns, y.shape[0]),
            "Converged":
            aux > 0.0
        })

        sns.countplot(x="Algoritmo",
                      hue="Converged",
                      data=aux,
                      ax=axes_2[i],
                      palette="deep")

        if i != 2:
            axes_2[i].set_xlabel(None)

        ax = axes_2[i]
        ax.set_title(f"Episodios = {num_train_episodes}")
        ax.set_ylabel("Frequencia")
        legend_labels, _ = ax.get_legend_handles_labels()
        ax.legend(legend_labels, ["Não", "Sim"], title="Convergiu?")

    plt.tight_layout()
    plt.show()
Example #24
def decision_tree_driver(args, evaluate=False):
    images, data_matrix = utils.get_all_vectors(args.decision_model)
    # Fetch unlabelled data (as provided in the settings)
    u_images, u_meta, unlabelled = helper.get_unlabelled_data(
        args.decision_model)

    #matrix, _, _, um = reducer(data_matrix, 30, "nmf", query_vector=unlabelled)
    matrix = data_matrix
    um = unlabelled

    l_matrix = matrix[:len(images)]
    u_matrix = um[:len(u_images)]

    dm = helper.build_labelled_matrix(l_matrix, images, 'aspectOfHand')

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    max_depth = args.decision_max_depth
    min_size = args.decision_min_size

    prediction = decision_tree(dm, query, max_depth, min_size)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0

    if evaluate:
        master_meta = utils.get_metadata(master_db=True)
        # Mapping between image file path name and the metadata
        master_meta = {m['imageName']: m for m in master_meta}
        truth = [
            dorsal_symbol
            if master_meta[Path(image).name]['aspectOfHand'].split(' ')[0]
            == 'dorsal' else palmar_symbol for image in u_images
        ]

        print(helper.get_accuracy(truth, prediction))

    return zip(u_images, prediction)
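The ground-truth construction above assumes aspectOfHand holds a string such as 'dorsal right' or 'palmar left', with the first token deciding the class (the format is inferred from this usage). A quick illustration:

aspect = 'dorsal right'   # hypothetical aspectOfHand value
label = 0.0 if aspect.split(' ')[0] == 'dorsal' else 1.0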
Example #25
def train_mnist(project_id, epoch, train_per_epoch, interval):
    check_gpu(logger)
    project_metadata = get_metadata(project_id)
    train(
        dataset=load_mnist_dataset(project_id=project_id,
                                   buffer_size=60000,
                                   batch_size=256),
        gen=build_generator_model(),
        dis=build_discriminator_model(),
        gen_opt=keras.optimizers.Adam(1e-4),
        dis_opt=keras.optimizers.Adam(1e-4),
        logger=logger,
        epochs=epoch,
        start_epoch=0,
        interval=interval,
        train_per_epoch=train_per_epoch,
        sample_size=4,
        batch_size=32,
        visualize=visualize_mnist_sample,
        project_metadata=project_metadata,
        gen_input_generator=MnistInputGenerator(feat_dim=100),
    )
Example #26
def ppr_driver(args, evaluate=False):
    l_images, u_images, l_meta, u_meta, l_matrix, u_matrix = PreparePPRData.prepare_data(
        args.model, args.k_latent_semantics, args.frt, args.ignore_metadata)

    # Build training data
    labelled = helper.build_matrix_with_labels(l_matrix, l_images, l_meta)

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    # Evaluate
    predictions = ppr_classifier(labelled,
                                 query,
                                 frt=args.frt,
                                 k=args.k_latent_semantics,
                                 feature=args.model,
                                 edges=args.graph_edges,
                                 alpha=args.alpha,
                                 convergence=args.convergence)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0

    if evaluate:
        master_meta = utils.get_metadata(master_db=True)
        # Mapping between image file path name and the metadata
        master_meta = {m['imageName']: m for m in master_meta}
        truth = [
            dorsal_symbol
            if master_meta[Path(image).name]['aspectOfHand'].split(' ')[0]
            == 'dorsal' else palmar_symbol for image in u_images
        ]

        print(helper.get_accuracy(truth, predictions))

    # Visualization pending
    return zip(u_images, predictions)
Example #27
        "HC03_VC13"
    ]
    factors = list(get_factors(sources, n_factors).values())
    means = []
    diffs = []
    for i, factor in enumerate(factors):
        values = np.array(list(factor.values()))
        # values = (values - np.min(values)) / (np.max(values) - np.min(values))
        means.append(np.mean(values))
        diffs.append(np.mean(values, 1))

    # diffs = (diffs - np.min(diffs)) / (np.max(diffs) - np.min(diffs))
    fig = plt.figure()
    plt.plot(sources, 10 * lambdas_diff, label="λ")
    for n_factor, diff in zip(n_factors, diffs):
        plt.plot(sources, diff, label=get_metadata(2010, n_factor, False))
    plt.legend(loc="upper left")
    plt.show()

    X = [np.array(list(xs)) for xs in zip(*diffs)]
    # X = np.array([np.array(diffs[0]) * np.array(diffs[1])]).reshape(-1, 1)
    y = lambdas_diff
    # plt.plot(sources, 3*y)
    # plt.plot(sources, X)
    # plt.show()

    reg = LinearRegression()
    reg.fit(X, y)
    print(reg.score(X, y))
    print(reg.coef_)
    print(reg.intercept_)
Example #28
def new():
    if request.method == 'POST':
        original_url = str(request.form.get('url'))
        pixel_script = str(request.form.get('pixel_script'))
        keyword = str(request.form.get('keyword'))

        try:
            metadata = utils.get_metadata(original_url)

            template_name = "redirection_debug.html" if DEBUG else "redirection.html"

            if "title" in metadata:
                metadata_title = metadata.title
            else:
                metadata_title = ""

            if "type" in metadata:
                metadata_type = metadata.type
            else:
                metadata_type = ""

            if "image" in metadata:
                metadata_image = metadata.image
            else:
                metadata_image = ""

            if "description" in metadata:
                metadata_description = metadata.description
            else:
                metadata_description = ""
        except Exception:
            # fall back to empty metadata if extraction fails
            metadata_title = ""
            metadata_type = ""
            metadata_image = ""
            metadata_description = ""

        html_file = render_template(template_name, url=original_url, title=metadata_title, type=metadata_type, image=metadata_image, description=metadata_description, pixel_script=pixel_script)

        filename = shortuuid.ShortUUID().random(length=6)
        filename = filename + ".html"

        directory = "r/" + keyword
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(directory + "/" + filename, mode="w", encoding="utf-8") as file:
            file.write(str(html_file))

        # write to csv
        fp = open("static/" + "data.csv", "a")
        try:
            writer = csv.writer(fp)
            writer.writerow((str(original_url), str(filename)))
        finally:
            fp.close()


        # return redirect(SHORT_SITE + "/static/" + filename )
        return render_template("new.html", redirect_url=SHORT_SITE + directory + "/" + filename)

    return render_template("new.html")
Example #29
        if prediction in fname:
            passed += 1
        else:
            failed += 1
    print('*' * 50)
    print(' > passed: ', passed)
    print(' > failed: ', failed)
    ar = passed / (passed + failed)
    print(' > accuracy ratio: ', '%.2f' % (ar * 100), '%')


def get_files(dname, fpath):
    dpath = os.path.join(fpath, dname)
    return [f'{dpath}/{fname}' for fname in os.listdir(dpath)]


if __name__ == '__main__':
    model_path = sys.argv[1]
    fpath = sys.argv[2]

    # Training dataset metadata
    _, class_names, class_to_idx = get_metadata(fpath)
    num_classes = len(class_names)
    idx_to_class = {value: key for key, value in class_to_idx.items()}

    flist = [get_files(cls, fpath) for cls in class_names]
    files = list(reduce(lambda x, y: x + y, flist))

    result = predict_all(files, idx_to_class, model_path)
    analyze(result)
Example #30
    # y = [ class binary vars | subclass binary vars | pct]
    for item in metadata:
        y[c, class_map[item['class']]] = 1
        y[c, subclass_map[item['subclass']] + len(total_classes) ] = 1
        #TODO fix the filenames or write a script to handle vol frac
#        print item['volume_frac']
#        y[c, len(total_classes) + len(total_subclasses)] = item['volume_frac']
        c += 1 
    return y

if __name__ == '__main__':
    num_grain_comp = 15
    num_chord_comp = 3
    num_folds = 5

    metadata, class_map, subclass_map = get_metadata(paths.stats_files())
    if os.path.isfile(paths.stats_pca_path()+'grain_grain_pca_scores.npy'):
        print 'PCA .npy found, loading.'
        pca_scores = np.load(paths.stats_pca_path()+'grain_grain_pca_scores.npy')
    else:
        x = load_data(paths.stats_files())
        pca_scores = get_pca(x)
        np.save(paths.stats_pca_path()+'grain_grain_pca_scores.npy', pca_scores)
    chords = load_chords(paths.cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True) 
    input_params = flatten_input_params(metadata, class_map, subclass_map)    
      

    # PLOTTING FCNS
#    plot_chords(chords[0:5,0])
#    class_labels, class_data = group_components_by_class(metadata, chords_pca)
Example #31
logging.basicConfig(level=logging.DEBUG)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("input", nargs=1, help="The training hdf5 file")
    parser.add_argument("-pre", dest="pre", help="Pretrained word embeddings file in word2vec format (word <space> embedding, one per line)")
    options = parser.parse_args()

    # Load config parameters
    locals().update(config)
    logging.debug('loaded config')

    # DATA
    hdf5_file = options.input[0]
    word_to_ix, ix_to_word, morpho_to_ix, ix_to_morpho = get_metadata(hdf5_file)
    vocab_size = len(word_to_ix)
    morpho_vocab_size = len(morpho_to_ix)
    train_stream = get_stream(hdf5_file, 'train', batch_size)
    dev_stream = get_stream(hdf5_file, 'dev', batch_size)
    logging.debug('loaded data')
    print "Number of words:", vocab_size
    print "Number of morphemes:", morpho_vocab_size
    # Save the word and morpheme indices to disk
    D = { }
    D["word_to_ix"] = word_to_ix
    D["morpho_to_ix"] = morpho_to_ix
    cPickle.dump(D, open("dicts.pkl", "w"))
    logging.debug('wrote dicts')
    # Load the pretrained vectors if available
    if options.pre is not None:
Example #32
import numpy as np
import pandas as pd
from utils import get_dataframe, NumpyEncoder, get_metadata
import json

train_dir = "assist09_train.csv"
test_dir = "assist09_test.csv"
skill_matrix_dir = "assist09_skill_matrix.txt"

df_train = get_dataframe(train_dir)
df_test = get_dataframe(test_dir)
# use this to extract the whole q-s graph
df_total = pd.concat([df_train, df_test], ignore_index=True)
skill_matrix = np.loadtxt(skill_matrix_dir)

single_skill_cnt, skill_cnt, max_idx = get_metadata(skill_matrix, df_total)

print("single skill: 0 ~ {}, multi-skill: {} ~ {}, question: {} ~ {}, correctness: {} and {}"\
      .format(single_skill_cnt - 1, single_skill_cnt, skill_cnt - 1, skill_cnt, max_idx - 2, max_idx - 1, max_idx))

# graph -> list of dict
# node: {"type": "skill" or "question", "neighbor": [indices]}
qs_graph = []
# ?: is it feasible to get rid of multi-skills?
# init graph
node_cnt = single_skill_cnt + max_idx - 2 - skill_cnt + 1
for i in range(node_cnt):
    if i >= 0 and i < single_skill_cnt:
        qs_graph.append({"type": "skill", "neighbor": []})
    else:
        qs_graph.append({"type": "question", "neighbor": []})
Example #33
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis() ]

    for name, clf in zip(names, classifiers):
        scores = cross_val_score(clf, x, y, cv=num_folds)
        print name + ': ' + str(np.mean(scores))


if __name__ == '__main__':
    num_grain_comp = 5
    num_chord_comp = 5
    num_folds = 5

    metadata, class_map, subclass_map = get_metadata(stats_files())
    if os.path.isfile(stats_pca_path()+'grain_grain_pca_scores.npy'):
        print 'PCA .npy found, loading.'
        grain_pca = np.load(stats_pca_path()+'grain_grain_pca_scores.npy')
    else:
        x = load_data(stats_files())
        grain_pca = get_pca(x)
        np.save(stats_pca_path()+'grain_grain_pca_scores.npy', grain_pca)

    chords = load_chords(cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True) 
    
    # show the pca plots
    #plt.scatter(grain_pca[:, 0], grain_pca[:,1], alpha=0.85)
    #plt.show()
Example #34
def main(settingsfname, verbose=False):

    settings = utils.get_settings(settingsfname)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    features_that_parsed = [
        feature for feature in settings['FEATURES']
        if feature in list(data.keys())
    ]

    settings['FEATURES'] = features_that_parsed

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    # get model
    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    accuracy_scores = {}

    for subject in subjects:
        utils.print_verbose("=====Training {0} Model=====".format(
            str(subject)),
                            flag=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        X, y = assembler.test_train_discrimination(subject)

        # get the CV iterator
        cv = utils.sklearn.cross_validation.StratifiedShuffleSplit(
            y, random_state=settings['R_SEED'], n_iter=settings['CVITERCOUNT'])

        # initialise lists for cross-val results
        predictions = []
        labels = []
        allweights = []

        # run cross validation and report results
        for train, test in cv:

            # calculate the weights
            weights = utils.get_weights(y[train])
            # fit the model to the training data
            model_pipe.fit(X[train], y[train], clf__sample_weight=weights)
            # append new predictions
            predictions.append(model_pipe.predict(X[test]))
            # append test weights to store (why?) (used to calculate auc below)
            weights = utils.get_weights(y[test])
            allweights.append(weights)
            # store true labels
            labels.append(y[test])

        # stack up the results
        predictions = utils.np.hstack(predictions)
        labels = utils.np.hstack(labels)
        weights = utils.np.hstack(allweights)

        # calculate the total accuracy
        accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                        predictions,
                                                        sample_weight=weights)

        print("Accuracy score for {1}: {0:.3f}".format(accuracy, subject))

        # add AUC scores to a subj dict
        accuracy_scores.update({subject: accuracy})

        # store results from each subject
        subject_predictions[subject] = (predictions, labels, weights)

    # stack subject results (don't worry about this line)
    predictions, labels, weights = map(
        utils.np.hstack, zip(*list(subject_predictions.values())))

    # calculate global accuracy
    accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                    predictions,
                                                    sample_weight=weights)

    print(
        "predicted accuracy score over all subjects: {0:.2f}".format(accuracy))

    # output AUC scores to file
    accuracy_scores.update({'all': accuracy})

    settings['DISCRIMINATE'] = 'accuracy_scores.csv'
    # settings['AUC_SCORE_PATH'] = 'discriminate_scores'
    utils.output_auc_scores(accuracy_scores, settings)

    return accuracy_scores
Example #35
def main(settingsfname, verbose=False):

    settings = utils.get_settings(settingsfname)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    features_that_parsed = [feature for feature in
                            settings['FEATURES'] if feature in list(data.keys())]

    settings['FEATURES'] = features_that_parsed

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    # get model
    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe), flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    accuracy_scores = {}

    for subject in subjects:
        utils.print_verbose(
            "=====Training {0} Model=====".format(str(subject)),
            flag=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        X, y = assembler.test_train_discrimination(subject)

        # get the CV iterator
        cv = utils.sklearn.cross_validation.StratifiedShuffleSplit(
            y,
            random_state=settings['R_SEED'],
            n_iter=settings['CVITERCOUNT'])

        # initialise lists for cross-val results
        predictions = []
        labels = []
        allweights = []

        # run cross validation and report results
        for train, test in cv:

            # calculate the weights
            weights = utils.get_weights(y[train])
            # fit the model to the training data
            model_pipe.fit(X[train], y[train], clf__sample_weight=weights)
            # append new predictions
            predictions.append(model_pipe.predict(X[test]))
            # append test weights to store (why?) (used to calculate auc below)
            weights = utils.get_weights(y[test])
            allweights.append(weights)
            # store true labels
            labels.append(y[test])

        # stack up the results
        predictions = utils.np.hstack(predictions)
        labels = utils.np.hstack(labels)
        weights = utils.np.hstack(allweights)

        # calculate the total accuracy
        accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                        predictions,
                                                        sample_weight=weights)

        print("Accuracy score for {1}: {0:.3f}".format(accuracy, subject))

        # add AUC scores to a subj dict
        accuracy_scores.update({subject: accuracy})

        # store results from each subject
        subject_predictions[subject] = (predictions, labels, weights)

    # stack subject results (don't worry about this line)
    predictions, labels, weights = map(utils.np.hstack,
                                       zip(*list(subject_predictions.values())))

    # calculate global accuracy
    accuracy = utils.sklearn.metrics.accuracy_score(labels, predictions,
                                                    sample_weight=weights)

    print(
        "predicted accuracy score over all subjects: {0:.2f}".format(accuracy))

    # output AUC scores to file
    accuracy_scores.update({'all': accuracy})

    settings['DISCRIMINATE'] = 'accuracy_scores.csv'
    # settings['AUC_SCORE_PATH'] = 'discriminate_scores'
    utils.output_auc_scores(accuracy_scores, settings)

    return accuracy_scores
Example #36
    def convert_to_zscores(self, map_metadata_file=None, overwrite=None):
        """
        convert rectified images to z scores
        - unthresholded images could be either t or z images
        - if they are already z then just copy
        - use metadata supplied by teams to determine image type
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        if map_metadata_file is None:
            map_metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'narps_neurovault_images_details.csv')
        unthresh_stat_type = get_map_metadata(map_metadata_file)
        metadata = get_metadata(self.metadata_file)

        n_participants = metadata[['n_participants', 'NV_collection_string']]

        n_participants.index = metadata.teamID

        unthresh_stat_type = unthresh_stat_type.merge(n_participants,
                                                      left_index=True,
                                                      right_index=True)

        for teamID in self.complete_image_sets:
            if teamID not in unthresh_stat_type.index:
                print('no map metadata for', teamID)
                continue
            # this is a bit of a kludge
            # since some contrasts include all subjects
            # but others only include some
            # we don't have the number of participants in each
            # group so we just use the entire number
            n = unthresh_stat_type.loc[teamID, 'n_participants']

            for hyp in range(1, 10):
                infile = self.teams[teamID].images['unthresh']['rectified'][
                    hyp]
                if not os.path.exists(infile):
                    print('skipping', infile)
                    continue
                self.teams[teamID].images['unthresh']['zstat'][
                    hyp] = os.path.join(self.dirs.dirs['zstat'],
                                        self.teams[teamID].datadir_label,
                                        'hypo%d_unthresh.nii.gz' % hyp)
                if not overwrite and os.path.exists(
                        self.teams[teamID].images['unthresh']['zstat'][hyp]):
                    continue

                if unthresh_stat_type.loc[teamID,
                                          'unthresh_type'].lower() == 't':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    print("converting %s (hyp %d) to z - %d participants" %
                          (teamID, hyp, n))
                    TtoZ(infile,
                         self.teams[teamID].images['unthresh']['zstat'][hyp],
                         n - 1)
                elif unthresh_stat_type.loc[teamID, 'unthresh_type'] == 'z':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    if not os.path.exists(self.teams[teamID].images['unthresh']
                                          ['zstat'][hyp]):
                        print('copying', teamID)
                        shutil.copy(
                            infile,
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                else:
                    # if it's not T or Z then we skip it as it's not usable
                    print('skipping %s - other data type' % teamID)
Example #37
from theano import tensor
from blocks.graph import ComputationGraph, apply_dropout
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
locals().update(config)

# DATA
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
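    # (Continuation sketch -- the listing cuts off here. Based on the blocks
    # API, the dropout would plausibly be applied like this:)
    cg = apply_dropout(cg, inputs, dropout)
    cost = cg.outputs[0]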
Example #38
def record(record_hash):
    record = records.get_record_by_hash(record_hash)
    metadata = utils.get_metadata(record)
    context = utils.format_response(record, metadata)

    return flask.render_template('index.html', **context)
Example #39
def main():
    metadata = utils.get_metadata()
    settings = utils.get_settings('probablygood.gavin.json')
    settings['R_SEED'] = None
    # settings['SUBJECTS'] = ['Patient_2']
    scaler = sklearn.preprocessing.StandardScaler()
    thresh = sklearn.feature_selection.VarianceThreshold()
    # selector = sklearn.feature_selection.SelectKBest()
    classifier = sklearn.svm.SVC(probability=True)
    pipe = sklearn.pipeline.Pipeline([('scl', scaler),
                                      ('thr', thresh),
                                      #                                  ('sel', selector),
                                      ('cls', classifier)])

    output = {}

    data = utils.get_data(settings)
    da = utils.DataAssembler(settings, data, metadata)
    global_results = {}
    for subject in list(settings['SUBJECTS']) + ['global']:
        global_results[subject] = {}

    for i in range(10):
        print("iteration {0}".format(i))

        for subject in settings['SUBJECTS']:
            print(subject)
            X, y = da.build_training(subject)
            # cv = utils.Sequence_CV(da.training_segments, metadata)
            train, test, train_results, test_results = fit_and_return_parts_and_results(
                da, metadata, pipe, X, y)
            output.update({subject: {'train': train,
                                     'test': test,
                                     'train_results': train_results,
                                     'test_results': test_results}})

    #    with open('raw_cv_data.pickle', 'wb') as fh:
    #        pickle.dump(output, fh)

        summary_stats = mean_var_calc(output)

        for subject in settings['SUBJECTS']:
            for t in summary_stats[subject]:
                try:
                    global_results[subject][t] += [summary_stats[subject][t]]
                except KeyError:
                    global_results[subject][t] = [summary_stats[subject][t]]
    print(global_results)
    for subject in settings['SUBJECTS']:
        for t in global_results[subject]:
            meanscore = np.mean(global_results[subject][t])
            varscore = np.var(global_results[subject][t])
            print("For {0} mean {1} was "
                  "{2} with sigma {3}".format(subject, t, meanscore, varscore))

    with open('summary_stats.pickle', 'wb') as fh:
        pickle.dump(global_results, fh)
Example #40
import os
import sys

import torch
from PIL import Image

from utils import get_device
from utils import get_metadata
from utils import get_net
from utils import get_prediction_class
from utils import preprocess_image

if __name__ == '__main__':
    device = get_device()

    # Training dataset metadata
    _, class_names, class_to_idx = get_metadata(sys.argv[1])
    num_classes = len(class_names)
    idx_to_class = {value: key for key, value in class_to_idx.items()}

    # Data preparation
    image = Image.open(sys.argv[2])

    # Net initialization
    net = get_net(classes=num_classes)
    checkpoint_dict = torch.load(os.path.join('checkpoint', 'checkpoint.pth'),
                                 map_location=device)
    net.load_state_dict(checkpoint_dict['model_state_dict'])
    net.eval()
    net.to(device)

    # Prediction
    "alpha": 0.5,
    "lambda": 400,
    "subsample": 0.7,
    "colsample_bytree": 0.3,
    "objective": "binary:logistic",
    "scale_pos_weight": 0.9,
    "seed": 16,
    "gpu_id": 0,
    "tree_method": "gpu_hist",
}

for artificial in [False, True]:
    fig, axes = plt.subplots(1,
                             4,
                             figsize=(20, 15 / (1 + 5 * int(artificial))),
                             sharex=True)

    feat_imp = dict()
    X, y = utils.get_metadata(500, artificial=artificial)

    y = y > 0

    for i in np.arange(4):
        model = xgboost.XGBClassifier(**params).fit(X, y.iloc[:, i])
        imp = pd.Series(
            model.get_booster().get_fscore()).sort_values(ascending=True)
        imp.plot(kind="barh", ax=axes[i])

    fig.tight_layout()
    plt.show()
Example #42
def rating_review():
    """
    Predict personalized review-usefulness

    Outputs:
    -------
    : predicted ratings, inference time, top reviews with ratings, other reviews with ratings, item metadata
    """
    if request.method == 'POST':
        ids = request.json
        user_id = int(ids['uid'])
        item_id = int(ids['iid'])

        # Feed the inputs to the Tensorflow Serving model
        res, time_dif = tf_serving([u_text[user_id].tolist()],
                                   [i_text[item_id].tolist()],
                                   np.array([[user_id]]),
                                   np.array([[item_id]]))

        # Get the rating and ordered reviews based on their review-usefulness
        rating = np.array(res['final_rating/add_1:0']).reshape(-1)
        item_rev_weights = np.array(
            res['item_rev_weights/transpose_1:0']).reshape(-1)

        order = np.argsort(item_rev_weights)[::-1]
        rev_texts = item_rev_original[item_id][:review_num_i]
        if len(rev_texts) < review_num_i:
            rev_texts = rev_texts + [''] * (review_num_i - len(rev_texts))
        rev_texts = np.array(rev_texts)[order]

        # Top-3 reviews and other reviews
        toprevs = []
        otherrevs = []

        for i, rev_text in enumerate(rev_texts):
            if rev_text:
                if i < 3 or len(toprevs) < 3:
                    toprevs.append(rev_text)
                else:
                    otherrevs.append(rev_text)

        rev_rate_top = [int(float(df_revrate[toprev])) for toprev in toprevs]
        rev_rate_other = [
            int(float(df_revrate[otherrev])) for otherrev in otherrevs
        ]

        # Prepare the metadata for the item
        des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
            df_meta, item_id, single_pred=True)

        return json.dumps({
            'rating': rating.tolist(),
            'infertime': time_dif.total_seconds(),
            'toprevs': toprevs,
            'otherrevs': otherrevs,
            'rev_rate_top': rev_rate_top,
            'rev_rate_other': rev_rate_other,
            'des_meta': des_meta,
            'title_meta': title_meta,
            'price_meta': price_meta,
            'imurl_meta': imurl_meta,
            'categ_meta': categ_meta
        })

    else:
        return render_template('candidate.html')
Example #43
def info_metadata():
    res = get_metadata()
    res['volume'] = lib.SpPlaybackGetVolume()
    return jsonify(res)
Example #44
def info_metadata():
    res = get_metadata()
    res['volume'] = lib.SpPlaybackGetVolume()
    return jsonify(res)
Example #45
from theano import tensor
from blocks.graph import ComputationGraph, apply_dropout
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from blocks.extensions import saveload
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
locals().update(config)

# DATA
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
Example #46
def api_record(record_hash):
    metadata = utils.get_metadata(records.get_record_by_hash(record_hash))

    return flask.jsonify(**metadata)