Example #1
def mk_figures(narps, logfile, thresh=0.95):

    func_name = sys._getframe().f_code.co_name
    log_to_file(logfile, '%s' % func_name)

    fig, ax = plt.subplots(7, 1, figsize=(12, 24))
    cut_coords = [-24, -10, 4, 18, 32, 52, 64]

    for i, hyp in enumerate(hypnums):
        pmap = os.path.join(narps.dirs.dirs['consensus'],
                            'hypo%d_1-fdr.nii.gz' % hyp)
        tmap = os.path.join(narps.dirs.dirs['consensus'],
                            'hypo%d_t.nii.gz' % hyp)
        pimg = nibabel.load(pmap)
        timg = nibabel.load(tmap)
        pdata = pimg.get_fdata()
        tdata = timg.get_fdata()[:, :, :, 0]
        threshdata = (pdata > thresh) * tdata
        threshimg = nibabel.Nifti1Image(threshdata, affine=timg.affine)
        nilearn.plotting.plot_stat_map(threshimg,
                                       threshold=0.1,
                                       display_mode="z",
                                       colorbar=True,
                                       title='hyp %d:' % hyp + hypotheses[hyp],
                                       vmax=8,
                                       cmap='jet',
                                       cut_coords=cut_coords,
                                       axes=ax[i])

    plt.savefig(os.path.join(narps.dirs.dirs['figures'], 'consensus_map.pdf'))
    plt.close(fig)
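The thresholding step above multiplies the t map by a boolean significance mask before plotting; a self-contained toy sketch of just that step, with random arrays standing in for the real FDR p-map and t-map:

import numpy
import nibabel

# toy 3-D maps standing in for the maps loaded above
shape, affine = (4, 4, 4), numpy.eye(4)
pdata = numpy.random.uniform(size=shape)        # 1 - FDR-corrected p values
tdata = numpy.random.normal(size=shape)         # t statistics
threshdata = (pdata > 0.95) * tdata             # keep t only where significant
threshimg = nibabel.Nifti1Image(threshdata, affine=affine)
print(int((threshdata != 0).sum()), 'voxels survive thresholding')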
Example #2
    def check_image_values(self, overwrite=None):
        """
        get # of NA and zero voxels for each image
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        if overwrite is None:
            overwrite = self.overwrite
        image_metadata_file = os.path.join(self.dirs.dirs['metadata'],
                                           'image_metadata_df.csv')
        if os.path.exists(image_metadata_file) and not overwrite:
            print('using cached image metadata')
            image_metadata_df = pandas.read_csv(image_metadata_file)
            return (image_metadata_df)
        # otherwise load from scratch
        image_metadata = []
        masker = nilearn.input_data.NiftiMasker(mask_img=self.dirs.MNI_mask)
        for teamID in self.complete_image_sets:
            for hyp in self.teams[teamID].images['thresh']['resampled']:
                threshfile = self.teams[teamID].images['thresh']['resampled'][
                    hyp]
                threshdata = masker.fit_transform(threshfile)
                image_metadata.append([
                    teamID, hyp,
                    numpy.sum(numpy.isnan(threshdata)),
                    numpy.sum(threshdata == 0.0)
                ])

        image_metadata_df = pandas.DataFrame(
            image_metadata, columns=['teamID', 'hyp', 'n_na', 'n_zero'])

        image_metadata_df.to_csv(image_metadata_file)
        return (image_metadata_df)
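The per-image bookkeeping above reduces to two numpy reductions; a standalone sketch with a made-up 1-D "image":

import numpy

voxels = numpy.array([0.0, 2.5, numpy.nan, 0.0, -1.2])  # toy voxel values
n_na = numpy.sum(numpy.isnan(voxels))     # number of NaN voxels -> 1
n_zero = numpy.sum(voxels == 0.0)         # number of zero-valued voxels -> 2
print(n_na, n_zero)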
Example #3
    def get_input_dirs(self, dirs, verbose=True, load_json=True):
        """
        get orig dirs
        - assumes that images.json is present for each valid dir
        """

        input_files = glob.glob(
            os.path.join(dirs.dirs['orig'], '*/hypo1_thresh.nii.gz'))
        input_dirs = [os.path.dirname(i) for i in input_files]

        log_to_file(self.dirs.logfile,
                    'found %d input directories' % len(input_dirs))
        for i in input_dirs:
            collection_id = os.path.basename(i)
            NV_collection_id, teamID = collection_id.split('_')
            if teamID not in self.teams:
                self.teams[teamID] = NarpsTeam(teamID,
                                               NV_collection_id,
                                               dirs,
                                               verbose=self.verbose)
                if os.path.exists(os.path.join(i, 'images.json')):
                    self.teams[teamID].jsonfile = os.path.join(
                        i, 'images.json')
                    with open(self.teams[teamID].jsonfile) as f:
                        self.teams[teamID].image_json = json.load(f)
Example #4
    def get_figi_history(
        self, figi: str, start: dt.datetime, end: dt.datetime, interval: str
    ):
        """
        Get history for a given figi identifier
        :param figi:
        :param start:
        :param end:
        :param interval:
        :return:
        """
        hist = None
        count = 0

        while not hist and count < SLEEP_TRIES:
            count += 1
            try:
                hist = self.market.get_candles(
                    figi=figi,
                    _from=start.isoformat(),
                    to=end.isoformat(),
                    interval=interval,
                )

            except Exception as e:
                log_to_file(e)
                log_to_file(f"Sleep {SLEEP_TIME} seconds")
                time.sleep(SLEEP_TIME)

        if self.verbose:
            print("Received market response:", hist.payload.candles)
        candles = hist.payload.candles
        candles_dicts = [candles[i].to_dict() for i in range(len(candles))]
        df = pd.DataFrame(candles_dicts)
        return df
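The method above follows a common retry pattern: loop until a response arrives or the attempt budget is spent, sleeping between failures. A generic, self-contained sketch of that pattern; fetch(), SLEEP_TRIES and SLEEP_TIME here are illustrative stand-ins, not part of the original code:

import time

SLEEP_TRIES = 5      # illustrative retry budget
SLEEP_TIME = 2       # illustrative back-off in seconds

def fetch_with_retries(fetch, *args, **kwargs):
    """Call fetch() until it returns a truthy result or retries are exhausted."""
    result = None
    count = 0
    while not result and count < SLEEP_TRIES:
        count += 1
        try:
            result = fetch(*args, **kwargs)
        except Exception as e:
            print(e)
            time.sleep(SLEEP_TIME)
    return result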
Example #5
def order_view(request, pk):
    t = TranslationRequest.objects.get(pk=pk)
    check_stage(t.status, 'selected_quote')

    t.reference = "%s-%s" % (str(pk), str(uuid.uuid4()))
    t.order_name = "Aldryn Translator Order"

    # COPY OLD LANG PAGE TREE TO NEW ONE
    if t.copy_content:
        copy_pages(t.from_lang, t.to_lang, t.pages)

    data = prepare_data(t, t.from_lang, t.to_lang, plugin_source_lang=t.to_lang)
    data.update(prepare_order_data(request, t))
    order = json.loads(get_order(t.provider, data))

    if log_to_file_enabled():
        log_to_file(data)

    t.sent_content = json.dumps(data, ensure_ascii=False).encode('ascii', 'xmlcharrefreplace')
    t.status = 'requested'
    t.save()

    # TODO: save other stuff from response to model (deadline, price, more?)

    if t.provider == 'supertext':
        return render_to_response(
            'aldryn_translator/confirmation.html', {'r': order},
            context_instance=RequestContext(request))

    else:
        raise NotImplementedError()
Example #6
def run_all_analyses(narps, simulate_noise=False):
    logfile = os.path.join(narps.dirs.dirs['logs'], 'ThresholdSimulation.log')
    log_to_file(logfile, 'Running thresholding simulation', flush=True)

    # get team results to add to table

    all_metadata = get_all_metadata(narps)
    mean_decision = all_metadata.groupby('varnum').Decision.mean()

    all_results = []
    for hyp in range(1, 10):
        results, mean_fdr_thresh, meta_results, roisize = get_activations(
            narps, hyp, logfile, simulate_noise=simulate_noise)
        mean_results = (results > 0).mean(0)
        r = [
            hyp, roisize, mean_decision.loc[hyp], mean_results[0],
            mean_results[1], meta_results[0], meta_results[1]
        ]
        all_results.append(r)

    results_df = pandas.DataFrame(
        all_results,
        columns=[
            'Hypothesis', 'N voxels in ROI',
            'proportion of teams reporting act.',
            'proportion of teams w/  act. (%s)' % results.columns[0],
            'proportion of teams w/  act. (%s)' % results.columns[1],
            'CBMA (n voxels in ROI)', 'IBMA (n voxels in ROI)'
        ])
    results_df.to_csv(os.path.join(narps.dirs.dirs['ThresholdSimulation'],
                                   'simulation_results.csv'),
                      index=False)
    return (results_df)
Example #7
    def estimate_smoothness(self, overwrite=None, imgtype='zstat'):
        """
        estimate smoothness of Z maps using FSL's smoothness estimation
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        output_file = os.path.join(self.dirs.dirs['metadata'],
                                   'smoothness_est.csv')
        if os.path.exists(output_file) and not overwrite:
            if self.verbose:
                print('using existing smoothness file')
            smoothness_df = pandas.read_csv(output_file)
            return(smoothness_df)

        # use nipype's interface to the FSL smoothest command
        est = SmoothEstimate()
        smoothness = []
        for teamID in self.complete_image_sets['unthresh']:
            for hyp in range(1, 10):
                if hyp not in self.teams[teamID].images['unthresh'][imgtype]:
                    # fill missing data with nan
                    print('no zstat present for', teamID, hyp)
                    smoothness.append([teamID, hyp, numpy.nan,
                                       numpy.nan, numpy.nan])
                    continue
                infile = self.teams[teamID].images['unthresh'][imgtype][hyp]
                if not os.path.exists(infile):
                    print('no image present:', infile)
                    continue
                else:
                    if self.verbose:
                        print('estimating smoothness for hyp', hyp)

                    est.inputs.zstat_file = infile
                    est.inputs.mask_file = self.dirs.MNI_mask
                    est.terminal_output = 'file_split'
                    smoothest_output = est.run()
                    smoothness.append([teamID, hyp,
                                       smoothest_output.outputs.dlh,
                                       smoothest_output.outputs.volume,
                                       smoothest_output.outputs.resels])
                    self.teams[teamID].logs['smoothest'] = (
                        smoothest_output.runtime.stdout,
                        smoothest_output.runtime.stderr)

        smoothness_df = pandas.DataFrame(
            smoothness,
            columns=['teamID', 'hyp', 'dlh', 'volume', 'resels'])
        smoothness_df.to_csv(output_file)
        return(smoothness_df)
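Stripped of the per-team bookkeeping, the FSL call above reduces to a few lines. A minimal usage sketch, assuming nipype and FSL are installed; the two file paths are placeholders:

from nipype.interfaces.fsl import SmoothEstimate

est = SmoothEstimate()
est.inputs.zstat_file = 'hypo1_unthresh.nii.gz'            # placeholder z-stat image
est.inputs.mask_file = 'MNI152_T1_2mm_brain_mask.nii.gz'   # placeholder mask
result = est.run()
print(result.outputs.dlh, result.outputs.volume, result.outputs.resels)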
Example #8
    def compute_image_stats(self, datatype='zstat', overwrite=None):
        """
        compute std and range on statistical images
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite

        # set up directories
        unthresh_concat_dir = self.dirs.get_output_dir(
            'unthresh_concat_%s' % datatype)
        unthresh_range_dir = self.dirs.get_output_dir(
            'unthresh_range_%s' % datatype)
        unthresh_std_dir = self.dirs.get_output_dir(
            'unthresh_std_%s' % datatype)

        for hyp in range(1, 10):

            unthresh_file = os.path.join(
                unthresh_concat_dir,
                'hypo%d.nii.gz' % hyp)

            range_outfile = os.path.join(
                unthresh_range_dir,
                'hypo%d.nii.gz' % hyp)

            std_outfile = os.path.join(
                unthresh_std_dir,
                'hypo%d.nii.gz' % hyp)

            if not os.path.exists(range_outfile) \
                    or not os.path.exists(std_outfile) \
                    or overwrite:
                unthresh_img = nibabel.load(unthresh_file)
                unthresh_data = unthresh_img.get_fdata()
                concat_data = numpy.nan_to_num(unthresh_data)

                # compute range
                datarange = numpy.max(concat_data, axis=3) \
                    - numpy.min(concat_data, axis=3)
                range_img = nibabel.Nifti1Image(
                    datarange,
                    affine=unthresh_img.affine)
                range_img.to_filename(range_outfile)

                # compute standard deviation
                datastd = numpy.std(concat_data, axis=3)
                std_img = nibabel.Nifti1Image(
                    datastd,
                    affine=unthresh_img.affine)
                std_img.to_filename(std_outfile)
Example #9
    def __init__(self, basedir, data_url=None,
                 force_download=False):

        # set up a dictionary to contain all of the
        # directories
        self.dirs = {}

        # check to make sure home of basedir exists
        assert os.path.exists(os.path.dirname(basedir))
        self.dirs['base'] = basedir
        if not os.path.exists(basedir):
            os.mkdir(basedir)
        self.force_download = force_download
        if data_url is None:
            self.data_url = DATA_URL
        else:
            self.data_url = data_url

        dirs_to_add = ['output', 'metadata',
                       'cached', 'figures', 'logs', 'orig']
        for d in dirs_to_add:
            self.dirs[d] = os.path.join(self.dirs['base'], d)

        self.dirs['templates'] = os.path.join(
            os.environ['FSLDIR'],
            'data/standard')

        # autogenerate all of the directories
        # except for the orig dir
        for d in dirs_to_add:
            if d != 'orig' and not os.path.exists(self.dirs[d]):
                os.mkdir(self.dirs[d])

        self.logfile = os.path.join(self.dirs['logs'], 'narps.txt')
        log_to_file(self.logfile, 'Running Narps main class', flush=True)

        output_dirs = ['resampled', 'rectified', 'zstat',
                       'thresh_mask_orig']

        for o in output_dirs:
            self.get_output_dir(o)

        # if raw data don't exist, download them
        if self.force_download and os.path.exists(self.dirs['orig']):
            shutil.rmtree(self.dirs['orig'])
        if not os.path.exists(self.dirs['orig']):
            self.get_orig_data()
        assert os.path.exists(self.dirs['orig'])

        # make sure the necessary templates are present
        # these should be downloaded with the raw data
        self.MNI_mask = os.path.join(self.dirs['templates'],
                                     'MNI152_T1_2mm_brain_mask.nii.gz')
        assert os.path.exists(self.MNI_mask)

        self.MNI_template = os.path.join(self.dirs['templates'],
                                         'MNI152_T1_2mm.nii.gz')
        assert os.path.exists(self.MNI_template)

        self.full_mask_img = os.path.join(self.dirs['templates'],
                                          'MNI152_all_voxels.nii.gz')
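The exists-check/mkdir pairs above could hypothetically be collapsed with os.makedirs(exist_ok=True); a short sketch of that alternative, using an illustrative base path rather than the real NARPS layout:

import os

basedir = '/tmp/narps_example'   # illustrative path
dirs = {'base': basedir}
for d in ['output', 'metadata', 'cached', 'figures', 'logs']:
    dirs[d] = os.path.join(basedir, d)
    os.makedirs(dirs[d], exist_ok=True)   # creates parents, no error if present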
Example #10
    def get_binarized_thresh_masks(self):
        """
        create binarized thresholded maps for each team
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        for teamID in self.complete_image_sets:
            self.teams[teamID].create_binarized_thresh_masks()
Example #11
    def compute_image_stats(self, datatype='zstat', overwrite=None):
        """
        compute std and range on statistical images
        """
        log_to_file(
            self.dirs.logfile, '\n\n%s' %
            sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        for hyp in range(1, 10):

            unthresh_file = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_concat_%s/hypo%d.nii.gz' % (datatype, hyp))

            range_outfile = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s/hypo%d.nii.gz' % (datatype, hyp))
            if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                    'unthresh_range_%s' % datatype)):
                os.mkdir(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_range_%s' % datatype))

            std_outfile = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s/hypo%d.nii.gz' % (datatype, hyp))
            if not os.path.exists(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_std_%s' % datatype)):
                os.mkdir(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_std_%s' % datatype))

            if not os.path.exists(range_outfile) \
                    or not os.path.exists(std_outfile) \
                    or overwrite:
                unthresh_img = nibabel.load(unthresh_file)
                unthresh_data = unthresh_img.get_fdata()
                concat_data = numpy.nan_to_num(unthresh_data)
                datarange = numpy.max(concat_data, axis=3) \
                    - numpy.min(concat_data, axis=3)
                range_img = nibabel.Nifti1Image(
                    datarange,
                    affine=unthresh_img.affine)
                range_img.to_filename(range_outfile)
                datastd = numpy.std(concat_data, axis=3)
                std_img = nibabel.Nifti1Image(
                    datastd,
                    affine=unthresh_img.affine)
                std_img.to_filename(std_outfile)
Example #12
def verify_service_content(url, request_body, content):
    headers = {'content-type': 'text/xml'}
    try:
        start = datetime.datetime.now()
        r = requests.post(url, data=request_body, headers=headers)
        end = datetime.datetime.now()
    except Exception as e:
        utils.log_to_file(e)
        return 0
Example #13
    def __init__(self, basedir, data_url=None,
                 force_download=False):
        # set up directories and template files
        self.dirs = {}
        # check to make sure home of basedir exists
        assert os.path.exists(os.path.dirname(basedir))
        self.dirs['base'] = basedir
        if not os.path.exists(basedir):
            os.mkdir(basedir)
        self.force_download = force_download
        if data_url is None:
            self.data_url = DATA_URL
        else:
            self.data_url = data_url

        self.dirs['orig'] = os.path.join(self.dirs['base'], 'orig')
        self.dirs['templates'] = os.path.join(self.dirs['base'], 'templates')

        self.dirs['output'] = os.path.join(self.dirs['base'], 'maps')
        self.dirs['metadata'] = os.path.join(self.dirs['base'], 'metadata')
        self.dirs['cached'] = os.path.join(self.dirs['base'], 'cached')
        self.dirs['figures'] = os.path.join(self.dirs['base'], 'figures')
        self.dirs['logs'] = os.path.join(self.dirs['base'], 'logs')

        dirs_to_make = ['output', 'metadata', 'cached', 'figures', 'logs']
        for d in dirs_to_make:
            if not os.path.exists(self.dirs[d]):
                os.mkdir(self.dirs[d])

        self.logfile = os.path.join(self.dirs['logs'], 'narps.txt')
        log_to_file(self.logfile, 'Running Narps main class', flush=True)

        output_dirs = ['resampled', 'rectified', 'zstat',
                       'thresh_mask_orig', 'concat_thresh']

        for o in output_dirs:
            self.dirs[o] = os.path.join(self.dirs['output'], o)
            if not os.path.exists(self.dirs[o]):
                os.mkdir(self.dirs[o])

        # if raw data don't exist, download them
        if not os.path.exists(self.dirs['orig']) or self.force_download:
            self.get_orig_data()
        assert os.path.exists(self.dirs['orig'])

        self.MNI_mask = os.path.join(self.dirs['templates'],
                                     'MNI152_T1_2mm_brain_mask.nii.gz')
        assert os.path.exists(self.MNI_mask)

        self.MNI_template = os.path.join(self.dirs['templates'],
                                         'MNI152_T1_2mm.nii.gz')
        assert os.path.exists(self.MNI_template)

        self.full_mask_img = os.path.join(self.dirs['templates'],
                                          'MNI152_all_voxels.nii.gz')

        # templates should also be downloaded with orig data
        assert os.path.exists(self.dirs['templates'])
Example #14
    def get_etfs_daily_history(
        self,
        end=dt.datetime.now(MOSCOW_TIMEZONE) - dt.timedelta(days=1),
        start=dt.datetime.now(MOSCOW_TIMEZONE) - dt.timedelta(weeks=52, days=1),
    ):
        """
        Get daily market history (1 point per day) with exactly the requested interval.

        Note:
        Due to API restrictions, an interval longer than a year must be divided into
        years when fetched
        """

        interval = "day"
        interval_dt = dt.timedelta(days=1)
        print(
            f"Requesting ETF history from {start} till {end} with an interval={interval}"
        )

        length = end.astimezone(MOSCOW_TIMEZONE) - start.astimezone(MOSCOW_TIMEZONE)
        # The server misbehaves if the requested period is shorter than one interval
        if length < interval_dt:
            start = end - interval_dt
            length = interval_dt

        one_period = dt.timedelta(weeks=52)
        n_periods = int(math.ceil(length / one_period))
        periods = [
            [end - one_period * (i + 1), end - one_period * i] for i in range(n_periods)
        ]
        periods[-1][0] = start
        periods = periods[::-1]

        dfs = []
        out, tickers = pd.DataFrame(), []
        for period in tqdm(periods, desc=f"Getting history with interval={interval}"):
            if self.verbose:
                print(f"Requesting history from {period[0]} till {period[1]}.")
            df, tickers = self.get_etfs_history(
                start=period[0], end=period[1], freq=interval
            )
            if df.empty:
                print(
                    f"Server returned an empty reply for the following period: {period}!"
                )
                continue
            dfs.append(df)
        if dfs:
            out = pd.concat(dfs, axis=0, ignore_index=True)

        l1 = len(out)
        out.drop(out[pd.isna(out.time)].index, inplace=True)
        if len(out) < l1:
            log_to_file(f"{l1 - len(out)} NaN time stamps dropped.")

        return out, tickers
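The core of the method above is splitting a long date range into at-most-one-year chunks before querying the API. A self-contained sketch of just that splitting step (the dates in the call are illustrative):

import datetime as dt
import math

def split_into_periods(start, end, one_period=dt.timedelta(weeks=52)):
    """Split [start, end] into chunks no longer than one_period, oldest first."""
    length = end - start
    n_periods = int(math.ceil(length / one_period))
    periods = [[end - one_period * (i + 1), end - one_period * i]
               for i in range(n_periods)]
    periods[-1][0] = start          # clamp the oldest chunk to the true start
    return periods[::-1]

print(split_into_periods(dt.datetime(2020, 1, 1), dt.datetime(2022, 6, 1)))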
Example #15
    def get_resampled_images(self, overwrite=None):
        """
        resample all images into FSL MNI space
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        if overwrite is None:
            overwrite = self.overwrite
        for teamID in self.complete_image_sets:
            self.teams[teamID].get_resampled_images()
Example #16
    def __init__(self,
                 basedir,
                 metadata_file=None,
                 verbose=False,
                 overwrite=False,
                 dataurl=None,
                 testing=False):
        self.basedir = basedir
        self.dirs = NarpsDirs(basedir, dataurl=dataurl, testing=testing)
        self.verbose = verbose
        self.teams = {}
        self.overwrite = overwrite
        self.started_at = datetime.datetime.now()
        self.testing = testing

        # create the full mask image if it doesn't already exist
        if not os.path.exists(self.dirs.full_mask_img):
            print('making full image mask')
            self.mk_full_mask_img(self.dirs)
        assert os.path.exists(self.dirs.full_mask_img)

        # get input dirs for orig data
        self.image_jsons = None
        self.input_dirs = self.get_input_dirs(self.dirs)

        # check images for each team
        self.complete_image_sets = {}
        self.get_orig_images(self.dirs)
        for imgtype in ['thresh', 'unthresh']:
            log_to_file(
                self.dirs.logfile,
                'found %d teams with complete original %s datasets' %
                (len(self.complete_image_sets[imgtype]), imgtype))

        # set up metadata
        if metadata_file is None:
            self.metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'analysis_pipelines_for_analysis.xlsx')
        else:
            self.metadata_file = metadata_file

        self.metadata = get_metadata(self.metadata_file)

        self.hypothesis_metadata = pandas.DataFrame(
            columns=['teamID', 'hyp', 'n_na', 'n_zero'])

        self.all_maps = {
            'thresh': {
                'resampled': None
            },
            'unthresh': {
                'resampled': None
            }
        }
        self.rectified_list = []
Example #17
    def get_resampled_images(self, overwrite=None):
        """
        resample all images into FSL MNI space
        """
        log_to_file(self.dirs.logfile,
                    sys._getframe().f_code.co_name,
                    headspace=2)
        if overwrite is None:
            overwrite = self.overwrite
        for imgtype in ['thresh', 'unthresh']:
            for teamID in self.complete_image_sets[imgtype]:
                self.teams[teamID].get_resampled_images(imgtype=imgtype)
Example #18
    def create_concat_images(self,
                             datatype='resampled',
                             imgtypes=None,
                             overwrite=None):
        """
        create images concatenated across teams
        ordered by self.complete_image_sets
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if imgtypes is None:
            imgtypes = ['thresh', 'unthresh']
        if overwrite is None:
            overwrite = self.overwrite
        for imgtype in imgtypes:
            self.dirs.dirs['concat_%s' % imgtype] = os.path.join(
                self.dirs.dirs['output'], '%s_concat_%s' % (imgtype, datatype))
            for hyp in range(1, 10):
                outfile = os.path.join(self.dirs.dirs['concat_%s' % imgtype],
                                       'hypo%d.nii.gz' % hyp)
                if not os.path.exists(os.path.dirname(outfile)):
                    os.mkdir(os.path.dirname(outfile))
                if not os.path.exists(outfile) or overwrite:
                    if self.verbose:
                        print('%s - hypo %d: creating concat file' %
                              (imgtype, hyp))
                    concat_teams = [
                        teamID for teamID in self.complete_image_sets
                        if os.path.exists(self.teams[teamID].images[imgtype]
                                          [datatype][hyp])
                    ]
                    self.all_maps[imgtype][datatype] = [
                        self.teams[teamID].images[imgtype][datatype][hyp]
                        for teamID in concat_teams
                    ]

                    # use nilearn NiftiMasker to load data
                    # and save to a new file
                    masker = nilearn.input_data.NiftiMasker(
                        mask_img=self.dirs.MNI_mask)
                    concat_data = masker.fit_transform(
                        self.all_maps[imgtype][datatype])
                    concat_img = masker.inverse_transform(concat_data)
                    concat_img.to_filename(outfile)
                else:
                    if self.verbose:
                        print('%s - hypo %d: using existing file' %
                              (imgtype, hyp))
        return (self.all_maps)
Example #19
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also add computation of jaccard on only nonzero pairs
    (ala scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (sys.argv[0].split('.')[0], func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(jacsim_nonzero,
                                      index=labels,
                                      columns=labels)
        df_nonzero.to_csv(
            os.path.join(output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))
        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()
        seaborn.clustermap(df_nonzero,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()
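The two similarity measures above on a toy binary matrix (rows are maps, columns are voxels): pairwise_distances with the Hamming metric yields agreement over all voxels, including joint zeros, while pdist with the Jaccard metric only considers voxels that are nonzero in at least one map. A minimal sketch:

import numpy
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import pairwise_distances

maskdata = numpy.array([[1, 0, 1, 0],
                        [1, 1, 0, 0],
                        [1, 0, 1, 1]])
jacsim = 1 - pairwise_distances(maskdata, metric='hamming')   # counts 0/0 agreements
jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))   # nonzero-only agreement
print(jacsim.round(2))
print(jacsim_nonzero.round(2))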
Example #20
def plot_distance_from_mean(narps):

    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)

    median_corr_df = pandas.read_csv(
        os.path.join(narps.dirs.dirs['metadata'], 'median_pattern_corr.csv'))

    # Plot distance from mean across teams
    plt.bar(median_corr_df.index, median_corr_df.median_corr)
    plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                             'median_corr_sorted.pdf'),
                bbox_inches='tight')
    plt.close()

    # This plot is limited to the teams with particularly
    # low median correlations (<.2)
    median_corr_low = median_corr_df.query('median_corr < 0.2')
    log_to_file(
        logfile, 'found %d teams with r<0.2 with mean pattern' %
        median_corr_low.shape[0])
    log_to_file(logfile, median_corr_low.iloc[:, 0].values)

    median_corr_high = median_corr_df.query('median_corr > 0.7')
    log_to_file(
        logfile, 'found %d teams with r>0.7 with mean pattern' %
        median_corr_high.shape[0])
Example #21
def evaluate(weights_file,
             image_model_name='vgg16',
             embedding_type='glove',
             embedding_dim=300,
             batch_size=200):

    # create data loader
    data_path = os.path.join(os.path.dirname(__file__), 'data')
    option = {
        'data_root': data_path,  # MODIFY PATH ACCORDINGLY
        'fine_size': 224,
        'word_embedding_length': 1024,
        'randomize': False
    }
    data_loader = DataLoaderDisk(**option)

    word_index = data_loader.tokenizer.word_index

    embedding_path = ''
    if embedding_type == 'glove':
        embedding_path = os.path.join(
            data_path, 'glove.6B', 'glove.6B.{0}d.txt'.format(embedding_dim))
    embedding_matrix = get_embedding_matrix(word_index, embedding_type,
                                            embedding_path)

    seq_length = 25
    model_val = vqa_model(image_model_name,
                          embedding_matrix,
                          seq_length,
                          dropout_rate=0.5,
                          num_classes=3131)
    model_val.load_weights(weights_file)

    epochs = 1
    iters = int(data_loader.val_num * epochs / batch_size)
    val_accuracy = 0
    for iteration in tqdm(range(iters)):
        img_batch_val, que_batch_val, y_batch_val = data_loader.next_batch(
            batch_size, mode='val')
        val_score = model_val.test_on_batch([img_batch_val, que_batch_val],
                                            y_batch_val)
        val_accuracy += float(val_score[1])

        msg = 'iter = {0}, val acc: {1:03f}'.format(iteration,
                                                    float(val_score[1]))
        log_to_file(msg)

    msg = 'Overall Accuracy on Validation-Set: {0}'.format(val_accuracy /
                                                           iters)
    log_to_file(msg)
Example #22
    def _load_data(self):
        try:
            self._data = pd.read_csv(self.data_file)

            # Drop nans in time stamps
            l1 = len(self._data)
            self._data.drop(self._data[pd.isna(self._data.time)].index, inplace=True)
            if len(self._data) < l1:
                log_to_file(f"{l1 - len(self._data)} NaN time stamps dropped.")

            self._data.time = pd.to_datetime(self._data.time)
            with open(self.tickers_file, "r") as f:
                self._tickers = f.read().split()
            print("Local history data loaded successfully")
        except FileNotFoundError:
            print("No saved ETF history found locally.")
Example #23
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also add computation of jaccard on only nonzero pairs
    (ala scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    for hyp in hypnums:
        print('analyzing thresh similarity for hypothesis', hyp)
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)

        pctagree = matrix_pct_agreement(maskdata)
        median_pctagree = numpy.median(pctagree[numpy.triu_indices_from(
            pctagree, 1)])
        log_to_file(
            logfile,
            'hyp %d: median pctagree similarity: %f' % (hyp, median_pctagree))

        df_pctagree = pandas.DataFrame(pctagree, index=labels, columns=labels)
        df_pctagree.to_csv(
            os.path.join(narps.dirs.dirs['metadata'],
                         'pctagree_hyp%d.csv' % hyp))

        seaborn.clustermap(df_pctagree,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses_full[hyp])
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.pdf' % hyp),
                    bbox_inches='tight')
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.png' % hyp),
                    bbox_inches='tight')
        plt.close()

        # get jaccard for nonzero voxels
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        median_jacsim_nonzero = numpy.median(
            jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)])
        log_to_file(
            logfile, 'hyp %d: median jaccard similarity (nonzero): %f' %
            (hyp, median_jacsim_nonzero))
Example #24
def setup_simulated_data(
        narps,
        verbose=False,
        overwrite=False):
    """create directories for simulated data"""

    # consensus analysis must exist
    assert os.path.exists(narps.dirs.dirs['consensus'])

    basedir = narps.basedir + '_simulated'
    if verbose:
        print("writing files to new directory:", basedir)
    if not os.path.exists(os.path.join(basedir, 'logs')):
        os.makedirs(os.path.join(basedir, 'logs'))

    log_to_file(os.path.join(basedir, 'logs/simulated_data.log'),
                'Creating simulated dataset', flush=True)
    # copy data from orig/templates
    origdir = narps.dirs.dirs['orig']
    new_origdir = os.path.join(basedir, 'orig')
    templatedir = narps.dirs.dirs['templates']
    if verbose:
        print('using basedir:', basedir)
    if os.path.exists(basedir) and overwrite:
        shutil.rmtree(basedir)
    if not os.path.exists(basedir):
        os.mkdir(basedir)
    if not os.path.exists(new_origdir) or overwrite:
        if verbose:
            print('copying template data to new basedir')
        shutil.copytree(
            templatedir,
            os.path.join(basedir, 'templates'))
        if not os.path.exists(new_origdir):
            os.mkdir(new_origdir)
        # copy metadata files from orig
        for f in glob.glob(os.path.join(
                origdir, '*.*')):
            if os.path.isfile(f):
                if verbose:
                    print('copying', f, 'to', new_origdir)
                shutil.copy(f, new_origdir)
    else:
        print('using existing new basedir')

    return(basedir)
Example #25
def log_data(download_dir, logfile, verbose=True):
    """record manifest and file hashes"""
    imgfiles = {}
    # traverse root directory, and list directories as dirs and files as files
    for root, _, files in os.walk(download_dir):
        path = root.split(os.sep)
        for file in files:
            if file.find('.nii.gz') < 0:
                # skip non-nifti files
                continue
            fname = os.path.join(root, file)
            filehash = hashlib.md5(open(fname, 'rb').read()).hexdigest()
            short_fname = os.path.join('/'.join(path[-2:]), file)
            imgfiles[short_fname] = filehash
            if verbose:
                print(short_fname, filehash)
            log_to_file(logfile, '%s %s' % (short_fname, filehash))
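Reading a whole NIfTI file into memory just to hash it can be avoided; a sketch of the same md5 manifest entry computed in fixed-size chunks (the commented-out path is illustrative):

import hashlib

def md5_of_file(fname, blocksize=2 ** 20):
    """Return the md5 hex digest of a file, read in 1 MiB chunks."""
    h = hashlib.md5()
    with open(fname, 'rb') as f:
        for chunk in iter(lambda: f.read(blocksize), b''):
            h.update(chunk)
    return h.hexdigest()

# print(md5_of_file('orig/12345_ABCDE/hypo1_thresh.nii.gz'))  # illustrative path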
Example #26
    def create_mean_thresholded_images(self, datatype='resampled',
                                       overwrite=None, thresh=1e-5):
        """
        create overlap maps for thresholded images
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        imgtype = 'thresh'
        if overwrite is None:
            overwrite = self.overwrite
        output_dir = self.dirs.get_output_dir('overlap_binarized_thresh')
        concat_dir = self.dirs.get_output_dir(
            '%s_concat_%s' % (imgtype, datatype))

        for hyp in range(1, 10):
            outfile = os.path.join(
                output_dir,
                'hypo%d.nii.gz' % hyp)
            if not os.path.exists(outfile) or overwrite:
                if self.verbose:
                    print('%s - hypo %d: creating overlap file' % (
                        imgtype, hyp))
                concat_file = os.path.join(
                    concat_dir,
                    'hypo%d.nii.gz' % hyp)
                concat_img = nibabel.load(concat_file)
                concat_data = concat_img.get_fdata()
                concat_data = (concat_data > thresh).astype('float')
                concat_mean = numpy.mean(concat_data, 3)
                concat_mean_img = nibabel.Nifti1Image(concat_mean,
                                                      affine=concat_img.affine)
                concat_mean_img.to_filename(outfile)

            else:
                if self.verbose:
                    print('%s - hypo %d: using existing file' % (
                        imgtype, hyp))
Example #27
def copy_renamed_files(collectionIDs, download_dir, logfile):
    """change file names based on info in images.json"""
    # setup target directory
    orig_dir = os.path.join(os.path.dirname(download_dir), 'orig')
    if not os.path.exists(orig_dir):
        os.mkdir(orig_dir)

    for teamID in collectionIDs:
        collectionID = '%s_%s' % (collectionIDs[teamID], teamID)
        collection_dir = os.path.join(download_dir, collectionID)
        fixed_dir = os.path.join(orig_dir, collectionID)
        if not os.path.exists(fixed_dir):
            os.mkdir(fixed_dir)

        jsonfile = os.path.join(collection_dir, 'images.json')
        if not os.path.exists(jsonfile):
            print('no json file for ', collectionID)
            continue
        with open(jsonfile) as f:
            image_info = json.load(f)
        for img in image_info:
            origname = os.path.basename(img['file'])
            # fix various issues with names
            newname = img['name'].replace('tresh', 'thresh').replace(
                ' ', '_') + '.nii.gz'
            newname = newname.replace('hypo_', 'hypo').replace(
                'uthresh', 'unthresh').replace('_LR', '')

            # skip unthresh images if necessary
            if newname.find('unthresh') > -1 and \
                    teamID in TEAMS_TO_REMOVE_UNTHRESH:
                continue

            if origname.find('sub') > -1 or \
                    not newname.find('thresh') > -1:  # skip sub files
                continue
            else:
                log_to_file(
                    logfile, 'copying %s/%s to %s/%s' %
                    (collectionID, origname, collectionID, newname))
                shutil.copy(os.path.join(collection_dir, origname),
                            os.path.join(fixed_dir, newname))
    return (orig_dir)
Example #28
    def get_input_dirs(self, dirs, verbose=True, load_json=True):
        """
        get orig dirs
        - assumes that images.json is present for each valid dir
        """
        input_jsons = glob.glob(
            os.path.join(dirs.dirs['orig'], '*/images.json'))

        log_to_file(
            self.dirs.logfile,
            'found %d input directories' % len(input_jsons))
        if load_json:
            for i in input_jsons:
                collection_id = os.path.basename(os.path.dirname(i))
                NV_collection_id, teamID = collection_id.split('_')
                if teamID not in self.teams:
                    self.teams[teamID] = NarpsTeam(
                        teamID, NV_collection_id, dirs, verbose=self.verbose)
                    self.teams[teamID].jsonfile = i
                with open(i) as f:
                    self.teams[teamID].image_json = json.load(f)
Example #29
def export_plugins(from_lang, plugin_list, plugin_selection=None):
    field_blacklist = get_blacklist()
    plugin_data = []
    for plugin in plugin_list:
        try:
            instance = plugin.get_plugin_instance()[0]
        except KeyError as e:
            # Nasty fix for StackPlugins still straying around
            if str(e) != "u'StackPlugin'":
                raise KeyError(str(e))
            continue

        if instance is None:
            continue
        elif plugin_selection and str(type(instance)) not in plugin_selection:
            continue

        if getattr(instance, "language") == from_lang:  # TODO: check: could this break at some point?
            plugin_contents = plugin.get_plugin_instance()[0].get_translatable_content()
            if plugin_contents:
                if not isinstance(plugin_contents, list):
                    plugin_contents = [plugin_contents]

                for item in plugin_contents:
                    plugin_dict = {
                        'plugin_pk': getattr(plugin, "pk"),
                        'plugin_type': "%s%s" % (instance.__class__.__name__,  " (%s)" % str(type(instance)) or ""),
                        'fields': {}
                    }

                    for key, value in item.items():
                        if key not in field_blacklist:
                            plugin_dict['fields'][key] = value

                    plugin_data.append(plugin_dict)

    if log_to_file_enabled():
        log_to_file(plugin_data)

    return plugin_data
Example #30
def save_violation(v):
    try:
        file = open("./violations/violations.json", 'r')
        violations_json = json.loads(file.read())
        file.close()

        if v in violations_json.keys():
            violations_json[v] = int(violations_json.get(v)) + 1

            try:
                file = open("./violations/violations.json", 'w')
                file.write(json.dumps(violations_json))
                file.close()
            except:
                log_to_file("Failed to write to violations.json at" +
                            time.strftime("%b %d, %Y - %I:%M:%S"))

        else:
            violations_json[v] = 1

            try:
                file = open("./violations/violations.json", 'w')
                file.write(json.dumps(violations_json))
                file.close()
            except:
                log_to_file("Failed to write to violations.json at" +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
    except:
        log_to_file("Failed to read violations.json at" +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
        file = open("./violations/violations.json", 'w')
        file.write("{}")
        file.close()
Example #31
def save_user(user):
    try:
        file = open("./users/users.json", 'r')
        users_json = json.loads(file.read())
        file.close()

        if user in users_json.keys():
            users_json[user] = int(users_json.get(user)) + 1

            try:
                file = open("./users/users.json", 'w')
                file.write(json.dumps(users_json))
                file.close()
            except:
                log_to_file("Failed to write to users.json at " +
                            time.strftime("%b %d, %Y - %I:%M:%S"))

        else:
            users_json[user] = 1

            try:
                file = open("./users/users.json", 'w')
                file.write(json.dumps(users_json))
                file.close()
            except:
                log_to_file("Failed to write to users.json at" +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
    except:
        log_to_file("Failed to read users.json at" +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
        file = open("./users/users.json", 'w')
        file.write("{}")
        file.close()
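save_violation and save_user above differ only in the file path; one possible way to consolidate the read-increment-write pattern, shown here as a hedged sketch rather than a drop-in replacement (path and key are whatever the caller passes):

import json
import os

def increment_count(path, key):
    """Read a JSON dict of counts from path, increment key, and write it back."""
    counts = {}
    if os.path.exists(path):
        with open(path, 'r') as f:
            counts = json.loads(f.read() or '{}')
    counts[key] = counts.get(key, 0) + 1
    with open(path, 'w') as f:
        f.write(json.dumps(counts))

# increment_count('./violations/violations.json', 'spam')  # illustrative call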
Example #32
    def get_orig_data(self):
        """
        download original data from repository
        """
        log_to_file(self.logfile, '\n\nget_orig_data')
        log_to_file(self.logfile, 'DATA_URL: %s' % DATA_URL)
        MAX_TRIES = 5

        if self.data_url is None:
            print('no URL for original data, cannot download')
            print('should be specified in info.json')
            return

        print('orig data do not exist, downloading...')
        output_directory = self.dirs['base']
        no_dl = True
        ntries = 0
        # try several times in case of http error
        while no_dl:
            try:
                filename = wget.download(self.data_url, out=output_directory)
                no_dl = False
            except HTTPError:
                ntries += 1
                time.sleep(1)  # wait a second
            if ntries > MAX_TRIES:
                raise Exception('Problem downloading original data')

        # save a hash of the tarball for data integrity
        filehash = hashlib.md5(open(filename, 'rb').read()).hexdigest()
        log_to_file(self.logfile, 'hash of tar file: %s' % filehash)
        tarfile_obj = tarfile.open(filename)
        tarfile_obj.extractall(path=self.dirs['base'])
        os.remove(filename)
Example #33
def parse_comment(c):
    try:
        file = open("./data-analyzation/words.json", 'r')
        words_json = json.loads(file.read())
        file.close()

        print("Reading comment " + c.id + " at " +
              time.strftime("%b %d, %Y - %I:%M:%S") + " by " + str(c.author))

        body = str(c.body.encode('utf-8'))

        for w in body.split(' '):
            word = str(w.encode('utf-8'))

            if "http" in word or "/u/" in word or "/r/" in word or "\\" in word:
                continue

            for ch in replace_chars:
                word = word.replace(ch, '')

            if word not in ("", " ", "'", ",", "\n") and word[0] != "'":

                if word in words_json.keys():
                    words_json[word] = int(words_json.get(word)) + 1
                else:
                    words_json[word] = 1

                file = open("./data-analyzation/words.json", 'w')
                file.write(json.dumps(words_json))

                file.close()

    except:
        log_to_file("Failed to read words.json at " +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
        return
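The word tally above could also be kept in memory with collections.Counter and written out once; a minimal sketch with a toy comment body and an illustrative output path, not the original data layout:

import json
from collections import Counter

body = "hello world hello"          # toy comment text
words = Counter(w.strip(",'") for w in body.split() if not w.startswith('http'))

with open('words.json', 'w') as f:  # illustrative output path
    f.write(json.dumps(words))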
Example #34
def get_quote_view(request, pk):
    t = TranslationRequest.objects.get(pk=pk)
    check_stage(t.status, 'selected_content')

    if request.method == 'POST':
        if request.POST.get('opt'):
            t.order_choice = request.POST.get('opt')  # TODO: possible security issue?
            t.status = 'selected_quote'
            t.save()
            return HttpResponseRedirect(reverse('admin:order', kwargs={'pk': pk}))

    else:
        data = prepare_data(t, t.from_lang, t.to_lang)
        quote = get_quote(t.provider, data=data)
        if log_to_file_enabled():
            log_to_file(data)
        if t.provider == 'supertext':
            res = json.loads(quote)
            return render_to_response(
                'aldryn_translator/quote.html', {'res': res, 'dev': is_dev()},
                context_instance=RequestContext(request))

        else:
            raise NotImplementedError()
Example #35
category = sys.argv[2]
test_type = sys.argv[3]
url = sys.argv[4]
status_code = None
result = None
content = None
request_body = None
if category == 'web':
    if test_type == 'status_code':
        status_code = sys.argv[5]
        result = verify_web_status_code(url, status_code)
    elif test_type == 'content':
        content = sys.argv[5]
        result = verify_web_content(url, content)
    else:
        utils.log_to_file('test type invalid')
elif category == 'service':
    request_body = sys.argv[6]
    if test_type == 'status_code':
        status_code = sys.argv[5]
        result = verify_service_status_code(url, request_body, status_code)
    elif test_type == 'content':
        content = sys.argv[5]
        result = verify_service_content(url, request_body, content)
else:
    utils.log_to_file('category is invalid')
# write result to respective results file
if result is not None:
    write_result(task_id, result)

    if result == 0: