Example #1
def test_studies(db):
    study = Study(pmid=345345, title='test study',
        authors='Jokkin, Eumast',
        journal='Journal of Nonexistent Findings',
        year=2008)
    study.peaks = [Peak(x=-12, y=14, z=40), Peak(x=22, y=22, z=22)]
    db.session.add(study)
    db.session.commit()
    assert Peak.query.count() == 2
    assert Study.query.count() == 1
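
This test assumes Study and Peak models with a one-to-many relationship between them. A minimal Flask-SQLAlchemy sketch consistent with the fields used across these examples (inferred from usage, not the project's actual definitions):

from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()

class Study(db.Model):
    # The PubMed ID doubles as the primary key, as suggested by the
    # Study.query.get(pmid) calls in Examples #6 and #7
    pmid = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.Text)
    authors = db.Column(db.Text)
    journal = db.Column(db.Text)
    year = db.Column(db.Integer)
    space = db.Column(db.String(30))
    doi = db.Column(db.String(200))
    # lazy='dynamic' matches the study.peaks.count() call in Example #7
    peaks = db.relationship('Peak', backref='study', lazy='dynamic')

class Peak(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    pmid = db.Column(db.Integer, db.ForeignKey('study.pmid'))
    x = db.Column(db.Float)
    y = db.Column(db.Float)
    z = db.Column(db.Float)
    table = db.Column(db.String(30))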
Example #2
def get_studies(val=None):
    x, y, z, radius = get_params(val)
    points = Peak.closestPeaks(radius, x, y, z)

    # Track number of peaks and study details for each found study,
    # keeping only peaks that haven't been previously seen for current
    # study/x/y/z combination.
    seen = set()
    study_counts = defaultdict(list)
    for p in points:
        # The tuple itself is a safe dict/set key; hashing it manually
        # only adds collision risk
        key = (p.pmid, round(p.x, 2), round(p.y, 2), round(p.z, 2))
        if key in seen:
            continue
        study_counts[p.pmid].append(p)
        seen.add(key)

    if 'dt' in request.args:
        data = []
        for pmid, peaks in study_counts.items():
            s = peaks[0].study
            link = '<a href={0}>{1}</a>'.format(url_for('studies.show',
                                                        val=pmid), s.title)
            data.append([link, s.authors, s.journal, len(peaks)])
    else:
        data = [{'pmid': pmid, 'peaks': len(peaks)}
                for pmid, peaks in study_counts.items()]
    return jsonify(data=data)
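
This view leans on a get_params helper that isn't shown. Example #3 parses the same kind of value inline, so get_params presumably splits an 'x_y_z' or 'x_y_z_radius' string and caps the radius; a plausible sketch (the default and cap values are assumptions):

def get_params(val, default_radius=10, max_radius=20):
    # Parse 'x_y_z' or 'x_y_z_radius', e.g. '-12_14_40' or '-12_14_40_6'
    args = [int(i) for i in val.split('_')]
    if len(args) == 3:
        args.append(default_radius)
    x, y, z, radius = args
    # Cap the search radius to keep the query fast, as Example #3 does
    return x, y, z, min(radius, max_radius)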
Example #3
def location_api(val):
    args = [int(i) for i in val.split('_')]
    if len(args) == 3:
        args.append(10)
    x, y, z, radius = args

    ### PEAKS ###
    # Limit search to 20 mm to keep things fast
    radius = min(radius, 20)
    points = Peak.closestPeaks(radius, x, y, z)
    points = points.group_by(Peak.pmid)  # prevents duplicate studies
    points = points.add_columns(sqlalchemy.func.count(Peak.id))  # counts duplicate peaks

    ### IMAGES ###
    location = Location.query.filter_by(x=x, y=y, z=z).first()
    images = [] if location is None else location.images
    images = [{'label': i.label, 'id': i.id} for i in images if i.display]

    if 'draw' in request.args:
        data = []
        for p in points:
            s = p[0].study
            link = '<a href={0}>{1}</a>'.format(
                url_for('studies.show', val=s.pmid), s.title)
            data.append([link, s.authors, s.journal, p[1]])
        data = jsonify(data=data)
    else:
        data = {
            'studies': [{'pmid': p[0].study.pmid, 'peaks': p[1]} for p in points],
            'images': images
        }
        data = jsonify(data=data)
    return data
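
All of these views delegate the spatial search to Peak.closestPeaks, which isn't shown. Since callers chain .group_by() and .add_columns() onto the result, it must return a SQLAlchemy query; one plausible implementation is a classmethod that filters peaks to a bounding cube around the point (the project's actual distance metric may differ):

# Inside the Peak model sketched under Example #1:
@classmethod
def closestPeaks(cls, radius, x, y, z):
    # Query for peaks within `radius` mm of (x, y, z) using a simple
    # bounding-cube filter; callers can keep chaining .group_by(),
    # .add_columns(), etc. onto the returned query
    return cls.query.filter(
        cls.x.between(x - radius, x + radius),
        cls.y.between(y - radius, y + radius),
        cls.z.between(z - radius, z + radius))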
Example #4
def get_location():
    """
    Retrieve location data
    ---
    tags:
        - locations
    responses:
        200:
            description: Location data
        default:
            description: No locations found
    parameters:
        - in: query
          name: x
          description: x-coordinate
          required: true
          type: integer
        - in: query
          name: y
          description: y-coordinate
          required: true
          type: integer
        - in: query
          name: z
          description: z-coordinate
          required: true
          type: integer
        - in: query
          name: r
          description: Radius of sphere within which to search for study activations, in mm (default = 6, max = 20).
          required: false
          type: integer
    """
    x = int(request.args['x'])
    y = int(request.args['y'])
    z = int(request.args['z'])
    #  Radius: 6 mm by default, max 2 cm
    r = min(int(request.args.get('r', 6)), 20)

    # Check validity of coordinates and redirect if necessary
    check_xyz(x, y, z)

    loc = Location.query.filter_by(x=x, y=y, z=z).first()
    if loc is None:
        from nsweb.controllers.locations import make_location
        loc = make_location(x, y, z)

    peaks = Peak.closestPeaks(r, x, y, z)
    peaks = peaks.group_by(Peak.pmid)
    peaks = peaks.add_columns(func.count(Peak.id))

    loc.studies = [p[0].study for p in peaks]

    schema = LocationSchema()
    return jsonify(data=schema.dump(loc).data)
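
LocationSchema is a marshmallow schema; the .data attribute on the dump result pins this to marshmallow 2.x (in 3.x, dump() returns the dict directly). A minimal compatible sketch, with the field list inferred from the attributes used above:

from marshmallow import Schema, fields

class StudySchema(Schema):
    pmid = fields.Integer()
    title = fields.String()

class LocationSchema(Schema):
    x = fields.Integer()
    y = fields.Integer()
    z = fields.Integer()
    # Serializes the loc.studies list attached in get_location()
    studies = fields.Nested(StudySchema, many=True)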
Example #5
def get_studies(val=None):
    x, y, z, radius = get_params(val)
    points = Peak.closestPeaks(radius, x, y, z)
    # prevents duplicate studies
    points = points.group_by(Peak.pmid)
    # counts duplicate peaks
    points = points.add_columns(sqlalchemy.func.count(Peak.id))

    if 'dt' in request.args:
        data = []
        for p in points:
            s = p[0].study
            link = '<a href={0}>{1}</a>'.format(url_for('studies.show',
                                                        val=s.pmid), s.title)
            data.append([link, s.authors, s.journal, p[1]])
        data = jsonify(data=data)
    else:
        data = [{'pmid': p[0].study.pmid, 'peaks': p[1]} for p in points]
        data = jsonify(data=data)
    return data
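
For reference, the two branches produce differently shaped JSON: the 'dt' branch emits row arrays for a DataTables-style table, while the default branch emits plain objects. Using the fixture from Example #1 (the study URL is illustrative), the payloads would look roughly like:

# 'dt' branch: one row per study
# [['<a href=/studies/345345>test study</a>', 'Jokkin, Eumast',
#   'Journal of Nonexistent Findings', 2]]
#
# default branch: one object per study
# [{'pmid': 345345, 'peaks': 2}]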
Example #6
    def add_studies(self, analyses=None, threshold=0.001, limit=None,
                    reset=False):
        """ Add studies to the DB.
        Args:
            analyses: list of names of analyses to map studies onto. If None,
                use all available.
            threshold: Float or integer; minimum value in AnalysisTable data
                array for inclusion.
            limit: integer; maximum number of studies to add (order will be
                randomized).
            reset: Drop all existing records before populating.
        Notes:
            By default, will not create new Study records if an existing one
            matches. This ensures that we can gracefully add new analysis
            associations without mucking up the DB. To explicitly replace old
            records, pass reset=True.
        """
        if reset:
            Study.query.delete()

        # For efficiency, get all analysis data up front, so we only need to
        # densify array once
        if analyses is None:
            analyses = self._get_feature_names()

        feature_data = self.dataset.get_feature_data(features=analyses)

        study_inds = self.dataset.activations['id'].unique()

        if limit is not None:
            random.shuffle(study_inds)
            study_inds = study_inds[:limit]

        # SQL DBs generally don't like numpy dtypes
        study_inds = [int(ind) for ind in study_inds]

        all_rows = self.dataset.activations.query('id in @study_inds')
        all_rows[['doi', 'table_num']] = all_rows[['doi', 'table_num']] \
                                            .astype(str).replace('nan', '')

        # Create Study records
        for i, pmid in enumerate(study_inds):

            activ = all_rows.query('id == @pmid')

            study = Study.query.get(pmid)

            if study is None:
                peaks = [Peak(x=p['x'], y=p['y'], z=p['z'],
                              table=p['table_num'])
                         for (ind, p) in activ.iterrows()]

                # Track in Python to avoid issuing SQL count() queries
                n_peaks = len(peaks)

                data = activ.iloc[0, :]
                study = Study(
                    pmid=int(pmid),
                    space=data['space'],
                    doi=data['doi'],
                    title=data['title'],
                    journal=data['journal'],
                    authors=data['authors'],
                    year=int(data['year']))
                study.peaks.extend(peaks)
                self.db.session.add(study)

            # Map analyses onto studies via a Frequency join table that also
            # stores frequency info
            pmid_frequencies = feature_data.loc[pmid, :]
            to_keep = pmid_frequencies[pmid_frequencies >= threshold]
            for analysis_name, freq in to_keep.items():
                freq_inst = Frequency(
                    study=study, analysis=self.analyses[analysis_name][0],
                    frequency=freq)
                self.db.session.add(freq_inst)

                # Track number of studies and peaks so we can update
                # Analysis table more efficiently later
                self.analyses[analysis_name][1] += 1
                self.analyses[analysis_name][2] += n_peaks

            # Commit records in batches to conserve memory and speed up
            # querying.
            if (i + 1) % 100 == 0:
                print("Saving study %d..." % i)
                self.db.session.commit()

        self.db.session.commit()  # Commit any remaining studies

        # Update all analysis counts
        self._update_analysis_counts()
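
From the index arithmetic above, self.analyses evidently maps each analysis name to a three-element list: the Analysis record itself plus running study and peak counters. A hedged illustration of that layout and a driver call (the builder class name is an assumption, not the project's):

# Assumed layout of self.analyses, built elsewhere in the class:
#     {analysis_name: [Analysis record, n_studies, n_peaks]}
#
# Hypothetical driver code:
builder = DatabaseBuilder(db=db, dataset=dataset)
builder.add_studies(threshold=0.001, limit=500, reset=True)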
Example #7
    def add_studies(self, analyses=None, threshold=0.001, limit=None,
                    reset=False):
        """ Add studies to the DB.
        Args:
            analyses: list of names of analyses to map studies onto. If None,
                use all available.
            threshold: Float or integer; minimum value in AnalysisTable data
                array for inclusion.
            limit: integer; maximum number of studies to add (order will be
                randomized).
            reset: Drop all existing records before populating.
        Notes:
            By default, will not create new Study records if an existing one
            matches. This ensures that we can gracefully add new analysis
            associations without mucking up the DB. To explicitly replace old
            records, pass reset=True.
        """
        if reset:
            Study.query.delete()

        # For efficiency, get all analysis data up front, so we only need to
        # densify array once
        if analyses is None:
            analyses = self._get_feature_names()

        feature_data = self.dataset.get_feature_data(features=analyses)
        analysis_names = list(feature_data.columns)

        # range() is immutable in Python 3, so materialize a list before
        # shuffling in place
        study_inds = list(range(len(self.dataset.mappables)))
        if limit is not None:
            random.shuffle(study_inds)
            study_inds = study_inds[:limit]

        # Create Study records
        for i in study_inds:

            m = self.dataset.mappables[i]
            pmid = int(m.id)  # avoid shadowing the builtin id()

            study = Study.query.get(pmid)
            if study is None:
                peaks = [Peak(x=float(p.x),
                              y=float(p.y),
                              z=float(p.z),
                              table=str(p.table_num).replace('nan', '')
                              ) for (ind, p) in m.data.iterrows()]
                data = m.data.iloc[0]
                study = Study(
                    pmid=pmid,
                    space=data['space'],
                    doi=str(data['doi']).replace('nan', ''),
                    title=data['title'],
                    journal=data['journal'],
                    authors=data['authors'],
                    year=data['year'])
                study.peaks.extend(peaks)
                self.db.session.add(study)

            # Map analyses onto studies via a Frequency join table that also
            # stores frequency info
            pmid_frequencies = list(feature_data.loc[m.id, :])

            for (j, analysis_name) in enumerate(analysis_names):
                freq = pmid_frequencies[j]
                if freq >= threshold:
                    freq_inst = Frequency(
                        study=study, analysis=self.analyses[analysis_name][0],
                        frequency=freq)
                    self.db.session.add(freq_inst)

                    # Track number of studies and peaks so we can update
                    # Analysis table more efficiently later
                    self.analyses[analysis_name][1] += 1
                    self.analyses[analysis_name][2] += study.peaks.count()

            # Commit records in batches to conserve memory.
            # This is very slow because we're relying on the declarative base.
            # Ideally should replace this with use of SQLAlchemy core, but
            # probably not worth the trouble considering we only re-create
            # the DB once in a blue moon.
            if (i + 1) % 100 == 0:
                self.db.session.commit()

        self.db.session.commit()  # Commit any remaining studies

        # Update all analysis counts
        self._update_analysis_counts()
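
The comment in this version points at SQLAlchemy Core as the faster path for bulk loads. For reference, a minimal sketch of what a Core-level bulk insert of the peak rows could look like inside the study loop (assuming the Peak model sketched under Example #1):

# Build plain dicts and insert them in one executemany round trip,
# bypassing the ORM's per-object bookkeeping
rows = [{'pmid': pmid, 'x': float(p.x), 'y': float(p.y), 'z': float(p.z),
         'table': str(p.table_num).replace('nan', '')}
        for (ind, p) in m.data.iterrows()]
self.db.session.execute(Peak.__table__.insert(), rows)
self.db.session.commit()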