示例#1
0
def index():
	""" The index page.
	
	Renders the index template. When a 'cond' query parameter is present,
	searches LillyCOI for trials matching that condition. The optional
	'criteria' parameter dumps the found trials into the template; the
	optional 'csv' parameter instead writes the eligibility criteria of
	every trial to a timestamped CSV file.
	"""
	
	# get parameters; treat an empty condition string as absent
	cond = bottle.request.query.get('cond')
	if cond is not None and len(cond) < 1:
		cond = None
	csv_name = None
	trials = []
	num_trials = 0
	
	# if we got a condition
	if cond is not None:
		dump = bottle.request.query.get('criteria') is not None
		csv = bottle.request.query.get('csv') is not None
		
		lilly = LillyCOI()
		# only fetch eligibility data when we actually need it
		args = ['id', 'eligibility'] if dump or csv else None
		found_trials = lilly.search_for_condition(cond, True, args)
		num_trials = len(found_trials)
		
		# list criteria
		if dump:
			trials = found_trials
		
		# return CSV
		elif csv:
			csv_name = 'criteria-%s.csv' % datetime.now().isoformat()[:-7]
			with codecs.open(csv_name, 'w', 'utf-8') as handle:
				heads = ["format","num in","num ex","w age","w gender","w pregnancy","incomplete","overly complex","sub-populations","negated inclusions","labs","scores","acronyms","temporal components","patient behavior/abilities","investigator-subjective components","sum"]
				# one empty placeholder cell per manual-annotation column
				headers = ','.join('""' for h in heads)
				
				# CSV header
				handle.write('"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n' % ','.join(['"%s"' % h for h in heads]))
				
				# CSV rows; 'every' allows writing only every n-th trial
				every = 1
				for i, study in enumerate(found_trials):
					if 0 == i % every:
						study.load()
						handle.write('"%s","","","","","%s",%s\n' % (study.nct, study.criteria_text.replace('"', '""'), headers))
	
	# render index
	template = _jinja_templates.get_template('index.html')
	return template.render(cond=cond, trials=trials, csv=csv_name, num=num_trials)
示例#2
0
def get_trials(condition, recruiting=True, filename='years.csv'):
    lilly = LillyCOI()
    fields = [
        'id', 'lastchanged_date', 'firstreceived_date',
        'primary_completion_date', 'completion_date', 'verification_date'
    ]
    found = lilly.search_for_condition(condition, recruiting, fields)
    if len(found) > 0:
        print "Found %d" % len(found)
        now = datetime.datetime.now()

        # list trials
        with codecs.open(filename, 'w') as csv:
            csv.write(
                'NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n'
            )

            if len(found) > 150:
                found = random.sample(found, len(found) / 4)

            for trial in found:

                # date comparison
                first = trial.date('firstreceived_date')
                first_y = round((now - first[1]).days / 365.25 *
                                10) / 10 if first[1] else 99
                last = trial.date('lastchanged_date')
                last_y = round(
                    (now - last[1]).days / 365.25 * 10) / 10 if last[1] else 99
                comp = trial.date('primary_completion_date')
                comp_y = round(
                    (now - comp[1]).days / 365.25 * 10) / 10 if comp[1] else 99
                done = trial.date('completion_date')
                done_y = round(
                    (now - done[1]).days / 365.25 * 10) / 10 if done[1] else 99
                veri = trial.date('verification_date')
                veri_y = round(
                    (now - veri[1]).days / 365.25 * 10) / 10 if veri[1] else 99

                csv.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' %
                          (trial.nct, first_y, last_y, comp_y, done_y, veri_y,
                           'TRUE' if done[1] else 'FALSE',
                           'TRUE' if done[1] and done[1] > now else 'FALSE'))
        print 'Written to "%s"' % filename
    else:
        print "None found"
示例#3
0
def get_trials(condition, recruiting=True, filename='years.csv'):
	lilly = LillyCOI()
	fields = [
		'id',
		'lastchanged_date',
		'firstreceived_date',
		'primary_completion_date',
		'completion_date',
		'verification_date'
	]
	found = lilly.search_for_condition(condition, recruiting, fields)
	if len(found) > 0:
		print "Found %d" % len(found)
		now = datetime.datetime.now()
		
		# list trials
		with codecs.open(filename, 'w') as csv:
			csv.write('NCT,"first received yrs ago","last update yrs ago",primary,completion,veri,"has completion","completion and status compatible",criteria\n')
			
			if len(found) > 150:
				found = random.sample(found, len(found) / 4)
			
			for trial in found:
				
				# date comparison
				first = trial.date('firstreceived_date')
				first_y = round((now - first[1]).days / 365.25 * 10) / 10 if first[1] else 99
				last = trial.date('lastchanged_date')
				last_y = round((now - last[1]).days / 365.25 * 10) / 10 if last[1] else 99
				comp = trial.date('primary_completion_date')
				comp_y = round((now - comp[1]).days / 365.25 * 10) / 10 if comp[1] else 99
				done = trial.date('completion_date')
				done_y = round((now - done[1]).days / 365.25 * 10) / 10 if done[1] else 99
				veri = trial.date('verification_date')
				veri_y = round((now - veri[1]).days / 365.25 * 10) / 10 if veri[1] else 99
				
				csv.write('"%s",%.1f,%.1f,%.1f,%.1f,%.1f,%s,%s,""\n' % (trial.nct, first_y, last_y, comp_y, done_y, veri_y, 'TRUE' if done[1] else 'FALSE', 'TRUE' if done[1] and done[1] > now else 'FALSE'))
		print 'Written to "%s"' % filename
	else:
		print "None found"
示例#4
0
	# ask for NCT list
	if _list_path is None:
		_list_path = raw_input('Path to the NCT list: ')
	
	# look for the list
	if not os.path.exists(_list_path):
		print 'x>  The list file at %s does not exist' % _list_path
		sys.exit(1)
	
	# read list
	with codecs.open(_list_path, 'r') as handle:
		ncts = [nct.strip() if len(nct.strip()) > 0 else None for nct in handle.readlines()]
		assert len(ncts) > 0
		trials = {}
		rows_and_years = []
		lilly = LillyCOI()
		
		# retrieve from our database
		if not _force_update:
			existing = Study.retrieve(ncts)
			for ex in existing:
				trials[ex.nct] = ex
		
		# loop trials
		for nct in ncts:
			if not nct:
				continue
			
			# get the trial fresh via web
			if nct in trials:
				trial = trials[nct]
示例#5
0
# main
if __name__ == "__main__":
	Study.setup_ctakes(CTAKES)
	Study.setup_tables()
	UMLS.setup_umls(UMLS_FILE)
	UMLS.setup_tables()
	
	# ask for a condition
	condition = raw_input("Condition: ")
	if condition is None or len(condition) < 1:
		condition = 'spondylitis'
	
	# search for studies
	print "Fetching %s studies..." % condition
	lilly = LillyCOI()
	results = lilly.search_for(condition)
	
	# process all studies
	run_ctakes = False
	i = 0
	for study in results:
		i += 1
		print 'Processing %d of %d...' % (i, len(results))
		study.sync_with_db()
		study.process_eligibility_from_text()
		study.codify_eligibility()
		if study.waiting_for_ctakes():
			run_ctakes = True
	
	Study.sqlite_commit_if_needed()
示例#6
0
	def _run(self, fields=None, callback=None):
		""" Runs the whole toolchain.
		Currently writes all status to a file associated with run_id. If the
		first word in that file is "error", the process is assumed to have
		stopped. If it is "done" the work here is done.
		
		:param fields: optional list of trial properties to retrieve; extended
		    with `analyze_keypaths` and 'eligibility' when keypath analysis is
		    configured (the passed-in list is mutated in that case)
		:param callback: optional callable invoked as callback(success, trials)
		    after the run finishes
		"""
		
		# check prerequisites
		if self.condition is None and self.term is None:
			raise Exception("No 'condition' and no 'term' provided")
		
		self.assure_run_directory()
		self.status = "Searching for %s trials..." % (self.condition if self.condition is not None else self.term)
		
		# anonymous callback for progress reporting
		def cb(inst, progress):
			if progress > 0:
				# NOTE(review): progress looks like a 0..1 fraction — confirm against LillyCOI
				self.status = "Fetching (%d%%)" % (100 * progress)
		
		# make sure we retrieve the properties that we want to analyze
		if self.analyze_keypaths:
			if fields is None:
				fields = []
			fields.extend(self.analyze_keypaths)
			fields.append('eligibility')
		
		# start the search
		self.status = "Fetching %s trials..." % (self.condition if self.condition is not None else self.term)
		
		# a condition search takes precedence over a term search
		lilly = LillyCOI()
		trials = []
		if self.condition is not None:
			trials = lilly.search_for_condition(self.condition, True, fields, cb)
		else:
			trials = lilly.search_for_term(self.term, True, fields, cb)
		
		# honor the optional trial limit
		if self.limit and len(trials) > self.limit:
			trials = trials[:self.limit]
		
		# process found trials
		self.status = "Processing..."
		sqlite = SQLite.get(self.sqlite_db)
		
		progress = 0
		progress_tot = len(trials)
		# report roughly every 4% of trials, but no more often than every 5
		progress_each = max(5, progress_tot / 25)
		ncts = []
		num_nlp_trials = 0
		nlp_to_run = set()
		for trial in trials:
			ncts.append(trial.nct)
			trial.analyze_keypaths = self.analyze_keypaths
			
			# codify each trial; either record the error and abort, or let the
			# exception propagate, depending on `catch_exceptions`
			if self.catch_exceptions:
				try:
					trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
				except Exception as e:
					self.status = 'Error processing trial: %s' % e
					return
			else:
				trial.codify_analyzables(self.nlp_pipelines, self.discard_cached)
			
			trial.store()
			self.write_trial(sqlite, trial)
			
			# make sure we run the NLP pipeline if needed
			to_run = trial.waiting_for_nlp(self.nlp_pipelines)
			if len(to_run) > 0:
				nlp_to_run.update(to_run)
				num_nlp_trials = num_nlp_trials + 1
			
			# progress
			progress = progress + 1
			if 0 == progress % progress_each:
				self.status = "Processing (%d %%)" % (float(progress) / progress_tot * 100)
		
		sqlite.commit()
		
		# run the needed NLP pipelines; on failure (when catching exceptions)
		# record the error and stop running further pipelines
		success = True
		for nlp in self.nlp_pipelines:
			if nlp.name in nlp_to_run:
				self.status = "Running %s for %d trials (this may take a while)" % (nlp.name, num_nlp_trials)
				if self.catch_exceptions:
					try:
						nlp.run()
					except Exception as e:
						self.status = "Running %s failed: %s" % (nlp.name, str(e))
						success = False
						break
				else:
					nlp.run()
		
		# make sure we codified all criteria: a second pass picks up results
		# the NLP pipelines just produced (no cache discarding this time)
		if success:
			for trial in trials:
				trial.codify_analyzables(self.nlp_pipelines, False)
			
			self.status = 'done'
		
		# run the callback
		if callback is not None:
			callback(success, trials)
示例#7
0
def index():
    """ The index page.

    Renders the index template. When a 'cond' query parameter is present,
    searches LillyCOI for trials matching that condition. The optional
    'criteria' parameter dumps the found trials into the template; the
    optional 'csv' parameter instead writes the eligibility criteria of
    every trial to a timestamped CSV file.
    """

    # get parameters; treat an empty condition string as absent
    cond = bottle.request.query.get('cond')
    if cond is not None and len(cond) < 1:
        cond = None
    csv_name = None
    trials = []
    num_trials = 0

    # if we got a condition
    if cond is not None:
        dump = bottle.request.query.get('criteria') is not None
        csv = bottle.request.query.get('csv') is not None

        lilly = LillyCOI()
        # only fetch eligibility data when we actually need it
        args = ['id', 'eligibility'] if dump or csv else None
        found_trials = lilly.search_for_condition(cond, True, args)
        num_trials = len(found_trials)

        # list criteria
        if dump:
            trials = found_trials

        # return CSV
        elif csv:
            csv_name = 'criteria-%s.csv' % datetime.now().isoformat()[:-7]
            with codecs.open(csv_name, 'w', 'utf-8') as handle:
                heads = [
                    "format", "num in", "num ex", "w age", "w gender",
                    "w pregnancy", "incomplete", "overly complex",
                    "sub-populations", "negated inclusions", "labs", "scores",
                    "acronyms", "temporal components",
                    "patient behavior/abilities",
                    "investigator-subjective components", "sum"
                ]
                # one empty placeholder cell per manual-annotation column
                headers = ','.join('""' for h in heads)

                # CSV header
                handle.write(
                    '"NCT","first received yrs ago","last update yrs ago","has completion","completion and status compatible","criteria",%s\n'
                    % ','.join(['"%s"' % h for h in heads]))

                # CSV rows; 'every' allows writing only every n-th trial
                every = 1
                for i, study in enumerate(found_trials):
                    if 0 == i % every:
                        study.load()
                        handle.write(
                            '"%s","","","","","%s",%s\n' %
                            (study.nct, study.criteria_text.replace(
                                '"', '""'), headers))

    # render index
    template = _jinja_templates.get_template('index.html')
    return template.render(cond=cond,
                           trials=trials,
                           csv=csv_name,
                           num=num_trials)
示例#8
0
    # read CSV
    with codecs.open(csv_path, 'r') as handle:
        reader = csv.reader(handle)
        header = reader.next()

        idx_nct = header.index('NCT')
        idx_drop = header.index('criteria')
        idx_first = header.index('first received yrs ago')
        idx_last = header.index('last update yrs ago')

        # open output file
        csv_new = "%s-auto-updated.csv" % os.path.splitext(
            csv_path)[0].replace('-manual', '')
        with codecs.open(csv_new, 'w') as w_handle:
            lilly = LillyCOI()
            # ref_date = datetime.datetime(2013, 7, 30)		# this can NOT be used against date last updated, of course
            ref_date = datetime.datetime.now()

            writer = csv.writer(w_handle)
            header.pop(idx_drop)
            writer.writerow(header)

            # loop trials
            for row in reader:
                trial = lilly.get_trial(row[idx_nct])

                # date calculations
                first = trial.date('firstreceived_date')
                first_y = round((ref_date - first[1]).days / 365.25 *
                                10) / 10 if first[1] else 99
示例#9
0
    # look for the list
    if not os.path.exists(_list_path):
        print 'x>  The list file at %s does not exist' % _list_path
        sys.exit(1)

    # read list
    with codecs.open(_list_path, 'r') as handle:
        ncts = [
            nct.strip() if len(nct.strip()) > 0 else None
            for nct in handle.readlines()
        ]
        assert len(ncts) > 0
        trials = {}
        rows_and_years = []
        lilly = LillyCOI()

        # retrieve from our database
        if not _force_update:
            existing = Study.retrieve(ncts)
            for ex in existing:
                trials[ex.nct] = ex

        # loop trials
        for nct in ncts:
            if not nct:
                continue

            # get the trial fresh via web
            if nct in trials:
                trial = trials[nct]
示例#10
0
		raise Exception("There is no such file (%s)" % csv_path)
	
	# read CSV
	with codecs.open(csv_path, 'r') as handle:
		reader = csv.reader(handle)
		header = reader.next()
		
		idx_nct = header.index('NCT')
		idx_drop = header.index('criteria')
		idx_first = header.index('first received yrs ago')
		idx_last = header.index('last update yrs ago')
		
		# open output file
		csv_new = "%s-auto-updated.csv" % os.path.splitext(csv_path)[0].replace('-manual', '')
		with codecs.open(csv_new, 'w') as w_handle:
			lilly = LillyCOI()
			# ref_date = datetime.datetime(2013, 7, 30)		# this can NOT be used against date last updated, of course
			ref_date = datetime.datetime.now()
			
			writer = csv.writer(w_handle)
			header.pop(idx_drop)
			writer.writerow(header)
			
			# loop trials
			for row in reader:
				trial = lilly.get_trial(row[idx_nct])
				
				# date calculations
				first = trial.date('firstreceived_date')
				first_y = round((ref_date - first[1]).days / 365.25 * 10) / 10 if first[1] else 99
				last = trial.date('lastchanged_date')
示例#11
0
    def _run(self, fields=None, callback=None):
        """ Runs the whole toolchain.
        Currently writes all status to a file associated with run_id. If the
        first word in that file is "error", the process is assumed to have
        stopped. If it is "done" the work here is done.

        :param fields: optional list of trial properties to retrieve; extended
            with `analyze_keypaths` and 'eligibility' when keypath analysis is
            configured (the passed-in list is mutated in that case)
        :param callback: optional callable invoked as callback(success, trials)
            after the run finishes
        """

        # check prerequisites
        if self.condition is None and self.term is None:
            raise Exception("No 'condition' and no 'term' provided")

        self.assure_run_directory()
        self.status = "Searching for %s trials..." % (
            self.condition if self.condition is not None else self.term)

        # anonymous callback for progress reporting
        def cb(inst, progress):
            if progress > 0:
                # NOTE(review): progress looks like a 0..1 fraction — confirm against LillyCOI
                self.status = "Fetching (%d%%)" % (100 * progress)

        # make sure we retrieve the properties that we want to analyze
        if self.analyze_keypaths:
            if fields is None:
                fields = []
            fields.extend(self.analyze_keypaths)
            fields.append('eligibility')

        # start the search
        self.status = "Fetching %s trials..." % (
            self.condition if self.condition is not None else self.term)

        # a condition search takes precedence over a term search
        lilly = LillyCOI()
        trials = []
        if self.condition is not None:
            trials = lilly.search_for_condition(self.condition, True, fields,
                                                cb)
        else:
            trials = lilly.search_for_term(self.term, True, fields, cb)

        # honor the optional trial limit
        if self.limit and len(trials) > self.limit:
            trials = trials[:self.limit]

        # process found trials
        self.status = "Processing..."
        sqlite = SQLite.get(self.sqlite_db)

        progress = 0
        progress_tot = len(trials)
        # report roughly every 4% of trials, but no more often than every 5
        progress_each = max(5, progress_tot / 25)
        ncts = []
        num_nlp_trials = 0
        nlp_to_run = set()
        for trial in trials:
            ncts.append(trial.nct)
            trial.analyze_keypaths = self.analyze_keypaths

            # codify each trial; either record the error and abort, or let the
            # exception propagate, depending on `catch_exceptions`
            if self.catch_exceptions:
                try:
                    trial.codify_analyzables(self.nlp_pipelines,
                                             self.discard_cached)
                except Exception as e:
                    self.status = 'Error processing trial: %s' % e
                    return
            else:
                trial.codify_analyzables(self.nlp_pipelines,
                                         self.discard_cached)

            trial.store()
            self.write_trial(sqlite, trial)

            # make sure we run the NLP pipeline if needed
            to_run = trial.waiting_for_nlp(self.nlp_pipelines)
            if len(to_run) > 0:
                nlp_to_run.update(to_run)
                num_nlp_trials = num_nlp_trials + 1

            # progress
            progress = progress + 1
            if 0 == progress % progress_each:
                self.status = "Processing (%d %%)" % (float(progress) /
                                                      progress_tot * 100)

        sqlite.commit()

        # run the needed NLP pipelines; on failure (when catching exceptions)
        # record the error and stop running further pipelines
        success = True
        for nlp in self.nlp_pipelines:
            if nlp.name in nlp_to_run:
                self.status = "Running %s for %d trials (this may take a while)" % (
                    nlp.name, num_nlp_trials)
                if self.catch_exceptions:
                    try:
                        nlp.run()
                    except Exception as e:
                        self.status = "Running %s failed: %s" % (nlp.name,
                                                                 str(e))
                        success = False
                        break
                else:
                    nlp.run()

        # make sure we codified all criteria: a second pass picks up results
        # the NLP pipelines just produced (no cache discarding this time)
        if success:
            for trial in trials:
                trial.codify_analyzables(self.nlp_pipelines, False)

            self.status = 'done'

        # run the callback
        if callback is not None:
            callback(success, trials)