def get_start_end_date(environ, start_response):
    """WSGI handler yielding the date range and the total hit count as JSON bytes.

    The response is started with a 200 status before any other work is done.
    The range returned by ``start_end_date`` is written into
    ``request.metadata["year"]`` as ``"<start>-<end>"``, the request's own
    ``start_date``/``end_date`` entries are blanked, and the query is then run
    to completion so its hits can be counted.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        A single UTF-8 encoded JSON object with the keys ``start_date``,
        ``end_date`` and ``total_results``.
    """
    start_response(
        "200 OK",
        [("Content-type", "text/html; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )

    app_root = os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")
    config = WebConfig(app_root)
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)

    start_date, end_date = start_end_date(
        db, config, start_date=request.start_date, end_date=request.end_date
    )

    # The date range travels with the metadata as a "year" value; the
    # dedicated date entries on the request are emptied before querying.
    request.metadata["year"] = "{}-{}".format(start_date, end_date)
    for date_key in ("start_date", "end_date"):
        request[date_key] = ""

    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    # finish() is called before len() so the count is taken on the completed hit list.
    hits.finish()

    payload = {
        "start_date": start_date,
        "end_date": end_date,
        "total_results": len(hits),
    }
    yield json.dumps(payload).encode("utf8")
def bibliography(environ, start_response):
    """WSGI handler yielding the bibliography report as JSON bytes.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        The first element returned by ``bibliography_results`` serialized with
        ``simplejson`` and encoded as UTF-8.
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('reports', ''))
    request = WSGIHandler(environ, config)
    headers = [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")]
    start_response('200 OK', headers)
    # Only the serializable report object is needed here; the second element
    # of the returned pair is not used by this handler.
    bibliography_object, _ = bibliography_results(request, config)
    # WSGI response bodies must be byte strings, so encode explicitly.
    yield simplejson.dumps(bibliography_object).encode('utf8')
def get_bibliography(environ, start_response):
    """WSGI handler yielding the landing-page bibliography as JSON bytes.

    The response is started with a 200 status and JSON content type before
    the configuration and request wrapper are built.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        The value returned by ``landing_page_bibliography`` serialized as
        JSON and encoded as UTF-8.
    """
    start_response(
        '200 OK',
        [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")],
    )

    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    request = WSGIHandler(environ, config)

    serialized = json.dumps(landing_page_bibliography(request, config))
    yield serialized.encode('utf8')
def alignment_to_text(environ, start_response):
    """WSGI handler yielding a JSON object that carries a single ``link`` value.

    The link is whatever ``byte_range_to_link`` returns for the current
    database, configuration and request.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        UTF-8 encoded JSON of the form ``{"link": <link>}``.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    link = byte_range_to_link(db, config, request)
    # WSGI response bodies must be byte strings, so encode explicitly.
    yield simplejson.dumps({"link": link}).encode('utf8')
def metadata_list(environ, start_response):
    """WSGI handler yielding metadata autocomplete output as UTF-8 bytes.

    The request's ``term`` and ``field`` values are handed to
    ``autocomplete_metadata`` together with the database handle; the string
    it returns is encoded and yielded unchanged.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        The encoded result of ``autocomplete_metadata``.
    """
    start_response(
        "200 OK",
        [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")],
    )

    app_root = os.path.abspath(os.path.dirname(__file__)).replace("scripts", "")
    config = WebConfig(app_root)
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)

    term, field = request.term, request.field
    suggestions = autocomplete_metadata(term, field, db)
    yield suggestions.encode("utf8")
def get_start_end_date(environ, start_response):
    """WSGI handler yielding the date range for the request as JSON bytes.

    The range is whatever ``start_end_date`` returns when given the database,
    the configuration and the request's ``start_date``/``end_date`` values.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        A single UTF-8 encoded JSON object with the keys ``start_date`` and
        ``end_date``.
    """
    start_response(
        '200 OK',
        [('Content-type', 'text/html; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")],
    )

    script_dir = os.path.abspath(os.path.dirname(__file__))
    config = WebConfig(script_dir.replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)

    first, last = start_end_date(
        db, config, start_date=request.start_date, end_date=request.end_date
    )
    payload = {"start_date": first, "end_date": last}
    yield json.dumps(payload).encode('utf8')
def landing_page_content(environ, start_response):
    """WSGI handler yielding landing-page content grouped by range or by metadata.

    When ``request.is_range`` equals the string ``"true"``, the query is
    lower-cased and split on ``"-"`` into a range; a query without a dash is
    treated as a range whose start and end are the same value. Otherwise the
    content is grouped by metadata.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        The string returned by ``group_by_range`` or ``group_by_metadata``,
        encoded as UTF-8.
    """
    status = "200 OK"
    headers = [("Content-type", "application/json; charset=UTF-8"), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    request = WSGIHandler(environ, config)
    if request.is_range == "true":
        query = request.query
        # Decode first and keep working on the decoded text: splitting a
        # bytes object with a str separator raises TypeError.
        if isinstance(query, bytes):
            query = query.decode("utf8")
        request_range = query.lower().split("-")
        if len(request_range) == 1:
            # A single value stands for a range that starts and ends on it.
            request_range.append(request_range[0])
        results = group_by_range(request_range, request, config)
    else:
        results = group_by_metadata(request, config)
    yield results.encode("utf8")
def access_request(environ, start_response):
    """WSGI handler reporting whether the client is granted access.

    ``login_access`` decides the outcome and returns the headers to send, so
    the response is only started after it has run.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        UTF-8 encoded JSON: ``{"access": true}`` when access is granted,
        otherwise an object with ``access`` set to false plus the
        ``incoming_address`` and ``domain_name`` obtained from
        ``access_control.get_client_info``.
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")]
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    # NOTE(review): db is not used below; the construction is kept in case
    # DB() has side effects callers rely on - confirm before removing.
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    access, headers = login_access(environ, request, config, headers)
    start_response(status, headers)
    if access:
        payload = {'access': True}
    else:
        incoming_address, domain_name = access_control.get_client_info(environ)
        payload = {
            'access': False,
            "incoming_address": incoming_address,
            "domain_name": domain_name
        }
    # Both outcomes are encoded: WSGI response bodies must be byte strings.
    yield json.dumps(payload).encode('utf8')
def resolve_cite_service(environ, start_response):
    """WSGI handler that answers a citation query with a 302 redirect.

    The part of the query before the first ``" - "`` is taken as the
    milestone. Every prefix of the milestone that ends at a separator (a
    space not followed by a period, or a period not followed by a space),
    plus the whole milestone, is looked up in the ``abbrev`` column of the
    ``toms`` table; the last prefix that matches wins. The navigation entries
    of the matched document are then scanned for a ``head`` equal to the full
    query, and the redirect targets that object. When nothing matches, the
    redirect falls back to ``config["db_url"]``.

    Progress is traced to stderr.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Returns:
        An empty string (the redirect carries no body).
    """
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace("scripts", ""))
    db = DB(config.db_path + "/data/")
    request = WSGIHandler(environ, config)
    c = db.dbh.cursor()
    q = request.q

    # Default target, used whenever no more specific object is found.
    best_url = config["db_url"]

    # split() returns the whole string as the only item when " - " is absent.
    milestone = q.split(" - ")[0]

    milestone_segments = []
    milestone_prefixes = []
    last_segment = 0
    for separator in re.finditer(r" (?!\.)|\.(?! )", milestone):
        milestone_prefixes.append(milestone[:separator.start()])
        milestone_segments.append(milestone[last_segment:separator.start()])
        last_segment = separator.end()
    milestone_segments.append(milestone[last_segment:])
    milestone_prefixes.append(milestone)

    print("SEGMENTS", repr(milestone_segments), file=sys.stderr)
    print("PREFIXES", repr(milestone_prefixes), file=sys.stderr)

    # Prefixes are tried shortest first and later matches overwrite earlier
    # ones, so the longest matching prefix is the one that is kept.
    abbrev_match = None
    for prefix in milestone_prefixes:
        print("QUERYING for abbrev = ", prefix, file=sys.stderr)
        abbrev_q = c.execute("SELECT * FROM toms WHERE abbrev = ?;", (prefix, )).fetchone()
        if abbrev_q:
            abbrev_match = abbrev_q

    # Without an abbreviation match there is no document to navigate;
    # indexing None here would raise TypeError, so keep the default URL.
    if abbrev_match is not None:
        print("ABBREV", abbrev_match["abbrev"], abbrev_match["philo_id"], file=sys.stderr)
        doc_obj = ObjectWrapper(abbrev_match["philo_id"].split(), db)
        nav = nav_query(doc_obj, db)

        best_match = None
        for n in nav:
            if n["head"] == request.q:
                print("MATCH", n["philo_id"], n["n"], n["head"], file=sys.stderr)
                best_match = n
                break

        if best_match:
            # Number of leading philo_id components kept for each object type.
            type_offsets = {"doc": 1, "div1": 2, "div2": 3, "div3": 4, "para": 5}
            t = best_match["philo_type"]
            short_id = best_match["philo_id"].split()[:type_offsets[t]]
            best_url = f.make_absolute_object_link(config, short_id)
            print("BEST_URL", best_url, file=sys.stderr)

    status = "302 Found"
    headers = [("Location", best_url)]
    start_response(status, headers)
    return ""
def get_neighboring_words(environ, start_response):
    """WSGI handler yielding, per hit, the words stored around the hit word.

    Processing resumes at ``request.hits_done`` (0 when that value cannot be
    read as an integer) and stops once more than ``request.max_time`` seconds
    have been spent in the loop, so a client can fetch the results in several
    calls.

    For each hit, the matching row of the ``words`` table is located by its
    ``philo_id`` and the rows whose ``rowid`` lies within 10 before and 10
    after it supply the context. The left context is emitted in reverse row
    order, i.e. starting with the row closest to the hit.

    Args:
        environ: WSGI environment for the incoming request.
        start_response: WSGI callable used to send the status and headers.

    Yields:
        UTF-8 encoded JSON with ``results`` (one object per processed hit
        holding ``left``, ``right``, ``index``, ``q`` and the lower-cased
        values of ``config.kwic_metadata_sorting_fields``) and ``hits_done``
        (the index at which the next call should resume).
    """
    status = '200 OK'
    headers = [('Content-type', 'application/json; charset=UTF-8'), ("Access-Control-Allow-Origin", "*")]
    start_response(status, headers)
    config = WebConfig(os.path.abspath(os.path.dirname(__file__)).replace('scripts', ''))
    db = DB(config.db_path + '/data/')
    request = WSGIHandler(environ, config)
    try:
        index = int(request.hits_done)
    except (AttributeError, KeyError, TypeError, ValueError):
        # A missing or non-numeric value means "start from the first hit".
        index = 0
    max_time = int(request.max_time)
    kwic_words = []
    start_time = timeit.default_timer()
    hits = db.query(request["q"], request["method"], request["arg"], **request.metadata)
    cursor = db.dbh.cursor()
    for hit in hits[index:]:
        word_id = ' '.join(str(i) for i in hit.philo_id)
        # Bind the id as a parameter instead of formatting it into the SQL text.
        cursor.execute('select rowid, philo_name, parent from words where philo_id=? limit 1', (word_id,))
        results = cursor.fetchone()

        highlighted_text = kwic_hit_object(hit, config, db)["highlighted_text"]
        highlighted_text = highlighted_text.translate(remove_punctuation_map)
        highlighted_text = highlighted_text.strip()

        hit_rowid = results["rowid"]
        left_rowid = hit_rowid - 10
        right_rowid = hit_rowid + 10

        cursor.execute('select philo_name, philo_id from words where rowid between ? and ?',
                       (left_rowid, hit_rowid - 1))
        left_words = [row['philo_name'] for row in cursor]
        # Reversed so the word nearest to the hit comes first.
        left_words.reverse()

        cursor.execute('select philo_name, philo_id from words where rowid between ? and ?',
                       (hit_rowid + 1, right_rowid))
        right_words = [row['philo_name'] for row in cursor]

        result_obj = {
            "left": ' '.join(left_words),
            "right": ' '.join(right_words),
            "index": index,
            "q": highlighted_text
        }
        for metadata in config.kwic_metadata_sorting_fields:
            result_obj[metadata] = hit[metadata].lower()
        kwic_words.append(result_obj)

        index += 1
        elapsed = timeit.default_timer() - start_time
        # Avoid timeouts by splitting the work: stop once the time budget
        # given in the request has been exceeded.
        if elapsed > max_time:
            break
    yield json.dumps({"results": kwic_words, "hits_done": index}).encode('utf8')