예제 #1
0
def restart_dataset(key):
	"""
	Run a dataset's query again

	Deletes all child datasets, marks the dataset as unfinished, and queues a
	new job for it. Only the user stored in the dataset's parameters or an
	administrator may do this, and only for datasets that have finished.

	:param str key:  Dataset key
	:return:  A redirect to the dataset's result page when the job was queued;
	          an error response or error page otherwise.
	"""
	try:
		dataset = DataSet(key=key, db=db)
	except TypeError:
		return error(404, message="Dataset not found.")

	# Other endpoints call `current_user.is_admin()` as a method. Testing the
	# bare attribute of a method is always truthy, which would let every user
	# pass this check. Resolve it explicitly so that both a method and a plain
	# attribute yield the actual admin status.
	is_admin = current_user.is_admin
	if callable(is_admin):
		is_admin = is_admin()

	is_owner = current_user.get_id() == dataset.parameters.get("user", "")
	if not is_owner and not is_admin:
		return error(403, message="Not allowed.")

	if not dataset.is_finished():
		return render_template("error.html", message="This dataset is not finished yet - you cannot re-run it.")

	# the job type to queue is read from the dataset's own parameters
	if "type" not in dataset.parameters:
		return render_template("error.html",
							   message="This is an older dataset that unfortunately lacks the information necessary to properly restart it.")

	for child in dataset.children:
		child.delete()

	dataset.unfinish()
	queue = JobQueue(logger=log, database=db)
	queue.add_job(jobtype=dataset.parameters["type"], remote_id=dataset.key)

	flash("Dataset queued for re-running.")
	return redirect("/results/" + dataset.key + "/")
예제 #2
0
def delete_dataset(key=None):
    """
    Delete a dataset

    Only available to administrators. Deletes a dataset, as well as any
    children linked to it, from 4CAT. Calling this on a dataset that is
    currently being executed is undefined behaviour.

    The dataset key is taken from the `key` argument if given, and from the
    `key` form field otherwise.

    :request-param str key:  ID of the dataset to delete
    :request-param str ?access_token:  Access token; only required if not
                                       logged in currently.

    :return: A dictionary with a successful `status`.

    :return-schema: {type=object,properties={status={type=string}}}

    :return-error 403:  If the current user is not an administrator.
    :return-error 404:  If the dataset does not exist.
    """
    if not current_user.is_admin():
        return error(403, message="Not allowed")

    # an explicitly passed key takes precedence over the submitted form value
    dataset_key = key if key else request.form.get("key", "")

    try:
        dataset = DataSet(key=dataset_key, db=db)
    except TypeError:
        return error(404, error="Dataset does not exist.")
    else:
        dataset.delete()

    return jsonify({"status": "success"})
예제 #3
0
파일: api_tool.py 프로젝트: p-charis/4cat
def datasource_script(datasource_id):
	"""
	Get data source tool javascript

	The data source must be registered in both the loaded modules and
	`config.py`. If so, this endpoint sends the file `tool.js` from the data
	source's `webtool` folder, provided that file exists.

	:param datasource_id:  Datasource ID, as specified in the datasource and
						   config.py
	:return: A javascript file
	:return-error 404: If the datasource does not exist or has no `tool.js`.
	"""
	is_registered = (datasource_id in backend.all_modules.datasources
					 and datasource_id in config.DATASOURCES)
	if not is_registered:
		return error(404, message="Datasource '%s' does not exist" % datasource_id)

	datasource_folder = backend.all_modules.datasources[datasource_id]["path"]
	script_path = datasource_folder.joinpath("webtool", "tool.js")

	if script_path.exists():
		return send_file(str(script_path))

	# a missing script is reported with the same message as a missing datasource
	return error(404, message="Datasource '%s' does not exist" % datasource_id)
예제 #4
0
def api_thread(datasource, board, thread_id):
    """
    Emulate 4chan thread.json API endpoint

    Post bodies (`com`) are stripped of HTML tags before being returned. When
    the `html` format is requested they are additionally run through
    `format_post` and rendered with the `thread.html` template.

    :param str datasource:  Data source ID
    :param str board:  Board name
    :param int thread_id:  Thread ID

    :request-param str format:  Data format. Can be `json` (default) or `html`.

    :return: Thread data, as a list of `posts`.

    :return-schema: {type=object,properties={posts={type=object,additionalProperties={}}}}

    :return-error 404:  If the thread ID does not exist for the given data source.
    """
    if datasource not in config.DATASOURCES:
        return error(404, error="Invalid data source")

    # `datasource` was checked against the configured data sources above, so
    # it is one of a fixed set of names when it is used in the table name
    thread = db.fetchone(
        "SELECT * FROM threads_" + datasource +
        " WHERE board = %s AND id = %s", (board, thread_id))

    if thread is None:
        # NOTE(review): this is a plain-text response rather than the 404
        # error documented above - kept as-is for existing clients; confirm
        # whether it should become an error response
        return "Thread is not anymore available on the server."

    response = get_thread(datasource, board, thread, db)

    # this check has to happen before `response["posts"]` is touched, or an
    # empty result raises an exception instead of returning a 404
    if not response:
        return error(404, error="No posts available for this datasource")

    def strip_html(post):
        post["com"] = strip_tags(post.get("com", ""))
        return post

    response["posts"] = [strip_html(post) for post in response["posts"]]

    if request.args.get("format", "json") != "html":
        return jsonify(response)

    def format_html(post):
        post["com"] = format_post(post.get("com", "")).replace("\n", "<br>")
        return post

    response["posts"] = [format_html(post) for post in response["posts"]]
    metadata = {
        "subject": "".join([post.get("sub", "") for post in response["posts"]]),
        "id": response["posts"][0]["no"]
    }
    return render_template("thread.html",
                           datasource=datasource,
                           board=board,
                           posts=response["posts"],
                           thread=thread,
                           metadata=metadata)
예제 #5
0
def queue_dataset():
    """
    Queue a 4CAT search query for processing into a dataset

    Requires authentication by logging in or providing a valid access token.
    Request parameters vary by data source. The ones mandated constitute the
    minimum but more may be required.

    If the data source's search worker class defines `validate_query`, the
    submitted form is passed through it first; if it defines `after_create`,
    that is called once the dataset has been created.

    :request-param str board:  Board ID to query
    :request-param str datasource:  Data source ID to query
    :request-param str body_match:  String to match in the post body
    :request-param str subject_match:  String to match in the post subject
    :request-param int min_date:  Timestamp marking the beginning of the match
                                  period
    :request-param int max_date:  Timestamp marking the end of the match period
    :request-param str ?access_token:  Access token; only required if not
                                       logged in currently.

    :return str:  The dataset key, which may be used to later retrieve dataset
                  status and results.
    :return-error 404: If the datasource does not exist.
    """
    datasource_id = request.form.get("datasource", "")
    if datasource_id not in backend.all_modules.datasources:
        return error(404,
                     message="Datasource '%s' does not exist" % datasource_id)

    # search workers are registered under "<datasource>-search"
    worker_id = datasource_id + "-search"
    if worker_id not in backend.all_modules.workers:
        return error(404,
                     message="Datasource '%s' has no search interface" %
                     datasource_id)

    worker_class = backend.all_modules.workers[worker_id]["class"]

    parameters = request.form.to_dict()
    if hasattr(worker_class, "validate_query"):
        try:
            parameters = worker_class.validate_query(parameters, request,
                                                     current_user)
        except QueryParametersException as e:
            return "Invalid query. %s" % e

    # these values are always set by the server, whatever was submitted
    parameters["user"] = current_user.get_id()
    parameters["datasource"] = datasource_id
    parameters["type"] = worker_id

    dataset = DataSet(parameters=parameters, db=db, type="search")

    if hasattr(worker_class, "after_create"):
        worker_class.after_create(parameters, dataset, request)

    queue.add_job(jobtype=worker_id, remote_id=dataset.key)

    return dataset.key
예제 #6
0
def api_board_catalog(datasource, board):
    """
    Emulate 4chan API /[board]/catalog.json endpoint

    :param str datasource:  Data source ID
    :param str board:  Board to get index for
    :return:  Board catalog, up to 150 threads divided over a list of
              20-thread pages, each page having a `page` number and a
              list of `threads`, each thread containing the first post.

    :return-schema:{type=array,items={type=object,properties={
        page={type=integer},
        threads={type=array,items={type=object,properties={
            no={type=integer},
            last_modified={type=integer},
            replies={type=integer}
        }}}
    }}}

    :return-error 404:  If the board does not exist for the given datasource.
    """
    if datasource not in config.DATASOURCES:
        return error(404, error="Invalid data source")

    threads = db.fetchall(
        "SELECT * FROM threads_" + datasource +
        " WHERE board = %s ORDER BY is_sticky DESC, timestamp_modified DESC LIMIT 150",
        (board, ))

    if not threads:
        return error(404, error="No threads available for this datasource")

    page_size = 20
    response = []

    # walk the result in chunks of 20 threads, numbering pages from 1
    for page, start in enumerate(range(0, len(threads), page_size), start=1):
        page_threads = []

        for thread in threads[start:start + page_size]:
            thread_data = get_thread(datasource, board, thread, db, limit=6)
            if not thread_data:
                # log the ID of the database row, not the empty lookup result
                log.error(
                    "Thread %s is in database and was requested via API but has no posts."
                    % thread["id"])
                continue

            posts = thread_data["posts"]
            first_post = posts[0]
            if len(posts) > 1:
                # up to five replies are attached to the opening post
                first_post["last_replies"] = posts[1:6]

            page_threads.append(first_post)

        response.append({"page": page, "threads": page_threads})

    return jsonify(response)
예제 #7
0
def datasource_form(datasource_id):
    """
    Get data source query form HTML

    The data source must be registered in both the loaded modules and
    `config.py`. The form is rendered from `query-form.html` in the data
    source's `webtool` folder; if that file does not exist,
    `tool_default.html` is used instead.

    The response also indicates whether a file `tool.js` exists in the data
    source's `webtool` folder.

    :param datasource_id:  Data source ID, as specified in the data source and
                           config.py
    :return: A JSON object with the `html` of the template,
             a boolean `has_javascript` determining whether javascript should
             be loaded for this template, a `status` code and the `datasource`
             ID.

    :return-error 404: If the datasource does not exist, or no template file
                       could be found for it.
    """
    is_registered = (datasource_id in backend.all_modules.datasources
                     and datasource_id in config.DATASOURCES)
    if not is_registered:
        return error(404,
                     message="Datasource '%s' does not exist" % datasource_id)

    datasource = backend.all_modules.datasources[datasource_id]

    template_path = datasource["path"].joinpath("webtool", "query-form.html")
    if not template_path.exists():
        # no datasource-specific form; fall back to the default template
        template_path = Path("tool_default.html")

    has_javascript = datasource["path"].joinpath("webtool", "tool.js").exists()

    # the fallback template may be missing as well
    if not template_path.exists():
        return error(404,
                     message="No interface exists for datasource '%s'" %
                     datasource_id)

    payload = {
        "status": "success",
        "datasource": datasource_id,
        "has_javascript": has_javascript,
        "html": render_template_string(
            template_path.read_text(),
            datasource_id=datasource_id,
            datasource_config=config.DATASOURCES[datasource_id],
            datasource=datasource)
    }
    return jsonify(payload)
예제 #8
0
def datasource_call(datasource, action):
    """
    Call datasource function

    Datasources may define custom API calls as functions in a file
    'webtool/views.py'. These are then available as 'actions' with this API
    endpoint. Request parameters (the query string for GET requests, the form
    data otherwise) are passed as keyword arguments to the function, after the
    request and the current user.

    :param str datasource:  Datasource ID
    :param str action:  Action to call
    :return:  A JSON object with a boolean `success`, and the function's
              return value as `data` if it returned something other than
              `True` or a falsy value.
    """
    # allow prettier URLs
    action = action.replace("-", "_")

    if datasource not in backend.all_modules.datasources:
        return error(404, error="Datasource not found.")

    def no_such_call():
        return error(406,
                     error="Datasource '%s' has no call '%s'" %
                     (datasource, action))

    # only plain identifiers that do not start with a double underscore
    if re.search(r"[^a-zA-Z0-9_]", action) or action.startswith("__"):
        return no_such_call()

    datasource_info = backend.all_modules.datasources[datasource]
    if not datasource_info["path"].joinpath("webtool", "views.py").exists():
        return no_such_call()

    views = importlib.import_module("datasources.%s.webtool.views" %
                                    datasource_info["id"])

    handler = getattr(views, action, None)
    if not callable(handler):
        return no_such_call()

    parameters = request.form if request.method != "GET" else request.args
    result = handler(request, current_user, **parameters)

    if not result:
        return jsonify({"success": False})

    if result is True:
        return jsonify({"success": True})

    return jsonify({"success": True, "data": result})
예제 #9
0
파일: api_tool.py 프로젝트: p-charis/4cat
def delete_dataset(key=None):
	"""
	Delete a dataset

	Only available to administrators and dataset owners. Deletes a dataset, as
	well as any children linked to it, from 4CAT. Also tells the backend to stop
	any jobs dealing with the dataset.

	The dataset key is taken from the `key` argument if given, and from the
	`key` form field otherwise.

	:request-param str key:  ID of the dataset to delete
	:request-param str ?access_token:  Access token; only required if not
	logged in currently.

	:return: A dictionary with a successful `status` and the dataset `key`.

	:return-schema: {type=object,properties={status={type=string}}}

	:return-error 403:  If the user is neither an administrator nor the owner.
	:return-error 404:  If the dataset does not exist.
	"""
	dataset_key = request.form.get("key", "") if not key else key

	try:
		dataset = DataSet(key=dataset_key, db=db)
	except TypeError:
		return error(404, error="Dataset does not exist.")

	if not current_user.is_admin() and not current_user.get_id() == dataset.parameters.get("user"):
		return error(403, message="Not allowed")

	# if there is an active or queued job for some child dataset, cancel and
	# delete it
	for child in dataset.get_all_children():
		try:
			job = Job.get_by_remote_ID(child.key, database=db, jobtype=child.type)
		except JobNotFoundException:
			continue

		# the job was looked up by the child's own type, so the cancel request
		# has to name that same type rather than the parent dataset's
		call_api("cancel-job", {"remote_id": child.key, "jobtype": child.type, "level": BasicWorker.INTERRUPT_CANCEL})
		job.finish()

	# now cancel the job for this one (if it exists); the lookup only serves
	# to find out whether there is a job to cancel
	try:
		Job.get_by_remote_ID(dataset.key, database=db, jobtype=dataset.type)
		call_api("cancel-job", {"remote_id": dataset.key, "jobtype": dataset.type, "level": BasicWorker.INTERRUPT_CANCEL})
	except JobNotFoundException:
		pass

	# and delete the dataset and child datasets
	dataset.delete()

	return jsonify({"status": "success", "key": dataset.key})
예제 #10
0
def api_board(datasource, board):
    """
    Emulate 4chan API /[board]/threads.json endpoint

    At most 200 threads are returned, sticky threads first and then by most
    recent modification, in pages of 20 threads each.

    :param str datasource:  Data source ID
    :param str board:  Board to get index for
    :return:  Thread index for board, as a list of pages, each page containing
              a page number `page` and a list of `threads`, each thread having
              the keys `no` and `last_modified`.

    :return-schema:{type=array,items={type=object,properties={
        page={type=integer},
        threads={type=array,items={type=object,properties={
            no={type=integer},
            last_modified={type=integer},
            replies={type=integer}
        }}}
    }}}

    :return-error 404:  If the board does not exist for the given datasource.
    """
    if datasource not in config.DATASOURCES:
        return error(404, error="Invalid data source")

    threads = db.fetchall(
        "SELECT * FROM threads_" + datasource +
        " WHERE board = %s ORDER BY is_sticky DESC, timestamp_modified DESC LIMIT 200",
        (board, ))

    if not threads:
        return error(404, error="No threads available for this datasource")

    pages = []
    # page numbers start at 1; every page holds the next 20 threads
    for page_number, start in enumerate(range(0, len(threads), 20), start=1):
        page_threads = []
        for thread in threads[start:start + 20]:
            page_threads.append({
                "no": thread["id"],
                "last_modified": thread["timestamp_modified"]
            })

        pages.append({"page": page_number, "threads": page_threads})

    return jsonify(pages)
예제 #11
0
파일: api_tool.py 프로젝트: p-charis/4cat
def datasource_form(datasource_id):
	"""
	Get data source query form HTML

	The data source must be registered in both the loaded modules and
	`config.py`. The form is built by rendering the `options` of the data
	source's search worker class with the `create-dataset-option.html`
	template.

	The response also indicates whether a file `tool.js` exists in the data
	source's `webtool` folder.

	If the data source has no search worker or its search worker does not have
	any options defined, this returns a 404 Not Found status.

	:param datasource_id:  Data source ID, as specified in the data source and
						   config.py
	:return: A JSON object with the `html` of the template,
	         a boolean `has_javascript` determining whether javascript should be
	         loaded for this template, a `status` code and the `datasource` ID.

	:return-error 404: If the datasource does not exist.
	"""
	for registry in (backend.all_modules.datasources, config.DATASOURCES):
		if datasource_id not in registry:
			return error(404, message="Datasource '%s' does not exist" % datasource_id)

	# search workers are registered under "<datasource>-search"
	worker = backend.all_modules.workers.get(datasource_id + "-search")
	if not worker:
		return error(404, message="Datasource '%s' has no search worker" % datasource_id)

	worker_class = backend.all_modules.load_worker_class(worker)
	if not hasattr(worker_class, "options"):
		return error(404, message="Datasource '%s' has no dataset parameter options defined" % datasource_id)

	datasource = backend.all_modules.datasources[datasource_id]
	has_javascript = datasource["path"].joinpath("webtool", "tool.js").exists()

	# the rendered option form is itself rendered as a template once more,
	# this time with the datasource details available to it
	form = render_template("create-dataset-option.html", options=worker_class.options)
	html = render_template_string(form, datasource_id=datasource_id,
								  datasource_config=config.DATASOURCES[datasource_id], datasource=datasource)

	payload = {
		"status": "success",
		"datasource": datasource_id,
		"has_javascript": has_javascript,
		"html": html
	}
	return jsonify(payload)
예제 #12
0
def preview_csv(key):
    """
    Preview a CSV file

    Simply passes the first 25 rows of a dataset's csv result file to the
    template renderer.

    :param str key:  Dataset key
    :return:  HTML preview; a 404 response if the dataset or its result file
              does not exist.
    """
    from itertools import islice

    try:
        dataset = DataSet(key=key, db=db)
    except TypeError:
        # pass the message by keyword, as every other `error()` call here does
        return error(404, message="Dataset not found.")

    results_path = dataset.get_results_path()

    try:
        with results_path.open(encoding="utf-8") as csvfile:
            # read at most 25 rows; shorter files simply yield fewer
            rows = list(islice(csv.reader(csvfile), 25))
    except FileNotFoundError:
        abort(404)

    return render_template("result-csv-preview.html",
                           rows=rows,
                           filename=results_path.name)
예제 #13
0
파일: api_tool.py 프로젝트: p-charis/4cat
def available_processors():
	"""
	Get processors available for a dataset

	:request-param string key:  Dataset key to get processors for
	:return: An object containing the `error` if the request failed, or a list
	         of processors, each with a `name`, a `type` ID, a
	         `description` of what it does, the `extension` of the file it
	         produces, a `category` name, what types of datasets it `accepts`,
	         and a list of `options`, if applicable.

	:return-schema: {type=array,items={type=object,properties={
		name={type=string},
		type={type=string},
		description={type=string},
		extension={type=string},
		category={type=string},
		accepts={type=array,items={type=string}}
	}}}

	:return-error 404:  If the dataset does not exist.
	"""
	try:
		dataset = DataSet(key=request.args.get("key"), db=db)
	except TypeError:
		return error(404, error="Dataset does not exist.")

	# Class type is not JSON serialisable, so it is left out of the response.
	# Copies are built instead of deleting the key in place, because the
	# dictionaries returned by the dataset may be shared with other code that
	# still needs the class.
	processors = {
		processor_id: {field: value for field, value in processor.items() if field != "class"}
		for processor_id, processor in dataset.get_available_processors().items()
	}

	return jsonify(processors)
예제 #14
0
파일: api_tool.py 프로젝트: p-charis/4cat
def toggle_favourite(key):
	"""
	'Like' a dataset

	Toggles whether the dataset is marked as a favourite of the currently
	active user: the mark is added if it is not there yet, and removed if it
	is. This can be used for organisation in the front-end.

	:param str key: Key of the dataset to mark as favourite.

	:return: A JSON object with the status of the request
	:return-schema: {type=object,properties={success={type=boolean},favourite_status={type=boolean}}}

	:return-error 404:  If the dataset key was not found
	"""
	try:
		dataset = DataSet(key=key, db=db)
	except TypeError:
		return error(404, error="Dataset does not exist.")

	user_id = current_user.get_id()
	favourite = {"name": user_id, "key": dataset.key}

	existing = db.fetchone("SELECT * FROM users_favourites WHERE name = %s AND key = %s",
						   (user_id, dataset.key))

	if existing:
		db.delete("users_favourites", where=favourite)
	else:
		db.insert("users_favourites", data=favourite)

	# the reported status is the state after toggling
	return jsonify({"success": True, "favourite_status": not existing})
예제 #15
0
def check_processor():
    """
    Check processor status

    Keys that do not belong to an existing dataset, or that belong to a
    dataset without a parent, are left out of the result.

    :request-param str subqueries:  A JSON-encoded list of dataset keys to get
                                    the status of
    :return: A list of dataset data, with each dataset an item with a `key`,
            whether it had `finished`, a `html` snippet containing details,
            a `resultrow_html` snippet for the top-level dataset, and
            a `url` at which the result may be downloaded when finished.

    :return-schema:{type=array,items={type=object,properties={
        key={type=string},
        finished={type=boolean},
        html={type=string},
        url={type=string}
    }}}

    :return-error 406:  If the list of subqueries could not be parsed.
    """
    try:
        keys = json.loads(request.args.get("subqueries"))
    except (TypeError, json.decoder.JSONDecodeError):
        return error(406,
                     error="Unexpected format for child dataset key list.")

    # valid JSON that is not a list (e.g. a number or `null`) cannot be
    # processed as a list of keys either
    if not isinstance(keys, list):
        return error(406,
                     error="Unexpected format for child dataset key list.")

    children = []

    for key in keys:
        try:
            dataset = DataSet(key=key, db=db)
        except TypeError:
            continue

        genealogy = dataset.get_genealogy()
        if len(genealogy) < 2:
            # no parent to render this dataset under, so it is not a child
            continue

        parent = genealogy[-2]
        top_parent = genealogy[0]

        children.append({
            "key": dataset.key,
            "finished": dataset.is_finished(),
            "html": render_template("result-child.html",
                                    child=dataset,
                                    dataset=parent,
                                    query=top_parent,
                                    parent_key=top_parent.key,
                                    processors=backend.all_modules.processors),
            "resultrow_html": render_template("result-result-row.html",
                                              dataset=top_parent),
            "url": "/result/" + dataset.data["result_file"]
        })

    return jsonify(children)
예제 #16
0
def api_board_page(datasource, board, page):
    """
    Emulate 4chan API /[board]/[page].json endpoint

    Pages hold 15 threads each and are numbered from 1.

    :param str datasource:  Data source ID
    :param str board:  Board to get index for
    :param int page:  Page to show
    :return:  A page containing a list of `threads`, each thread a list of
              `posts`.

    :return-schema:{type=object,properties={
        threads={type=array,items={type=object,properties={
            posts={type=array,items={type=object,additionalProperties={}}}
        }}}
    }}

    :return-error 404:  If the board does not exist for the given datasource,
                        or the page number is not a positive integer.
    """
    if datasource not in config.DATASOURCES:
        return error(404, error="Invalid data source")

    try:
        page = int(page)
    except (TypeError, ValueError):
        return error(404, error="Invalid page number")

    # page 0 or a negative page would produce a negative OFFSET, which the
    # database rejects with an error instead of an empty result
    if page < 1:
        return error(404, error="Invalid page number")

    # formatted separately so the `%s` placeholder in the query is left alone
    limit = "LIMIT 15 OFFSET %i" % ((page - 1) * 15)
    threads = db.fetchall(
        "SELECT * FROM threads_" + datasource +
        " WHERE board = %s ORDER BY is_sticky DESC, timestamp_modified DESC " +
        limit, (board, ))

    if not threads:
        return error(404, error="No threads available for this datasource")

    response = {
        "threads":
        [get_thread(datasource, board, thread, db) for thread in threads]
    }

    return jsonify(response)
예제 #17
0
def request_token():
    """
    Request an access token

    Requires that the user is currently logged in to 4CAT. If the user already
    has a token that has not expired, that token is returned; otherwise the
    user's stored tokens are removed and a new one, valid for 365 days, is
    created.

    :request-param str ?forward:  If set, redirect to the access token
                                  overview page instead of returning JSON.

    :return: An object with one item `token`

    :return-schema: {type=object,properties={token={type=string}}}

    :return-error 403:  If the user is logged in with an anonymous account.
    """
    import secrets

    user_id = current_user.get_id()

    if user_id == "autologin":
        # access tokens are only for 'real' users so we can keep track of who
        # (ab)uses them
        return error(403,
                     error="Anonymous users may not request access tokens.")

    now = int(time.time())
    existing = db.fetchone(
        "SELECT * FROM access_tokens WHERE name = %s AND (expires = 0 OR expires > %s)",
        (user_id, now))

    if existing:
        token = existing["token"]
    else:
        # 64 hexadecimal characters from a cryptographically secure source; a
        # hash of the user ID and the current time could be guessed
        token = secrets.token_hex(32)

        # no valid token was found above, so anything still stored for this
        # user has expired and can be deleted
        db.delete("access_tokens", where={"name": user_id})

        # save new token
        db.insert("access_tokens", {
            "name": user_id,
            "token": token,
            "expires": now + (365 * 86400)
        })

    if request.args.get("forward"):
        # show HTML page
        return redirect(url_for("show_access_tokens"))

    # show JSON response (by default); same shape for new and existing tokens
    return jsonify({"token": token})
예제 #18
0
def get_archive(datasource, board):
	"""
	Emulate 4chan API /[board]/archive.json endpoint

	Threads count as archived when their `timestamp_archived` is greater than
	zero; they are listed in order of that timestamp, oldest first.

	:param str datasource:  Data source ID
	:param board: Board to get list of archived thread IDs for
	:return:  Thread archive, a list of threads IDs of threads within this
	          board.

	:return-schema: {type=array,items={type=integer}}

	:return-error 404: If the datasource does not exist.
	"""
	if datasource not in config.DATASOURCES:
		return error(404, error="Invalid data source")

	query = "SELECT id FROM threads_" + datasource + " WHERE board = %s AND timestamp_archived > 0 ORDER BY timestamp_archived ASC"
	archived = db.fetchall(query, (board,))

	thread_ids = []
	for row in archived:
		thread_ids.append(row["id"])

	return jsonify(thread_ids)
예제 #19
0
def live_stats():
	"""
	Get live worker and datasource statistics

	Only available to administrators. Asks the backend for its worker counts
	and combines these with the loaded datasource definitions.

	:return: A JSON object with `workers`, containing the `id`, `name` and
	         number of `active` instances of each known worker type that has
	         a non-zero count, and `datasources`, containing the datasource
	         definitions without their `path`.
	"""
	if not current_user.is_admin():
		return error(403, message="This page is off-limits to you.")

	worker_stats = call_api("workers")["response"]

	# Build copies without the `path` instead of deleting it in place: the
	# datasource registry is shared, so deleting from it would make the next
	# request to this endpoint fail and remove the path for any other code
	# that reads it.
	datasources = {
		datasource_id: {field: value for field, value in datasource.items() if field != "path"}
		for datasource_id, datasource in all_modules.datasources.items()
	}

	workers = {}
	for worker, active in worker_stats.items():
		# skip worker types unknown to this instance and those not running
		if worker not in all_modules.workers or active == 0:
			continue

		workers[worker] = {
			"id": worker,
			"name": all_modules.workers[worker]["name"],
			"active": active
		}

	return jsonify({"workers": workers, "datasources": datasources})
예제 #20
0
def get_boards(datasource):
    """
    Get available boards in datasource

    The boards are the distinct `board` values found in the datasource's
    thread table.

    :param datasource:  The datasource for which to acquire the list of available
                        boards.
    :return:  An object containing a list of `boards`, each an object with the
              string ID of the `board`.

    :return-schema: {type=object,properties={
        boards={type=array,items={type=object,properties={
            board={type=string}
        }}}
    }}

    :return-error 404: If the datasource does not exist.
    """
    if datasource not in config.DATASOURCES:
        return error(404, error="Invalid data source")

    rows = db.fetchall("SELECT DISTINCT board FROM threads_" + datasource)

    boards = []
    for row in rows:
        boards.append({"board": row["board"]})

    return jsonify({"boards": boards})
예제 #21
0
def add_user():
	"""
	Create a new user

	Only available to administrators. The e-mail address, read from the form
	data or the query string, doubles as the user name. After the user has
	been inserted, `email_token()` is called on it to send the registration
	e-mail.

	:return: Either an html page with a message, or a JSON response with
	         `success` and `message`, depending on whether ?format == html
	"""
	if not current_user.is_admin():
		return error(403, message="This page is off-limits to you.")

	success = False
	email = request.form.get("email", request.args.get("email", "")).strip()

	if not email or not re.match(r"[^@]+\@.*?\.[a-zA-Z]+", email):
		message = "Please provide a valid e-mail address."
	else:
		username = email
		try:
			db.insert("users", data={"name": username, "timestamp_token": int(time.time())})

			user = User.get_by_name(username)
			if user is None:
				message = "User was created but could not be instantiated properly."
			else:
				try:
					user.email_token(new=True)
				except RuntimeError as e:
					message = "User was created but the registration e-mail could not be sent to them (%s)." % e
				else:
					success = True
					message = "An e-mail containing a link through which the registration can be completed has been sent to %s." % username
		except psycopg2.IntegrityError:
			# the insert failed because of a constraint; undo it and report
			db.rollback()
			message = "Error: User %s already exists." % username

	response = {"success": success, "message": message}

	if request.args.get("format", None) == "html":
		return render_template("error.html", message=response["message"],
							   title=("New account created" if response["success"] else "Error"))

	return jsonify(response)
예제 #22
0
def cp_index():
	"""
	Show the control panel index page

	:return: The rendered `controlpanel/index.html` template, or a 403 error
	         if the current user is not an administrator.
	"""
	if current_user.is_admin():
		return render_template("controlpanel/index.html")

	return error(403, message="This page is off-limits to you.")
예제 #23
0
def add_user():
    """
    Create a new user

    Sends the user an e-mail with a link through which they can set their
    password. The e-mail address doubles as the user name.

    If the user already exists, the response contains a link that calls this
    endpoint again with `force` set; in that case the user's token timestamp
    is reset and the registration e-mail is sent again.

    All parameters are read from the form data, or the query string if they
    are not in the form data.

    :request-param str email:  E-mail address of the user to create
    :request-param str ?format:  Set to `html` to get an HTML page instead of
                                 a JSON response
    :request-param str ?force:  If set, re-send the registration e-mail to a
                                user that already exists

    :return: Either an html page with a message, or a JSON response with
             `success` and `message`, depending on whether ?format == html
    """
    from urllib.parse import urlencode

    if not current_user.is_authenticated or not current_user.is_admin():
        return error(403, message="This page is off-limits to you.")

    response = {"success": False}

    email = request.form.get("email", request.args.get("email", "")).strip()
    fmt = request.form.get("format", request.args.get("format", "")).strip()
    force = request.form.get("force", request.args.get("force", None))

    if not email or not re.match(r"[^@]+\@.*?\.[a-zA-Z]+", email):
        response["message"] = "Please provide a valid e-mail address."
    else:
        username = email
        try:
            db.insert("users",
                      data={
                          "name": username,
                          "timestamp_token": int(time.time())
                      })

            user = User.get_by_name(username)
            if user is None:
                response["message"] = "User was created but could not be instantiated properly."
            else:
                try:
                    user.email_token(new=True)
                    response["success"] = True
                    response["message"] = "An e-mail containing a link through which the registration can be completed has been sent to %s." % username
                except RuntimeError as e:
                    response["message"] = "User was created but the registration e-mail could not be sent to them (%s)." % e
        except psycopg2.IntegrityError:
            db.rollback()
            if not force:
                # encode the parameters: e-mail addresses may contain
                # characters such as `+` or `&` that would otherwise change
                # the meaning of the query string
                link = "/admin/add-user?" + urlencode({
                    "email": username,
                    "force": 1,
                    "format": fmt
                })
                response["message"] = 'Error: User %s already exists. If you want to re-create the user and re-send the registration e-mail, use [this link](%s).' % (username, link)
            else:
                # if a user does not use their token in time, maybe you want to
                # be a benevolent admin and give them another chance, without
                # having them go through the whole signup again
                user = User.get_by_name(username)
                if user is None:
                    response["message"] = "User exists but could not be instantiated properly."
                else:
                    db.update("users",
                              data={"timestamp_token": int(time.time())},
                              where={"name": username})

                    try:
                        user.email_token(new=True)
                        response["success"] = True
                        response["message"] = "A new registration e-mail has been sent to %s." % username
                    except RuntimeError as e:
                        response["message"] = "Token was reset but the registration e-mail could not be sent to them (%s)." % e

    if fmt == "html":
        return render_template(
            "error.html",
            message=response["message"],
            title=("New account created" if response["success"] else "Error"))

    return jsonify(response)
예제 #24
0
def reject_user():
    """
    (Politely) reject an account request

    Sometimes, account requests need to be rejected. If you want to let the
    requester know of the rejection, this is the route to use :-)

    The `email`, `name` and `message` values are read from the submitted
    form, falling back to the query string. As long as any of them is empty
    the rejection form is rendered, with `incomplete` listing the names of
    the empty fields. Once all three are present the message is sent as a
    multipart e-mail (plain text plus a Markdown-rendered HTML alternative).

    :return: HTML form, or message containing the e-mail send status. A 403
             error is returned if the current user is not a logged-in admin.
    """
    if not current_user.is_authenticated or not current_user.is_admin():
        return error(403, message="This page is off-limits to you.")

    email_address = request.form.get("email", request.args.get("email",
                                                               "")).strip()
    name = request.form.get("name", request.args.get("name", "")).strip()
    form_message = request.form.get("message", request.args.get("message",
                                                                "")).strip()

    # collect the *names* of the fields that were left empty (not their empty
    # values), so the form can tell which fields still need attention
    submitted = (("email", email_address), ("name", name),
                 ("message", form_message))
    incomplete = [field for field, value in submitted if not value]

    if incomplete:
        if not form_message:
            # pre-fill the message with the rejection template, if one exists
            form_answer = Path(
                config.PATH_ROOT,
                "webtool/templates/account/reject-template.html")
            form_message = "" if not form_answer.exists() else render_template(
                "account/reject-template.html", email=email_address, name=name)

        return render_template("account/reject.html",
                               email=email_address,
                               name=name,
                               message=form_message,
                               incomplete=incomplete)

    email = MIMEMultipart("alternative")
    email["From"] = config.NOREPLY_EMAIL
    email["To"] = email_address
    email["Subject"] = "Your %s account request" % config.TOOL_NAME

    try:
        html_message = markdown.markdown(form_message)

        # plain text first, HTML second: the last part attached to a
        # multipart/alternative message is the preferred rendering
        email.attach(MIMEText(form_message, "plain"))
        email.attach(MIMEText(html_message, "html"))

        with smtplib.SMTP(config.MAILHOST) as smtp:
            smtp.sendmail(config.NOREPLY_EMAIL, [email_address],
                          email.as_string())
    except (smtplib.SMTPException, ConnectionRefusedError) as e:
        return render_template("error.html",
                               message="Could not send e-mail to %s: %s" %
                               (email_address, e),
                               title="Error sending rejection")

    # the generic message template doubles as the confirmation page
    return render_template("error.html",
                           message="Rejection sent to %s." % email_address,
                           title="Rejection sent")
예제 #25
0
파일: api_tool.py 프로젝트: p-charis/4cat
def check_dataset():
	"""
	Report the current status of a dataset

	Requires authentication by logging in or providing a valid access token.

	:request-param str key:  ID of the dataset for which to return the status
	:return: Dataset status, containing the `status`, `query`, number of `rows`,
	         the dataset `key`, whether the dataset is `done`, the `path` of the
	         result file and whether the dataset is `empty`. The response also
	         carries a rendered `status_html` snippet, the dataset `label` and
	         whether the current user marked it as `is_favourite`.

	:return-schema: {
		type=object,
		properties={
			status={type=string},
			query={type=string},
			rows={type=integer},
			key={type=string},
			done={type=boolean},
			path={type=string},
			empty={type=boolean},
			is_favourite={type=boolean}
		}
	}

	:return-error 404:  If the dataset does not exist.
	"""
	dataset_key = request.args.get("key")
	try:
		dataset = DataSet(key=dataset_key, db=db)
	except TypeError:
		return error(404, error="Dataset does not exist.")

	finished = dataset.check_dataset_finished()
	if not finished:
		# nothing to point to (yet)
		path = ""
	else:
		# 'empty' marks a finished dataset without results; any other truthy
		# value is the result file, of which only the name is exposed
		path = False if finished == 'empty' else finished.name

		# decode the stored parameters in place on the dataset's own record
		record = dataset.data
		record["parameters"] = json.loads(record["parameters"])

	favourite_query = "SELECT COUNT(*) AS num FROM users_favourites WHERE name = %s AND key = %s"

	return jsonify({
		"status": dataset.get_status(),
		"status_html": render_template("result-status.html", dataset=dataset),
		"label": dataset.get_label(),
		"query": dataset.data["query"],
		"rows": dataset.data["num_rows"],
		"key": dataset_key,
		"done": bool(dataset.is_finished()),
		"path": path,
		"empty": (dataset.data["num_rows"] == 0),
		"is_favourite": (db.fetchone(favourite_query, (current_user.get_id(), dataset.key))["num"] > 0)
	})
예제 #26
0
파일: api_tool.py 프로젝트: p-charis/4cat
def queue_dataset():
	"""
	Create a dataset for a 4CAT search query and queue it for processing

	Requires authentication by logging in or providing a valid access token.
	Request parameters vary by data source. The ones listed here are the
	minimum; a data source may require more.

	:request-param str board:  Board ID to query
	:request-param str datasource:  Data source ID to query
	:request-param str body_match:  String to match in the post body
	:request-param str subject_match:  String to match in the post subject
	:request-param int min_date:  Timestamp marking the beginning of the match
	                              period
	:request-param int max_date:  Timestamp marking the end of the match period
	:request-param str ?access_token:  Access token; only required if not
	                                   logged in currently.

	:return str:  The dataset key, which may be used to later retrieve dataset
	              status and results. If the query parameters are rejected, a
	              plain "Invalid query" message is returned instead.
	:return-error 404: If the datasource does not exist, or has no search
	                   worker.
	"""
	modules = backend.all_modules

	datasource_id = request.form.get("datasource", "")
	if datasource_id not in modules.datasources:
		return error(404, message="Datasource '%s' does not exist" % datasource_id)

	# the search worker of a data source is registered as "<datasource>-search"
	search_worker_id = datasource_id + "-search"
	if search_worker_id not in modules.workers:
		return error(404, message="Datasource '%s' has no search interface" % datasource_id)

	worker_class = modules.load_worker_class(modules.workers[search_worker_id])

	if not hasattr(worker_class, "validate_query"):
		raise NotImplementedError("Data sources MUST sanitise input values with validate_query")

	try:
		# generic sanitising first, then datasource-specific validation
		parameters = UserInput.parse_all(worker_class.options, request.form.to_dict(), silently_correct=False)
		parameters = worker_class.validate_query(parameters, request, current_user)
	except QueryParametersException as e:
		return "Invalid query. %s" % e

	# bookkeeping values are set by us, overriding anything the form supplied
	parameters.update({
		"user": current_user.get_id(),
		"datasource": datasource_id,
		"type": search_worker_id,
		"pseudonymise": bool(request.form.to_dict().get("pseudonymise", False))
	})

	dataset = DataSet(
		parameters=parameters,
		db=db,
		type=search_worker_id,
		extension=getattr(worker_class, "extension", "csv")
	)

	# optional hook for the worker, called once the dataset object exists
	if hasattr(worker_class, "after_create"):
		worker_class.after_create(parameters, dataset, request)

	queue.add_job(jobtype=search_worker_id, remote_id=dataset.key)

	return dataset.key
예제 #27
0
def process_standalone(processor):
	"""
	Run a standalone processor

	This bypasses the usual 4CAT query-processor structure and allows running
	any available processor (see the `/api/get-standalone-processors/`
	endpoint) with one API call. The data is returned immediately and not saved
	server-side.

	Requires authentication.

	:param str processor:  ID of the processor to run on incoming data

	:request-body object data:  Data to process, a JSON-formatted list of
	objects with each object having at least the keys `id`, `thread_id`,
	`body`, and `author`.

	:request-schema data: {
		type=object,
		properties={
			id={type=string},
			thread_id={type=string},
			body={type=string},
			author={type=string}
		}
	}

	:request-param str ?access_token:  Access token; only required if not
	                                   logged in currently.

	:return:  A JSON object containing the processed data, with a
	processor-specific structure.

	:return-schema: {
		type=object,
		additionalProperties={}
	}

	:return-error 402: If an invalid processor is requested, or if the input is
	not properly-formatted JSON.
	:return-error 503: If too many other requests are currently being handled,
	so that the server does not have the capacity to deal with this request
	"""
	processors = get_standalone_processors().get_json()

	if processor not in processors:
		return error(402, error="Processor '%s' is not available" % processor)

	if not request.is_json:
		return error(402, error="This API endpoint only accepts JSON-formatted data as input")

	try:
		# NOTE(review): Flask's get_json() appears to report malformed JSON by
		# raising a BadRequest rather than JSONDecodeError - confirm whether
		# this handler is ever reached
		data = request.get_json(force=True)
	except json.JSONDecodeError:
		return error(402, error="JSON decoding error")

	# check file integrity
	required = ("id", "thread_id", "body", "author")
	try:
		for row in data:
			# a string or list can pass the `in` checks below but cannot be
			# indexed by field name when the CSV is written, so reject
			# anything that is not a JSON object up front
			if not isinstance(row, dict):
				return error(402, error="Input is valid JSON, but not a list of data objects")

			for field in required:
				if field not in row:
					return error(402, error="Input is valid JSON, but not a list of data objects (missing field '%s')" % field)
	except TypeError:
		# the decoded JSON is not iterable at all (e.g. a number or null)
		return error(402, error="Input is valid JSON, but not a list of data objects")

	if not data:
		return error(402, error="Input is empty")

	# ok, valid input!
	temp_dataset = DataSet(extension="csv", type="standalone", parameters={"user": current_user.get_id(), "after": [processor]}, db=db)
	temp_dataset.finish(len(data))

	# make sure the file is deleted later, whichever way this request is
	# ultimately handled
	@after_this_request
	def delete_temporary_dataset(response):
		temp_dataset.delete() # also deletes children!
		return response

	# write the input as a csv file so it can be accessed as normal by
	# processors; newline="" lets the csv module control line endings itself
	result_file = temp_dataset.get_results_path()
	with result_file.open("w", newline="") as temp_csv:
		writer = csv.DictWriter(temp_csv, fieldnames=required)
		writer.writeheader()
		for row in data:
			# only the required fields are passed on to the processor
			writer.writerow({field: row[field] for field in required})

	# queue the postprocessor
	metadata = processors[processor]
	processed = DataSet(extension=metadata["extension"], type=processor, parent=temp_dataset.key, db=db)

	queue = JobQueue(database=db, logger=log)
	job = queue.add_job(processor, {}, processed.key)
	place_in_queue = queue.get_place_in_queue(job)
	if place_in_queue > 5:
		# too much of a backlog: withdraw the job instead of making the
		# client wait
		job.finish()
		return error(code=503, error="Your request could not be handled as there are currently %i other jobs of this type in the queue. Please try again later." % place_in_queue)

	# wait up to half a minute for the job to be taken up
	# if not, tell the user to try again later

	start = time.time()
	while True:
		if time.time() > start + 30:
			job.finish()
			return error(code=503, error="The server is currently too busy to handle your request. Please try again later.")

		if queue.get_place_in_queue(job) != 0:
			time.sleep(2)
			continue
		else:
			break

	# job currently being processed, wait for it to finish
	# NOTE(review): unlike the loop above, this one has no deadline - the
	# request blocks until the job is reported finished or can no longer be
	# found
	while True:
		try:
			job = Job.get_by_remote_ID(job.data["remote_id"], db, processor)
		except JobNotFoundException:
			break

		if not job.is_finished:
			time.sleep(2)
		else:
			break

	# job finished, send file - temporary datasets will be cleaned up by
	# after_this_request function defined earlier
	return send_file(processed.get_results_path(), as_attachment=True)