def _cached_model_path(cls): env_path = os.environ.get("FORMASAURUS_MODEL") if env_path: return os.path.expanduser(env_path) path = "formasaurus-%s.joblib" % dependencies_string() return at_root(path)
def _cached_model_path(cls): env_path = os.environ.get("FORMASAURUS_MODEL") if env_path: return os.path.expanduser(env_path) path = "formasaurus-%s.joblib" % dependencies_string() return at_root(path)
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import os

import six
import joblib

from formasaurus import formtype_model, fieldtype_model
from formasaurus.html import get_forms, get_fields_to_annotate, load_html
from formasaurus.storage import Storage
from formasaurus.utils import dependencies_string, at_root, thresholded


DEFAULT_DATA_PATH = at_root('data')


def extract_forms(tree_or_html, proba=False, threshold=0.05, fields=True):
    """
    Extract forms from a lxml tree or from HTML source code.

    The result is a list of ``(form_elem, form_info)`` tuples, where
    each ``form_info`` dict holds the output of :meth:`classify` or of
    :meth:`classify_proba`, depending on the ``proba`` parameter.
    Field type information is not computed when ``fields`` is False.

    This is a module-level shortcut: all arguments are forwarded
    unchanged to ``get_instance().extract_forms``.
    """
    extractor = get_instance()
    return extractor.extract_forms(
        tree_or_html=tree_or_html,
        proba=proba,
        threshold=threshold,
        fields=fields,
    )
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import os

import six

try:
    # ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    # removed in 0.23, so prefer the standalone ``joblib`` package.
    import joblib
except ImportError:
    # Fallback for old environments that only ship the vendored copy.
    from sklearn.externals import joblib

from formasaurus import formtype_model, fieldtype_model
from formasaurus.html import get_forms, get_fields_to_annotate, load_html
from formasaurus.storage import Storage
from formasaurus.utils import dependencies_string, at_root, thresholded


DEFAULT_DATA_PATH = at_root('data')


def extract_forms(tree_or_html, proba=False, threshold=0.05, fields=True):
    """
    Given a lxml tree or HTML source code, return a list of
    ``(form_elem, form_info)`` tuples.

    ``form_info`` dicts contain results of :meth:`classify` or
    :meth:`classify_proba` calls, depending on ``proba`` parameter.
    When ``fields`` is False, field type information is not computed.

    All arguments are forwarded unchanged, as keyword arguments, to
    ``get_instance().extract_forms``.
    """
    return get_instance().extract_forms(
        tree_or_html=tree_or_html,
        proba=proba,
        threshold=threshold,
        fields=fields,
    )