Exemplo n.º 1
0
import md5
import cPickle as pickle
import alchemy
from collections import Counter
from Levenshtein import jaro_winkler
from alchemy import get_config, match
from alchemy.schema import *
from alchemy.match import commit_inserts, commit_updates
from handlers.xml_util import normalize_utf8
from datetime import datetime
from sqlalchemy.sql import or_
from sqlalchemy.sql.expression import bindparam
from unidecode import unidecode
from tasks import bulk_commit_inserts, bulk_commit_updates
import sys
# Project configuration, obtained once at import time from alchemy's get_config().
config = get_config()

# Value of "threshold" in the "lawyer" section of the configuration.
# NOTE(review): presumably the minimum similarity score for treating two
# lawyer records as the same entity -- confirm against the code that uses it.
THRESHOLD = config.get("lawyer").get("threshold")

# bookkeeping for blocks
# Both mappings create an empty list for any key that is not present yet.
blocks = defaultdict(list)
id_map = defaultdict(list)

# Matches a run of one or more consecutive non-digit characters.
nodigits = re.compile(r'[^\d]+')

# Module-level dictionary, empty at import time.
# NOTE(review): presumably filled with lawyer records later -- key and value
# types are not visible from here.
lawyer_dict = {}


def get_lawyer_id(obj):
    """
    Returns string representing an lawyer object. Returns obj.organization if
Exemplo n.º 2
0
import sqlalchemy
import sqlalchemy.orm as orm
import sqlalchemy.ext.declarative as declarative
import sqlalchemy.sql.expression as expression
import geoalchemy_util
import itertools
import os
import datetime
import re

import alchemy
# Configuration object returned by alchemy.get_config(); read below for the
# "location" section.
alchemy_config = alchemy.get_config()
# Session used to query the database that serves as input and output.
alchemy_session = alchemy.fetch_session()
# Filesystem path of the database that holds geolocation data, built from the
# "path" and "database" entries of the "location" config section.
geo_data_dbpath = os.path.join(
    alchemy_config.get("location").get('path'),
    alchemy_config.get("location").get('database'))
# SQLite engine pointing at the geolocation database file.
geo_data_engine = sqlalchemy.create_engine('sqlite:///%s' % geo_data_dbpath)
# Session factory bound to the geolocation engine.
geo_data_session_class = orm.sessionmaker(bind=geo_data_engine)
# Session used to query the database that holds the data from Google,
# as well as a MaxMind database containing every city in the world.
geo_data_session = geo_data_session_class()
# Declarative base class for the ORM models defined in this module.
base = declarative.declarative_base()


#Stores an address disambiguated by the Google API
class RawGoogle(base):
    __tablename__ = 'raw_google'
    id = sqlalchemy.Column("rowid", sqlalchemy.Integer, primary_key=True)
from Levenshtein import jaro_winkler
from alchemy import get_config, match
from alchemy.schema import *
from alchemy.match import commit_inserts, commit_updates
from handlers.xml_util import normalize_utf8
from datetime import datetime
from sqlalchemy.sql import or_
from sqlalchemy.sql.expression import bindparam
from unidecode import unidecode
from tasks import bulk_commit_inserts, bulk_commit_updates
import multiprocessing
import itertools
import sys
import json

# Project configuration, obtained once at import time from alchemy's get_config().
config = get_config()

# Value of "threshold" in the "assignee" section of the configuration.
# NOTE(review): presumably the minimum similarity score for treating two
# assignee records as the same entity -- confirm against the code that uses it.
THRESHOLD = config.get("assignee").get("threshold")

# Module-level lookup tables, all empty at import time.
# NOTE(review): key/value meaning is inferred from the names only (uuid ->
# object, uuid -> clean id, letter -> clean id) -- confirm against the code
# that populates them.
uuid_to_object = {}
uuid_to_cleanid = {}
letter_to_cleanid = {}
# Creates an empty list for any key that is not present yet.
uuids_by_cleanidletter = defaultdict(list)

# Sets of uuids, empty at import time.
# NOTE(review): presumably split by grant vs. application records -- confirm.
grant_uuids = set()
app_uuids = set()
# Results of alchemy.session_generator for the "grant" and "application"
# database types.
grantsessiongen = alchemy.session_generator(dbtype="grant")
appsessiongen = alchemy.session_generator(dbtype="application")

# NOTE(review): despite its name, this pattern matches a single lowercase
# ASCII letter or a space, not "non-digit" text -- confirm that this is intended.
nodigits = re.compile(r"[a-z ]")
# Common English words.
# NOTE(review): presumably ignored when comparing names -- confirm against usage.
stoplist = ["the", "of", "and", "a", "an", "at"]
Exemplo n.º 4
0
import sys
from collections import defaultdict, Counter
import pandas as pd
import csv
import codecs


import alchemy
from alchemy.match import commit_inserts, commit_updates
from tasks import bulk_commit_inserts, bulk_commit_updates

# Module-level document-type marker, empty at import time.  A ``global``
# statement is only meaningful inside a function body, so a plain assignment
# is all that is needed at module scope.
# NOTE(review): presumably reassigned by functions in this module (which must
# declare ``global doctype`` themselves) -- confirm.
doctype = ''

# Configuration object returned by alchemy.get_config(); read below for the
# "location" section.
alchemy_config = alchemy.get_config()
# Filesystem path of the database that holds geolocation data, built from the
# "path" and "database" entries of the "location" config section.
geo_data_dbpath = os.path.join(
    alchemy_config.get("location").get('path'),
    alchemy_config.get("location").get('database'))
# SQLite engine pointing at the geolocation database file.
geo_data_engine = sqlalchemy.create_engine('sqlite:///%s' % geo_data_dbpath)
# Session factory bound to the geolocation engine.
geo_data_session_class = orm.sessionmaker(bind=geo_data_engine)
# Session used to query the database that holds the data from Google,
# as well as a MaxMind database containing every city in the world.
geo_data_session = geo_data_session_class()
# Declarative base class for the ORM models defined in this module.
base = declarative.declarative_base()
# Value of "commit_frequency" in the "location" config section.
# NOTE(review): presumably the number of records processed between commits --
# confirm against the code that uses it.
commit_freq = alchemy_config.get("location").get("commit_frequency")

#Stores an address disambiguated by the Google API
class RawGoogle(base):
    __tablename__ = 'raw_google'