import md5 import cPickle as pickle import alchemy from collections import Counter from Levenshtein import jaro_winkler from alchemy import get_config, match from alchemy.schema import * from alchemy.match import commit_inserts, commit_updates from handlers.xml_util import normalize_utf8 from datetime import datetime from sqlalchemy.sql import or_ from sqlalchemy.sql.expression import bindparam from unidecode import unidecode from tasks import bulk_commit_inserts, bulk_commit_updates import sys config = get_config() THRESHOLD = config.get("lawyer").get("threshold") # bookkeeping for blocks blocks = defaultdict(list) id_map = defaultdict(list) nodigits = re.compile(r'[^\d]+') lawyer_dict = {} def get_lawyer_id(obj): """ Returns string representing an lawyer object. Returns obj.organization if
# Module-level setup for geolocation disambiguation: wires an SQLite database
# of Google-geocoded addresses into SQLAlchemy alongside the alchemy project DB.
import sqlalchemy
import sqlalchemy.orm as orm
import sqlalchemy.ext.declarative as declarative
import sqlalchemy.sql.expression as expression
import geoalchemy_util
import itertools
import os
import datetime
import re
import alchemy

# The config file alchemy uses to store information
alchemy_config = alchemy.get_config()
# Used to query the database used for input and output
alchemy_session = alchemy.fetch_session()
# The path to the database which holds geolocation data
# (directory and filename both come from the "location" config section).
geo_data_dbpath = os.path.join(
    alchemy_config.get("location").get('path'),
    alchemy_config.get("location").get('database'))
geo_data_engine = sqlalchemy.create_engine('sqlite:///%s' % geo_data_dbpath)
geo_data_session_class = orm.sessionmaker(bind=geo_data_engine)
# Used to query the database that holds the data from google
# As well as a MaxMinds database containing every city in the world
geo_data_session = geo_data_session_class()
base = declarative.declarative_base()


# Stores an address disambiguated by the Google API
class RawGoogle(base):
    __tablename__ = 'raw_google'
    # Maps onto SQLite's implicit rowid as the primary key.
    id = sqlalchemy.Column("rowid", sqlalchemy.Integer, primary_key=True)
    # NOTE(review): class body is truncated at the chunk boundary —
    # remaining columns are not visible here.
# Module-level setup for assignee disambiguation.
import re  # needed by `nodigits` below; was missing from this chunk's imports
import sys
import json
import itertools
import multiprocessing
from collections import defaultdict  # used below but was not imported
from datetime import datetime

# `import alchemy` was missing: `from alchemy import ...` does not bind the
# module name `alchemy`, which session_generator() calls below require.
import alchemy
from alchemy import get_config, match
from alchemy.schema import *
from alchemy.match import commit_inserts, commit_updates
from handlers.xml_util import normalize_utf8
from sqlalchemy.sql import or_
from sqlalchemy.sql.expression import bindparam
from unidecode import unidecode
from Levenshtein import jaro_winkler
from tasks import bulk_commit_inserts, bulk_commit_updates

config = get_config()

# Similarity threshold for assignee matching (from the "assignee" config section).
THRESHOLD = config.get("assignee").get("threshold")

# Lookup tables populated during disambiguation.
uuid_to_object = {}
uuid_to_cleanid = {}
letter_to_cleanid = {}
uuids_by_cleanidletter = defaultdict(list)
grant_uuids = set()
app_uuids = set()

# Session factories for the two input databases.
grantsessiongen = alchemy.session_generator(dbtype="grant")
appsessiongen = alchemy.session_generator(dbtype="application")

# NOTE(review): despite its name, this pattern matches lowercase letters and
# spaces, not digits — confirm intent at the call sites before renaming.
nodigits = re.compile(r"[a-z ]")

# Common words ignored when normalizing assignee names.
stoplist = ["the", "of", "and", "a", "an", "at"]
# Module-level setup for location processing: reuses the geocoded-address
# SQLite database configured in the "location" config section.
import os  # used by os.path.join below; was missing from this chunk's imports
import sys
import csv
import codecs
from collections import defaultdict, Counter

import pandas as pd
# sqlalchemy / orm / declarative were used below but never imported here.
import sqlalchemy
import sqlalchemy.orm as orm
import sqlalchemy.ext.declarative as declarative

import alchemy
from alchemy.match import commit_inserts, commit_updates
from tasks import bulk_commit_inserts, bulk_commit_updates

# NOTE(review): `global` at module scope is a no-op; kept for compatibility
# with code elsewhere that may rebind `doctype` via `global doctype`.
global doctype
doctype = ''

# The config file alchemy uses to store information
alchemy_config = alchemy.get_config()
# The path to the database which holds geolocation data
geo_data_dbpath = os.path.join(
    alchemy_config.get("location").get('path'),
    alchemy_config.get("location").get('database'))
geo_data_engine = sqlalchemy.create_engine('sqlite:///%s' % geo_data_dbpath)
geo_data_session_class = orm.sessionmaker(bind=geo_data_engine)
# Used to query the database that holds the data from google
# As well as a MaxMinds database containing every city in the world
geo_data_session = geo_data_session_class()
base = declarative.declarative_base()

# How many rows to accumulate before committing (from the "location" section).
commit_freq = alchemy_config.get("location").get("commit_frequency")


# Stores an address disambiguated by the Google API
class RawGoogle(base):
    __tablename__ = 'raw_google'
    # NOTE(review): class body is truncated at the chunk boundary —
    # its columns are not visible here.