import requests

import config
import store

# NOTE(review): `os`, `sys`, `time`, `json`, `RealDictCursor` and `prefix` are
# used below but are not bound in this part of the file -- presumably they are
# imported/defined earlier; confirm.
CONFIG_FILE = os.environ.get("PYPI_CONFIG", os.path.join(prefix, 'config.ini'))

conf = config.Config(CONFIG_FILE)

# Nothing to do unless both the index name and the index URL are configured.
if conf.database_releases_index_name is None or conf.database_releases_index_url is None:
    sys.exit()

# Each run writes into a fresh, timestamp-suffixed index.
new_index = "trove-%s-%s" % (conf.database_releases_index_name, int(time.time()))
print("creating new index %s" % (new_index,))

# Rebinds `store` from the module to the opened Store instance.
store = store.Store(conf)
store.open()

# RealDictCursor rows are passed straight to json.dumps() below.
cursor = store._conn.cursor(cursor_factory=RealDictCursor)
cursor.execute("BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE")
cursor.execute("SET statement_timeout = '600s'")
cursor.execute("select r.name as name, rl.summary as summary, array_agg(distinct trove_id) as trove_classifiers, array_agg(distinct t.l2) || array_agg(distinct t.l3) || array_agg(distinct t.l4) || array_agg(distinct t.l5) as categories from release_classifiers r join releases rl on (rl.name=r.name and rl.version=r.version) join trove_classifiers t on r.trove_id=t.id where not rl._pypi_hidden group by r.name, rl.summary")

# Stream the result set in batches of 1000 rows, one bulk request per batch.
while True:
    packages = cursor.fetchmany(1000)
    if not packages:
        break

    # The bulk body alternates an action line with the document it applies to.
    operations = []
    for package in packages:
        operations.append(json.dumps({"index": {"_index": new_index, "_type": "release_classifiers", "_id": package['name']}}))
        operations.append(json.dumps(package))

    # The bulk endpoint requires the final line of the body to end with a
    # newline, so terminate the payload explicitly.
    r = requests.post(conf.database_releases_index_url + "/_bulk", data="\n".join(operations) + "\n")
    # Fail loudly on an HTTP-level error instead of silently dropping a batch.
    r.raise_for_status()
import sys
import itertools

# Workaround current bug in docutils:
# http://permalink.gmane.org/gmane.text.docutils.devel/6324
import docutils.utils

# Prepend the parent of this script's directory to the import path,
# presumably so the `store` and `config` imports below resolve to the
# project's own modules.
# NOTE(review): `os` is not imported in the visible part of the file --
# presumably it is imported earlier; confirm.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path = [root] + sys.path

import store
import config

c = config.Config("config.ini")
# Rebinds `store` from the module to the opened Store instance.
store = store.Store(c)
store.open()
cursor = store.get_cursor()

# Collect every lower-cased user name that occurs on more than one row.
cursor.execute(
    "SELECT LOWER(name) FROM users GROUP BY LOWER(name) HAVING COUNT(*) > 1")
duplicated = {row[0] for row in cursor.fetchall()}

duplicates = {}
users = {}
for username in duplicated:
    # Fetch all accounts whose name matches this one case-insensitively.
    cursor.execute(
        "SELECT name, email, last_login FROM users WHERE LOWER(name)=LOWER(%s)",
        (username,))
    dups = cursor.fetchall()