def handle(self, *args, **options):
    """Perform import."""
    engine = create_engine(URL("mysql",
            username=options["dbuser"],
            password=options["dbpass"],
            host=options["dbhost"],
            database=options["database"],
            port=options["dbport"],
            query=dict(
                charset="utf8",
                use_unicode=0
            )
    ))
    init_models(engine)
    self.session = models.Session()

    # random selection for now
    repos = []
    #repos.extend(self.session.query(models.Repository)\
    #        .filter(models.Repository.identifier=='ehri1691None').all())
    repos.extend(self.session.query(models.Repository).all())
    self.stdout.write("Adding %s repos\n" % len(repos))
    for repo in repos:
        if not repo.identifier:
            self.stderr.write("\n\nCannot index repository with no identifier\n")
            continue
        self.stderr.write("\n\nIndexing repo: %s\n" % repo.identifier)
        self.import_icaatom_repo(repo)
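
# A minimal sketch of driving the management command above programmatically
# via Django's call_command. The command name "import_icaatom" and the
# option values are assumptions, not part of the original source.
from django.core.management import call_command

call_command(
    "import_icaatom",
    dbuser="qubit",
    dbpass="secret",
    dbhost="localhost",
    dbport="3306",
    database="ehri_icaatom",
)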

def __init__(self, database=None, username=None, password=None,
             hostname="localhost", port=None, atomuser=None,
             rowfunc=None, donefunc=None):
    engine = create_engine(URL("mysql",
            username=username,
            password=password,
            host=hostname,
            database=database,
            port=port,
            query=dict(
                charset="utf8",
                use_unicode=0
            )
    ))
    init_models(engine)
    self.session = models.Session()
    self.donefunc = donefunc
    self.rowfunc = rowfunc
    self.timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    self.user = self.session.query(models.User).filter(
            models.User.username == atomuser).one()

    # load default status and detail... this is where
    # SQLAlchemy gets horrible
    self.status = self.session.query(models.Term)\
            .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\
                    .DESCRIPTION_STATUS_ID)\
            .join(models.TermI18N, models.Term.id == models.TermI18N.id)\
            .filter(models.TermI18N.name == "Draft").one()
    self.detail = self.session.query(models.Term)\
            .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\
                    .DESCRIPTION_DETAIL_LEVEL_ID)\
            .join(models.TermI18N, models.Term.id == models.TermI18N.id)\
            .filter(models.TermI18N.name == "Partial").one()
    self.actorroot = self.session.query(models.Actor).filter(
            models.Actor.id == keys.ActorKeys.ROOT_ID).one()
    self.termroot = self.session.query(models.Term).filter(
            models.Term.id == keys.TermKeys.ROOT_ID).one()

    # running count of slugs and ids used so far in the import transaction
    self.slugs = {}
    self.ids = {}
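
# A usage sketch for the importer class whose __init__ appears above. The
# class name "Importer" and the callback signatures are assumptions based
# on the rowfunc/donefunc parameters; atomuser must match an existing
# models.User.username or the .one() lookup will raise.
def on_row(rownum):
    print("Imported row %d" % rownum)

def on_done():
    print("Import complete")

importer = Importer(
    database="ehri_icaatom",
    username="qubit",
    password="secret",
    hostname="localhost",
    atomuser="qubit",
    rowfunc=on_row,
    donefunc=on_done,
)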

def __init__(self):
    self.options, self.args = get_options()
    engine = create_engine(URL("mysql",
            username=self.options.dbuser,
            password=self.options.dbpass,
            host=self.options.dbhost,
            database=self.options.database,
            port=self.options.dbport,
            query=dict(
                charset="utf8",
                use_unicode=0
            )
    ))
    init_models(engine)
    self.session = models.Session()
    self.solrurl = "http://%s:%d/%s/update/json" % (
        self.options.solrhost,
        self.options.solrport,
        self.options.solrcontext,
    )
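
# A hedged sketch of what a URL like self.solrurl is typically used for:
# POSTing JSON documents to Solr's update handler. The host/port/context
# values and document fields are illustrative only; urllib2 matches a
# Python 2 codebase.
import json
import urllib2

solrurl = "http://%s:%d/%s/update/json" % ("localhost", 8983, "solr")
docs = [{"id": "repo-1", "identifier": "ehri-repo-1"}]
req = urllib2.Request(
    solrurl + "?commit=true",
    data=json.dumps(docs),
    headers={"Content-Type": "application/json"},
)
urllib2.urlopen(req)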
def __init__(self): """Initialise importer.""" parser = OptionParser(usage="usage: %prog [options] <csvfile>", version="%prog 1.0") parser.add_option( "-f", "--from", action="store", dest="fromrec", type="int", default=1, help="Import records from this offset") parser.add_option( "-t", "--to", action="store", dest="to", type="int", default=-1, help="Import records up to this offset") parser.add_option( "-U", "--dbuser", action="store", dest="dbuser", default="qubit", help="Database user") parser.add_option( "-p", "--dbpass", action="store", dest="dbpass", help="Database password") parser.add_option( "-H", "--dbhost", action="store", dest="dbhost", default="localhost", help="Database host name") parser.add_option( "-P", "--dbport", action="store", dest="dbport", help="Database host name") parser.add_option( "-D", "--database", action="store", dest="database", default="qubit", help="Database name") parser.add_option( "-u", "--user", action="store", dest="user", default="qubit", help="User to own imported records") parser.add_option( "-l", "--lang", action="store", dest="lang", default="en", help="Language for imported i18n fields") self.options, self.args = parser.parse_args() if len(self.args) != 1: parser.error("No CSV file provided") engine = create_engine(URL("mysql", username=self.options.dbuser, password=self.options.dbpass, host=self.options.dbhost, database=self.options.database, port=self.options.dbport, query=dict( charset="utf8", use_unicode=0 ) )) init_models(engine) self.session = models.Session() self.user = self.session.query(models.User).filter( models.User.username == self.options.user).one() self.parent = self.session.query(models.Actor)\ .filter(models.Actor.id==keys.ActorKeys.ROOT_ID).one() # load default status and detail... this is where # SQLAlchemy gets horrible self.status = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .DESCRIPTION_STATUS_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Draft").one() self.detail = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .DESCRIPTION_DETAIL_LEVEL_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Partial").one() # running count of slugs used so far in the import transaction self.slugs = {}
PATH = "eag" #print(repo.identifier) #for prop in repo.properties: # for k, v in prop.get_i18n("en").iteritems(): # print("%-20s : %s" % (k, phpserialize.loads(v))) #for address in repo.contacts: # for k, v in address.get_i18n("en").iteritems(): # print " %-20s : %s" % (k, v) #for k, v in repo.get_i18n("en").iteritems(): # print "%-20s : %s" % (k, v) try: country_code, doc = get_doc_base(repo) dirpath = os.path.join(PATH, country_code.lower()) out = etree.tostring(doc, pretty_print=True) if not os.path.exists(dirpath): os.mkdir(dirpath) with open(os.path.join(dirpath, str(repo.id) + ".xml"), "w") as f: f.write(out) except NoCountryCode, e: print(e.message, file=sys.stderr) engine = create_engine( "mysql://*****:*****@localhost/ehri_icaatom?charset=utf8") init_models(engine) session = models.Session() for repo in session.query(models.Repository).all(): dump_repo(repo)
def __init__(self): """Initialise importer.""" parser = OptionParser(usage="usage: %prog [options] <repository_name> <jsonfile>", version="%prog 1.0") parser.add_option( "-f", "--from", action="store", dest="fromrec", type="int", default=1, help="Import records from this offset") parser.add_option( "-t", "--to", action="store", dest="to", type="int", default=-1, help="Import records up to this offset") parser.add_option( "-U", "--dbuser", action="store", dest="dbuser", default="qubit", help="Database user") parser.add_option( "-p", "--dbpass", action="store", dest="dbpass", help="Database password") parser.add_option( "-H", "--dbhost", action="store", dest="dbhost", default="localhost", help="Database host name") parser.add_option( "-P", "--dbport", action="store", dest="dbport", help="Database host name") parser.add_option( "-D", "--database", action="store", dest="database", default="qubit", help="Database name") parser.add_option( "-u", "--user", action="store", dest="user", default="qubit", help="User to own imported records") parser.add_option( "-l", "--lang", action="store", dest="lang", default="en", help="Language for imported i18n fields") self.options, self.args = parser.parse_args() if len(self.args) != 2: parser.error() self.reponame = self.args[0] self.jsonfile = self.args[1] engine = create_engine(URL("mysql", username=self.options.dbuser, password=self.options.dbpass, host=self.options.dbhost, database=self.options.database, port=self.options.dbport, query=dict( charset="utf8", use_unicode=0 ) )) init_models(engine) self.session = models.Session() try: self.repo = self.session.query(models.Repository)\ .join(models.ActorI18N, models.ActorI18N.id == models.Repository.id)\ .filter(models.ActorI18N.authorized_form_of_name==self.reponame)\ .one() except NoResultFound: print >> sys.stderr, "No repository found for name: %s" % self.reponame sys.exit(1) try: self.user = self.session.query(models.User).filter( models.User.username == self.options.user).one() except NoResultFound: print >> sys.stderr, "No user found for name: %s" % self.options.user sys.exit(1) self.parent = self.session.query(models.InformationObject)\ .filter(models.InformationObject.id==keys.InformationObjectKeys.ROOT_ID)\ .one() # load default status and detail... 
this is where # SQLAlchemy gets horrible self.status = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .DESCRIPTION_STATUS_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Draft").one() self.detail = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .DESCRIPTION_DETAIL_LEVEL_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Partial").one() self.pubtype = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .STATUS_TYPE_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "publication").one() self.pubstatus = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .PUBLICATION_STATUS_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "published").one() self.lod_fonds = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .LEVEL_OF_DESCRIPTION_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Fonds").one() self.lod_coll = self.session.query(models.Term)\ .filter(models.Term.taxonomy_id == keys.TaxonomyKeys\ .LEVEL_OF_DESCRIPTION_ID)\ .join(models.TermI18N, models.Term.id == models.TermI18N.id)\ .filter(models.TermI18N.name == "Collection").one() self.slugs = {}
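
# Hypothetical command line for the JSON importer above (the script name is
# an assumption; the two positional arguments match self.args):
#
#   python import_json.py -U qubit -p secret -D ehri_icaatom -u qubit \
#       "Wiener Library" collections.json
#
# Note: the NoResultFound handling above assumes the standard SQLAlchemy
# import, which is not shown in this fragment:
#
#   from sqlalchemy.orm.exc import NoResultFound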