示例#1
0
def migrate_contacts(source, batch_size=1000):
    """Read contact records from an XML file and bulk-load them as Accounts.

    The children of the document root are walked twice: a first pass
    collects every login value that occurs more than once, and a second pass
    builds one ``Account`` per contact and hands the prepared records to
    ``Account.bulk`` in batches.

    Rules applied to each contact:

    * A missing/empty login is replaced by the first non-empty ISSN of the
      contact, or by a random 8 character string when there is none.
    * A missing/empty password is replaced by a random 8 character string.
    * A contact whose login is duplicated elsewhere in the file is skipped
      when it carries no ``issn`` elements, and kept otherwise.

    :param source: path of the XML file to read (passed to ``open``).
    :param batch_size: number of prepared records to accumulate before each
        intermediate ``Account.bulk`` call.
    """
    # Single-argument print(...) calls emit identical text whether ``print``
    # is the statement or the function, so the output is unchanged.

    def text_of(node):
        # Text of an element, or None when the element is absent or empty.
        if node is None or not node.text:
            return None
        return node.text

    # SystemRandom draws from the operating system's entropy source; the
    # default generator is predictable and unsuitable for passwords.
    rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits

    def random_token(length=8):
        # Random string of upper-case letters and digits.
        return ''.join(rng.choice(alphabet) for _ in range(length))

    # read in the content; the context manager closes the file even if
    # parsing fails
    with open(source) as f:
        contacts = etree.parse(f).getroot()
    print("migrating %s contact records from %s" % (len(contacts), source))

    # first thing to do is locate all the duplicates in the logins
    # (sets keep each membership test constant-time)
    seen = set()
    duplicates = set()
    for element in contacts:
        login = text_of(element.find("login"))
        if login is None:
            continue
        if login in seen:
            duplicates.add(login)
        seen.add(login)

    # now go through and load all the user accounts
    batch = []
    for element in contacts:
        login = text_of(element.find("login"))
        password = text_of(element.find("password"))
        name = text_of(element.find("name"))
        email = text_of(element.find("email"))
        issns = element.findall("issn")

        if login is None:
            print("ERROR: contact without login - providing login")
            # prefer the first ISSN that actually has a value; an empty
            # <issn/> element must not become the account id
            issn_values = [issn.text for issn in issns if issn.text]
            login = issn_values[0] if issn_values else random_token()

        if password is None:
            print("ERROR: contact without password %s - providing one" % (login,))
            password = random_token()

        # check to see if this is a duplicate
        if login in duplicates:
            if len(issns) == 0:
                print("INFO: duplicate detected, has no ISSNs, so skipping %s" % (login,))
                continue
            print("INFO: duplicate detected, with ISSNs, so keeping %s" % (login,))

        a = Account()
        a.set_id(login)
        a.set_password(password)

        if name is not None:
            a.set_name(name)

        if email is not None:
            a.set_email(email)

        for issn in issns:
            if issn.text:
                jid = _get_journal_id_from_issn(issn.text)
                a.add_journal(jid)

        a.prep()  # prep for saving, since we're not actually going to call save()
        batch.append(a.data)

        if len(batch) >= batch_size:
            Account.bulk(batch, refresh=True)
            # clear in place so the same list object is reused
            del batch[:]

    # flush whatever is left over from the last partial batch
    # NOTE(review): unlike the in-loop call this one does not pass
    # refresh=True - confirm that is intentional
    if batch:
        Account.bulk(batch)