def _load_json_or_exit(path):
    """Return the parsed JSON content of the file at ``path``.

    Exits the program (via ``sys.exit``) with an error message if the file
    cannot be opened or does not contain valid JSON.
    """
    try:
        # `with` guarantees the file handle is closed, even on a parse error.
        with open(path) as file_:
            return json.load(file_)
    except (IOError, OSError, TypeError):
        # TypeError covers a non-path value such as ``None`` being passed in.
        sys.exit(u'%sUnable to locate file %s. Aborting.%s' % (
            ANSI_FAIL, path, ANSI_ENDC))
    except ValueError:
        # The file exists but is not valid JSON; say so instead of claiming
        # that it could not be located.
        sys.exit(u'%sUnable to parse file %s as JSON. Aborting.%s' % (
            ANSI_FAIL, path, ANSI_ENDC))


def _relational_values_to_ids(collection):
    """Rewrite, in place, the values of ``collection`` for an update request.

    Nested elicitor/speaker/source dicts are replaced by their ids, tag and
    file lists by lists of ids, and a ``date_elicited`` value is converted
    from yyyy-mm-dd to mm/dd/yyyy format.
    """
    for attr in ('elicitor', 'speaker', 'source'):
        if collection[attr]:
            collection[attr] = collection[attr]['id']
    for attr in ('tags', 'files'):
        if collection[attr]:
            collection[attr] = [item['id'] for item in collection[attr]]
    if collection['date_elicited']:
        # Convert yyyy-mm-dd to mm/dd/yyyy format
        parts = collection['date_elicited'].split('-')
        collection['date_elicited'] = '%s/%s/%s' % (
            parts[1], parts[2], parts[0])


def fix_collections(options):
    """Make HTTP requests to fix the order of forms in the OLD's collections.

    The form references (``form[<id>]``) in each OLD collection are re-ordered
    by the ``dateEntered`` value of the LingSync datum that each form was
    created from (ties are broken by form id). Collections whose contents
    consist only of form references are updated via a PUT request; for
    collections that contain anything else, the desired order is printed so
    that the user can fix them manually.

    :param options: object with the attributes ``ls_json_file`` (path to the
        raw LingSync JSON), ``old_json_file`` (path to the converted OLD
        JSON), ``old_url``, ``old_username`` and ``old_password``.
    :returns: ``None``; progress and warnings are printed.
    :raises SystemExit: if either JSON file cannot be read or parsed, or if
        the login to the OLD fails.
    """

    # Get raw LingSync JSON data.
    ls_data = _load_json_or_exit(getattr(options, 'ls_json_file'))

    # The converted OLD data is not otherwise used here, but it must still be
    # readable: abort early (as before) when it is not.
    _load_json_or_exit(getattr(options, 'old_json_file'))

    # Date-entered value used for forms whose LingSync datum cannot be
    # determined.
    unknown_date = '0'

    # Map the id of each raw LingSync datum to its date entered value.
    datumid2dateentered = {}
    for row in ls_data['rows']:
        doc = row['doc']
        if get_collection_for_lingsync_doc(doc) == 'datums':
            datumid2dateentered[doc['_id']] = doc['dateEntered']

    # Get an OLD client.
    old_url = getattr(options, 'old_url', None)
    old_username = getattr(options, 'old_username', None)
    old_password = getattr(options, 'old_password', None)
    c = OLDClient(old_url)

    # Log in to the OLD.
    logged_in = c.login(old_username, old_password)
    if not logged_in:
        # NOTE(review): this message echoes the password in clear text;
        # consider dropping it from the output.
        sys.exit(u'%sUnable to log in to %s with username %s and password %s.'
            u' Aborting.%s' % (ANSI_FAIL, old_url, old_username, old_password,
            ANSI_ENDC))

    # Populate the `formid2dateentered` dict, so that it maps OLD form ids to
    # date entered values taken from the raw LingSync data.
    formid2dateentered = {}
    patt3 = re.compile(r'This form was created from LingSync datum (\w+)')
    for form in c.get('forms'):
        form_id = form['id']
        date_entered = unknown_date
        # Guard against a null comments value.
        datum_ids = patt3.findall(form['comments'] or '')
        if not datum_ids:
            print('%sUnable to find LingSync datum id for OLD form %d: %s.%s' % (
                ANSI_WARNING, form_id, form['transcription'], ANSI_ENDC))
        else:
            if len(datum_ids) > 1:
                print('%sWarning: found multiple LingSync datum ids for OLD'
                    ' form %d.%s' % (ANSI_WARNING, form_id, ANSI_ENDC))
            datum_id = datum_ids[0]
            if datum_id in datumid2dateentered:
                date_entered = datumid2dateentered[datum_id]
            else:
                # The referenced datum is not in the LingSync file; warn and
                # fall back instead of crashing with a KeyError.
                print('%sUnable to find LingSync datum %s referenced by OLD'
                    ' form %d.%s' % (ANSI_WARNING, datum_id, form_id,
                    ANSI_ENDC))
        formid2dateentered[form_id] = date_entered

    # Issue the requests to fix each of the OLD collections, in turn.
    collections = c.get('collections')
    patt1 = re.compile(r'^(form\[\d+\])*$')
    patt2 = re.compile(r'form\[(\d+)\]')
    manualfix = {}
    for collection in collections:

        # If there's anything besides form references in the collection, then
        # we know the user has manually updated it and we can't fix it
        # automatedly; best we can do is tell the user the order of form
        # references that matches the LingSync version.
        contents = collection['contents'] or ''
        tmp = contents.replace(' ', '').replace('\n', '')
        contents_modified = not (
            patt1.search(tmp) or contents.strip() == '')

        # Build real lists so that the equality check below is meaningful
        # regardless of whether `map` returns a list or an iterator.
        current_form_ids = [int(match) for match in patt2.findall(tmp)]
        # Sort by (date entered, form id); forms unknown to the OLD get the
        # fallback date instead of raising a KeyError.
        sorted_form_ids = sorted(
            current_form_ids,
            key=lambda id_: (formid2dateentered.get(id_, unknown_date), id_))
        new_contents = '\n'.join(['form[%d]' % x for x in sorted_form_ids])

        if contents_modified:
            manualfix[collection['id']] = new_contents
            continue

        if current_form_ids == sorted_form_ids:
            print('Collection %d already has its forms in the correct'
                ' order.' % collection['id'])
            continue

        print('Fixing collection %d.' % collection['id'])
        collection['contents'] = new_contents
        # We must fix any relational data and or date elicited values
        # for the update request.
        _relational_values_to_ids(collection)
        resp = c.put('collections/%d' % collection['id'], collection)
        if resp.get('contents') != new_contents:
            print('Something went wrong when attempting to update the'
                ' contents of collection %d. It should have the following'
                ' contents value\n%s' % (collection['id'], new_contents))
            p(resp)

    # Report the collections that must be fixed by hand, in id order.
    for collection_id, new_contents in sorted(manualfix.items()):
        print('Collection %d has been altered by a user on the OLD so we'
            ' can\'t fix its form order here. You will have to do it. Please make'
            ' sure that the order of form references matches the following:\n%s.' % (
            collection_id, new_contents))

    print('Done.')