import csv
import urllib2
# parse_org() is assumed to be defined elsewhere in the original module.


def process(filt):
    # CSV fields come in as strings, so normalise the filter to a string;
    # the original compared an int against the string fields, which never matched
    filt = str(int(filt))
    f_okato = open("res/okato_codes.csv", 'rb')
    csvreader = csv.DictReader(f_okato)
    for row in csvreader:
        # '0' means "no filter": process every row
        if filt in (row['OKATO1'], row['OKATO2'], row['OKATO3']) or filt == '0':
            final = ("http://112.ru/publish/00/00/nearOrg/mvd/" +
                     row['OKATO1'] + "/f" + row['OKATO2'])
            if row['OKATO3'] != '':
                final += "/" + row['OKATO3']
            try:
                res = urllib2.urlopen(final + ".shtml")
                parse_org(final, row['OKATO2'])
            except urllib2.URLError, e:
                get_photo_status = False
                if hasattr(e, 'reason'):
                    print 'We failed to reach a server.'
                    print 'Reason: ', e.reason
                elif hasattr(e, 'code'):
                    print 'The server couldn\'t fulfill the request.'
                    print 'Error code: ' + str(e.code) + " Page: " + final + ".shtml"
    f_okato.close()
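# Minimal usage sketch for process(), assuming res/okato_codes.csv exists and
# parse_org() is defined; both come from the original project, not from this
# snippet.
if __name__ == '__main__':
    process(0)     # 0 disables the OKATO filter and walks the whole CSV
    process('45')  # only rows whose OKATO1/2/3 field equals '45'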
def handle(self, *args, **options):
    file_name = args[0]
    f = open(file_name, 'rb')
    d = csv.DictReader(f)
    for row in d:
        username = row['username']
        if User.objects.filter(username=username).exists():
            print 'User %s exists.' % (username)
        else:
            first_name = row.get('first_name', '')
            last_name = row.get('last_name', '')
            email = row.get('email', '')
            locality = row.get('locality', '')
            gender = row.get('gender', '')
            password = row.get('password', '')
            user = User(
                username=username,
                email=email,
                first_name=first_name,
                last_name=last_name,
            )
            user.set_password(password)
            user.save()
            user.profile.gender = gender
            try:
                user.profile.locality = Entity.objects.get(id=locality)
            except ObjectDoesNotExist:
                print 'user %s locality id %s does not exist' % (username, locality)
            user.profile.save()
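# Hedged usage sketch for the command above: the command name "import_users"
# is an assumption, and the CSV layout mirrors the columns the handler reads.
#
#   python manage.py import_users users.csv
#
# users.csv:
#
#   username,first_name,last_name,email,locality,gender,password
#   dana,Dana,Levi,dana@example.com,42,F,s3cret
#
# (locality is an Entity primary key; unknown ids are reported and skipped)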
import ucsv


def GetOrgnrAndKommunIDForGroup(groupname):
    groupname = groupname.lower()
    with open('data/kommunid.csv', 'rb') as f:
        reader = ucsv.DictReader(f, delimiter=',', quoting=ucsv.QUOTE_ALL,
                                 fieldnames=['namn', 'id', 'orgnr'])
        for row in reader:
            name = row['namn'].lower().strip("\"")
            if name == groupname:
                return str(row['id']), str(row['orgnr'])
    return "", ""
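# Minimal usage sketch for GetOrgnrAndKommunIDForGroup(), assuming a
# data/kommunid.csv of the form  "Stockholm",0180,212000-0142  (the sample
# values are made up). Two empty strings mean the group name was not found.
if __name__ == '__main__':
    kommun_id, orgnr = GetOrgnrAndKommunIDForGroup('Stockholm')
    if kommun_id:
        print 'id=%s orgnr=%s' % (kommun_id, orgnr)
    else:
        print 'unknown group'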
def handle(self, *args, **options):
    file_name = args[0]
    f = open(file_name, 'rb')
    d = csv.DictReader(f)
    site = Site.objects.get(pk=settings.SITE_ID)
    for row in d:
        # CSV headers are Hebrew: u'שם' is "name", u'דואר אלקטרוני' is "email"
        names = row[u'שם'].split(' ')
        email = row[u'דואר אלקטרוני']
        user = invite_user(
            username=email,
            email=email,
            first_name=names[0],
            last_name=' '.join(names[1:]),
            site=site,
        )
        if user.is_active:
            self.stdout.write('%s is already active, no invitation sent' % email)
        else:
            # send an invitation email
            reg_profile = user.registrationprofile_set.all()[0]
            ctx_dict = {
                'invitation_key': reg_profile.activation_key,
                'expiration_days': settings.ACCOUNT_ACTIVATION_DAYS,
                'site': site,
            }
            # Email subject *must not* contain newlines
            subject = render_to_string('user/invitation_email_subject.txt',
                                       ctx_dict).rstrip()
            html_content = render_to_string('user/invitation_email.html',
                                            ctx_dict)
            text_content = '\n'.join((
                strip_tags(html_content),
                "http://%s%s" % (site.domain,
                                 reverse("accept-invitation",
                                         args=(ctx_dict['invitation_key'],))),
            ))
            # create the email, and attach the HTML version as well
            msg = EmailMultiAlternatives(subject, text_content,
                                         settings.DEFAULT_FROM_EMAIL, [email])
            msg.attach_alternative(html_content, "text/html")
            msg.send()
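# Hedged usage sketch: the command name "invite_users" is an assumption. The
# input CSV must carry the Hebrew headers the handler reads, שם ("name") and
# דואר אלקטרוני ("email"):
#
#   python manage.py invite_users invitees.csv
#
# invitees.csv:
#
#   שם,דואר אלקטרוני
#   Dana Levi,dana@example.com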
import os
import ucsv
from operator import itemgetter
# parse_input, sanitize_file, annotate_phrase and calc_sort_value are assumed
# to be defined elsewhere in the original module.


def main(argv):
    (inputfile, outputfile) = parse_input(argv)
    phrases = []
    cleaned_file = ''
    try:
        cleaned_file = sanitize_file(inputfile)
        line_number = 1
        # Here we don't need codecs.open, as we use ucsv to read the file
        with open(cleaned_file, 'rb') as csvfile:
            for row in ucsv.DictReader(csvfile):
                line_number += 1  # row 1 is the header, so data starts on line 2
                pinyin_phrase = row['Pronunciation']
                try:
                    annotated_pinyin = annotate_phrase(pinyin_phrase)
                except ValueError:
                    print "There's a fishy pronunciation entry on line %d." % line_number
                    continue
                sort_value = calc_sort_value(annotated_pinyin[::-1], 1, 0)
                (first_syllable, _tone) = annotated_pinyin[0]
                hanzi_phrase = row['Word']
                phrases.append((sort_value, first_syllable,
                                hanzi_phrase, pinyin_phrase))
        if cleaned_file.endswith('sanitized.csv'):
            os.remove(cleaned_file)
    except IOError:
        if cleaned_file.endswith('sanitized.csv'):
            os.remove(cleaned_file)
        print 'Bad input file: ', inputfile
    # sort by sort value, then first syllable, and drop both before writing
    sorted_phrases = sorted(phrases, key=itemgetter(0, 1))
    output_ready_phrases = [phrase[2:4] for phrase in sorted_phrases]
    with open(outputfile, 'wb') as f:
        writer = ucsv.writer(f)
        writer.writerow(['Word', 'Pronunciation'])
        writer.writerows(output_ready_phrases)
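# Hedged usage sketch for main(): whether parse_input() expects the full
# sys.argv or only the arguments, and which flags it accepts, depends on its
# definition elsewhere; passing sys.argv[1:] is an assumption.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])  # e.g. python sort_phrases.py phrases.csv sorted.csv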
import codecs
import csv
import sys
import tempfile

import requests
# show() and dboperator are assumed to come from elsewhere in the original module.


def load(secure, hostname, url, schema, table, verbose):
    show("begin " + hostname + " " + url + " " + schema + " " + table)
    if secure:
        address = "https://" + hostname + url
    else:
        address = "http://" + hostname + url

    # load from web
    show("load from " + address)
    try:
        response = requests.get(address)
    except requests.exceptions.RequestException, e:
        # the original "except e:" was a syntax error; catch the
        # requests base exception instead
        show('HTTP GET failed.')
        show('Reason: %s' % (e,))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")
        # read the data. all of it. this is dangerous for big datasets!
        # convert to utf-8 on-the-fly if it's not
        data = response.text.encode('utf-8')

    # create temporary file (deleted when closed)
    f = tempfile.NamedTemporaryFile()  # defaults: mode='w+b', delete=True
    show("using tempfile: %s" % (f.name))
    f.write(data)
    f.seek(0)  # start using data, go to start
    # skip the BOM if one exists
    if f.read(3) != codecs.BOM_UTF8:
        f.seek(0)

    # make csv dictionary (first row must have column names)
    csvdata = csv.DictReader(f, delimiter=";")

    # discover columns and their types (read through entirely!)
    show("discover table structure")
    cnt = 0
    for row in csvdata:
        cnt += 1
        if verbose:
            print cnt, row
            for col in row:
                print cnt, col, row[col]
        dboperator.columns(row)

    # start operating with database: drop, then recreate, the target table
    show("drop %s.%s" % (schema, table))
    dboperator.drop(schema, table)
    show("create %s.%s" % (schema, table))
    dboperator.create(schema, table)

    show("insert data")
    # reset csvdata: rewind the file and skip the BOM again
    f.seek(0)
    if f.read(3) != codecs.BOM_UTF8:
        f.seek(0)
    csvdata = csv.DictReader(f, delimiter=";")
    cnt = 0
    for row in csvdata:
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        dboperator.insert(address, schema, table, row)
    show("wrote %d" % (cnt))
    dboperator.close()
    # close (and delete) the temporary file
    f.close()
    show("ready")
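# Hedged usage sketch for load(): show() and dboperator must be provided by
# the original module; the hostname and path below are made up. This would
# fetch a semicolon-separated CSV over HTTPS and (re)create staging.web_import
# from its rows.
if __name__ == '__main__':
    load(True, "data.example.org", "/api/export.csv",
         "staging", "web_import", verbose=False)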
import csv
import re
# 'exml' (an ElementTree-compatible module) and 'log' (a logger) are assumed
# to be bound elsewhere in the original module.


class ParseXML:
    ########################## READ CSV ###################
    # Read CSV file containing the right tags to produce
    dictReader = csv.DictReader(open('awol_title_strings.csv', 'rb'),
                                fieldnames=['titles', 'tags'],
                                delimiter=',', quotechar='"')
    # Build a dictionary from the CSV file -> {<string>: <tags to produce>}
    titleStringsDict = dict()
    for row in dictReader:
        titleStringsDict.update({row['titles']: row['tags']})

    # Read awol_colon_prefixes.csv file and build a dictionary
    dictReader2 = csv.DictReader(open('awol_colon_prefixes.csv', 'rb'),
                                 fieldnames=['col_pre', 'omit_post',
                                             'strip_title', 'mul_res'],
                                 delimiter=',', quotechar='"')
    colPrefDict = dict()
    # Build a dictionary of format {<column prefix>: <list of cols 2, 3 and 4>}
    for row in dictReader2:
        colPrefDict.update({
            row['col_pre']: [row['omit_post'], row['strip_title'], row['mul_res']]
        })

    # Read content-disposition.csv file and build a dictionary
    dictReader3 = csv.DictReader(open('content-disposition.csv', 'rb'),
                                 fieldnames=['title', 'title_normalized',
                                             'colonfix', 'single_resource',
                                             'ignore', 'checked',
                                             'multiple_resource', 'url', 'id'],
                                 delimiter=',', quotechar='"')
    contDispDict = dict()
    # Build a dictionary of format {<id>: [<list of rest of the columns>]}
    for row in dictReader3:
        if row['single_resource'] == 'true':
            contDispDict.update({
                row['id']: [row['title'], row['title_normalized'],
                            row['colonfix'], row['single_resource'],
                            row['ignore'], row['checked'],
                            row['multiple_resource'], row['url']]
            })
    ############# END OF READ CSV #########################

    # Append tag(s) to 'tags', splitting comma-separated values into
    # individual tags
    def checkMulTags(self, tag, tags):
        if ',' in tag:
            for tg in tag.split(','):
                tags.append({'tag': tg})
        else:
            tags.append({'tag': tag})

    # Get ISSNs, if any, from the given XML
    def getISSNFromXML(self, root):
        xmlStr = exml.tostring(root, encoding='utf8', method='xml')
        issnrex = re.findall(r'issn[^\d]*[\dX]{4}-?[\dX]{4}', xmlStr,
                             re.IGNORECASE)
        if issnrex:
            log.debug('Found ISSNs')
            if len(issnrex) > 1:
                # More than one ISSN found: prefer the electronic/online one.
                # (The original tested "('electrón' or 'électron' or ...) in s",
                # which only ever checked the first literal.)
                markers = ('electrón', 'électron', 'electron', 'digital', 'online')
                for s in issnrex:
                    if any(m in s for m in markers):
                        issn = re.search(r'[\dX]{4}-?[\dX]{4}', s)
                        log.debug(issn.group())
                        return issn.group()
            issn = re.search(r'[\dX]{4}-?[\dX]{4}', issnrex[0], re.IGNORECASE)
            log.debug(issn.group())
            return issn.group()
        else:
            return None

    # Look up data in the CSV-derived dicts and produce the relevant tags
    def produceTag(self, tags, categories, title):
        for c in categories:
            tag = c.attrib['term']
            # the original "tag != '' or tag != None" was always true;
            # a plain truthiness check is what was meant
            if tag and 'kind#post' not in tag.lower():
                if tag in self.titleStringsDict:
                    tag = self.titleStringsDict[tag]
                else:
                    tag = self.caseConversion(tag)
                # split multiple comma-separated tags if present
                self.checkMulTags(tag, tags)
        print tags
        for key in self.titleStringsDict.keys():
            try:
                if title != None and key in title.lower():
                    tag = self.titleStringsDict[key]
                    if tag != '':
                        self.checkMulTags(tag, tags)
            except Exception, e:
                pass
                # log.info("Problem with key:%s" % key)
        # The original boolean chains here ('"open" and ("access" or ...)')
        # always evaluated to a fixed truthy literal; test the substrings
        # explicitly instead.
        if title != None:
            if "open" in title and ("access" in title or "accesss" in title):
                if "partially" in title:
                    tags.append({u'tag': "Mixed Access"})
                else:
                    tags.append({u'tag': "Open Access"})
            elif "series" in title and "lecture" not in title:
                tags.append({u'tag': "Series"})
        print tags
        return tags
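# Hedged usage sketch for ParseXML.produceTag(): the three CSV files must sit
# in the working directory when the class body runs, and 'categories' is an
# iterable of elements carrying a 'term' attribute (Atom <category> elements
# in the original feed-parsing context). An empty categories list is passed
# here because caseConversion() is defined elsewhere in the original class.
if __name__ == '__main__':
    parser = ParseXML()
    print parser.produceTag([], [], 'open access journal of epigraphy')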