Python UnicodeReader.next 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: unicodeManager

클래스/타입: UnicodeReader

메소드/함수: next

hotexamples.com에서의 예제들: 5

Python UnicodeReader.next - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python unicodeManager.UnicodeReader.next의 실제 사용 예제 가운데 평점이 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

UnicodeReader(16)

next(3)

예제 #1

파일 보기

파일: worldCountries.py 프로젝트: AyushiRastogi/countryNameManager

    def __init__(self):
        """Load country names, alternative spellings and TLD codes.

        Reads ``countries.csv`` from ``DATA_PATH``, discards its first row,
        and fills the following attributes from the remaining rows:

        * ``namesSet`` -- every normalised country name and alternative name.
        * ``tldsSet`` -- every non-empty, lower-cased code found in column 4.
        * ``alternative2name`` -- any known spelling -> canonical name
          (the canonical name maps to itself).
        * ``tld2name`` -- code -> canonical name.
        * ``name2alternatives`` -- any spelling -> set of all spellings of the
          same country (one set object shared by all of its spellings).

        Names are normalised with ``unidecode(...).lower().strip()``.
        """
        self.namesSet = set()
        self.tldsSet = set()
        self.alternative2name = {}
        self.tld2name = {}
        self.name2alternatives = {}

        # The list of country names, alternative spellings, and 2-letter
        # codes (TLDs). ``with`` guarantees the file is closed even when a
        # malformed row raises part-way through loading.
        with open(os.path.join(DATA_PATH, 'countries.csv'), 'rb') as f:
            reader = UnicodeReader(f)
            reader.next()  # discard the first row (presumably the header)
            for row in reader:
                # row[0] is not used (it looks like a numeric id).
                # The country name
                name = unidecode(row[1]).lower().strip()
                self.namesSet.add(name)
                self.alternative2name[name] = name

                # Different alternative names, separated by comma. Empty
                # entries (blank column, "a,,b", trailing comma) are dropped
                # per item so that '' is never registered as a country alias.
                alternatives = []
                for raw in row[2].split(','):
                    alt = unidecode(raw).lower().strip()
                    if alt:
                        alternatives.append(alt)
                for a in alternatives:
                    self.alternative2name[a] = name
                    self.namesSet.add(a)

                allVariants = set(alternatives)
                allVariants.add(name)
                for variant in allVariants:
                    self.name2alternatives[variant] = allVariants

                # The 2-letter codes (TLDs); blank entries are skipped.
                for raw in row[4].split(','):
                    c = raw.lower().strip()
                    if c:
                        self.tld2name[c] = name
                        self.tldsSet.add(c)

예제 #2

파일 보기

    def __init__(self):
        """Build the country lookup tables from ``countries.csv``.

        The CSV is read from ``DATA_PATH``; its first row is discarded. For
        every other row, column 1 is the country name, column 2 a
        comma-separated list of alternative names and column 4 a
        comma-separated list of codes. All names go through
        ``unidecode(...).lower().strip()`` before being stored.

        Attributes populated:

        * ``namesSet`` / ``tldsSet`` -- all known names / all known codes.
        * ``alternative2name`` -- spelling -> canonical country name.
        * ``tld2name`` -- code -> canonical country name.
        * ``name2alternatives`` -- spelling -> shared set of every spelling
          of that country.
        """
        self.namesSet = set()
        self.tldsSet = set()
        self.alternative2name = {}
        self.tld2name = {}
        self.name2alternatives = {}

        csv_path = os.path.join(DATA_PATH, 'countries.csv')
        # Use a context manager so the handle is released even if parsing
        # a row raises (the original only closed it on the success path).
        with open(csv_path, 'rb') as f:
            reader = UnicodeReader(f)
            reader.next()  # drop the first row (presumably column titles)
            for row in reader:
                # The country name (row[0] is ignored).
                name = unidecode(row[1]).lower().strip()
                self.namesSet.add(name)
                self.alternative2name[name] = name

                # Alternative names, separated by comma. Filter on each
                # normalised item rather than on the whole column, so blank
                # pieces such as the middle of "a,,b" never become aliases.
                normalised = [
                    unidecode(a).lower().strip() for a in row[2].split(',')
                ]
                alternatives = [a for a in normalised if a]
                for a in alternatives:
                    self.alternative2name[a] = name
                    self.namesSet.add(a)

                # Every spelling of this country points at the same set.
                allVariants = set(alternatives).union(set([name]))
                for variant in allVariants:
                    self.name2alternatives[variant] = allVariants

                # The 2-letter codes (TLDs), ignoring blank entries.
                codes = [t.lower().strip() for t in row[4].split(',')]
                for c in codes:
                    if not c:
                        continue
                    self.tld2name[c] = name
                    self.tldsSet.add(c)

예제 #3

파일 보기

파일: brazilStates.py 프로젝트: zhangxunhui/countryNameManager

 def __init__(self):
     """Load Brazilian state names and abbreviations from ``brazilStates.csv``.

     The CSV is read from ``DATA_PATH``; its first (header) row is skipped.
     Column 0 is the state name, normalised with
     ``unidecode(...).lower().strip()``; column 1 is the abbreviation,
     lower-cased and stripped.

     Attributes populated:

     * ``abbrev2name`` -- abbreviation -> normalised state name.
     * ``namesSet`` -- all normalised state names.
     * ``abbrevsSet`` -- all abbreviations.
     """
     self.abbrev2name = {}
     self.namesSet = set()
     self.abbrevsSet = set()

     # Load data. The context manager closes the file even if a row is
     # malformed and raises while being processed.
     with open(os.path.join(DATA_PATH, 'brazilStates.csv'), 'rb') as f:
         reader = UnicodeReader(f)
         reader.next()  # skip the header row; its contents are not needed
         for row in reader:
             name = unidecode(row[0]).lower().strip()
             abbrev = row[1].lower().strip()
             self.abbrevsSet.add(abbrev)
             self.abbrev2name[abbrev] = name
             self.namesSet.add(name)

예제 #4

파일 보기

파일: mergeAliases.py 프로젝트: fasarker/ght_unmasking_aliases

# Output CSV writers; all three files are created under <dataPath>/idm/.
_idm_dir = os.path.join(dataPath, 'idm')
w_log = UnicodeWriter(open(os.path.join(_idm_dir, 'idm_log.csv'), 'wb'))
writer = UnicodeWriter(open(os.path.join(_idm_dir, 'idm_map.csv'), 'wb'))
w_maybe = UnicodeWriter(open(os.path.join(_idm_dir, 'idm_maybe.csv'), 'wb'))

# Counters: curidx starts at one full step.
# NOTE(review): presumably used for periodic progress reporting by code
# further down the script -- confirm.
step = 100000
idx, curidx = 0, step

aliases = {}

# Input CSV. Disabled alternative input: 'users_clean_emails_sample.csv'.
_users_csv = os.path.join(dataPath, 'active_prolific_users.csv')
reader = UnicodeReader(open(_users_csv, 'rb'))
_header = reader.next()  # consume the first row before any iteration

# Helper structures, all starting out empty. The names suggest paired
# forward/reverse lookups keyed by uid, email, prefix and domain -- verify
# against the code that fills them.
d_email_uid, d_uid_email = {}, {}

d_prefix_uid, d_uid_prefix = {}, {}

d_comp_prefix_uid, d_uid_comp_prefix = {}, {}

d_uid_domain, d_domain_uid = {}, {}

d_name_uid = {}

예제 #5

파일 보기

파일: mergeAliases.py 프로젝트: AyushiRastogi/ght_unmasking_aliases

# Root directory of the data set, resolved relative to the working directory.
dataPath = os.path.abspath('../../data/2014-01')


def _open_idm_writer(filename):
    """Return a UnicodeWriter on <dataPath>/idm/<filename>, opened as 'wb'."""
    return UnicodeWriter(open(os.path.join(dataPath, 'idm', filename), 'wb'))


# Output CSV writers.
w_log = _open_idm_writer('idm_log.csv')
writer = _open_idm_writer('idm_map.csv')
w_maybe = _open_idm_writer('idm_maybe.csv')

# Counters: curidx starts at one full step.
# NOTE(review): presumably used for periodic progress reporting by code
# further down the script -- confirm.
step = 100000
idx, curidx = 0, step

aliases = {}

# Input CSV. Disabled alternative input: 'users_clean_emails_sample.csv'.
_users_csv = os.path.join(dataPath, 'clean', 'users_clean_emails.csv')
reader = UnicodeReader(open(_users_csv, 'rb'))
_header = reader.next()  # consume the first row before any iteration

# Helper structures, all starting out empty. The names suggest paired
# forward/reverse lookups keyed by uid, email, prefix and domain -- verify
# against the code that fills them.
d_email_uid, d_uid_email = {}, {}

d_prefix_uid, d_uid_prefix = {}, {}

d_comp_prefix_uid, d_uid_comp_prefix = {}, {}

d_uid_domain, d_domain_uid = {}, {}

d_name_uid = {}