Exemplo n.º 1
0
def crawling_tourspot_visitor(district,
                              start_year,
                              end_year,
                              fetch=True,
                              result_directory='',
                              service_key=''):
    """Collect monthly tourist-spot visitor records and store them as JSON.

    For every month of every year in ``start_year``..``end_year`` (both
    inclusive) the records yielded by ``api.pd_fetch_tourspot_visitor`` are
    passed one by one to ``preprocess_tourspot_visitor`` and accumulated.
    The accumulated list is then written to the result file as UTF-8 JSON.

    Args:
        district: District name; forwarded to the API as ``district1`` and
            used as the leading part of the output file name.
        start_year: First year to fetch (inclusive).
        end_year: Last year to fetch (inclusive).
        fetch: When false, nothing is fetched or written; only the path the
            result file would have is computed and returned.
        result_directory: Directory that holds the result file. An empty
            string means the file name is used without any directory prefix.
        service_key: Forwarded unchanged to ``api.pd_fetch_tourspot_visitor``.

    Returns:
        The path of the result file, whether or not it was (re)written.
    """
    basename = '%s_tourspot_%s_%s.json' % (district, start_year, end_year)
    # Only prefix the directory when one was given: formatting an empty
    # directory into '%s/...' would yield '/<name>.json', i.e. a path at the
    # filesystem root instead of a relative one.
    if result_directory:
        filename = '%s/%s' % (result_directory, basename)
    else:
        filename = basename

    if not fetch:
        return filename

    results = []
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            for items in api.pd_fetch_tourspot_visitor(
                    district1=district,
                    year=year,
                    month=month,
                    service_key=service_key):
                # presumably preprocess_tourspot_visitor mutates each record
                # in place (its return value is not used) — TODO confirm
                for item in items:
                    preprocess_tourspot_visitor(item)
                results += items  # accumulate the processed records

    # Serialize with a 4-space indent and sorted keys; ensure_ascii=False
    # keeps non-ASCII text (e.g. Korean names) readable instead of escaping
    # it as \uXXXX sequences.
    with open(filename, 'w', encoding='utf-8') as outfile:
        json_string = json.dumps(results,
                                 indent=4,
                                 sort_keys=True,
                                 ensure_ascii=False)
        outfile.write(json_string)

    return filename
Exemplo n.º 2
0
def crawling(district1, start_year, end_year):
    """Fetch tourist-spot visitor records for a district and dump them to JSON.

    Records yielded by ``pdapi.pd_fetch_tourspot_visitor`` are run through
    ``preprocess_post`` one at a time, collected in a single list, and
    written to ``RESULT_DIRECTORY/<district1>_<start_year>_<end_year>.json``.
    Progress is echoed to stdout with debug prints. Returns ``None``.

    Args:
        district1: District name passed to the API and used in the file name.
        start_year: First year to fetch (inclusive).
        end_year: Upper bound of the year range (exclusive, see note below).
    """
    results = []
    print("results type", results)
    filename = '%s/%s_%s_%s.json' % (RESULT_DIRECTORY, district1, start_year,
                                     end_year)

    # NOTE(review): end_year itself is NOT fetched (range upper bound is
    # exclusive) although it appears in the file name, and only months 1 and
    # 2 are requested. Both look like leftover debugging limits — confirm.
    for year in range(start_year, end_year):
        for month in range(1, 3):
            fetched = pdapi.pd_fetch_tourspot_visitor(district1,
                                                      year=year,
                                                      month=month)
            for items in fetched:
                print("items===", type(items), items)
                # When the API yields a bare dict, wrap it in a list so the
                # iteration and the list extension below operate on the
                # record itself rather than on its keys.
                batch = [items] if type(items) is dict else items
                for item in batch:
                    print("item========!!!!", item)
                    print()
                    preprocess_post(item)
                results.extend(batch)
                print("type results", type(results), results)

    print("results====", results)

    # Persist everything that was collected as pretty-printed UTF-8 JSON.
    with open(filename, 'w', encoding='utf-8') as outfile:
        outfile.write(
            json.dumps(results,
                       indent=4,
                       sort_keys=True,
                       ensure_ascii=False))
Exemplo n.º 3
0
from analysis_pd.collect.api import api

# Smoke test for pd_gen_url: build the request URL for the July 2012
# visitor list and print it.
endpoint = 'http://openapi.tour.go.kr/openapi/service/TourismResourceStatsService/getPchrgTrrsrtVisitorList'
query = dict(
    YM='{0:04d}{1:02d}'.format(2012, 7),
    SIDO='서울특별시',
    GUNGU='',
    RES_NM='',
    numOfRows=10,
    _type='json',
    pageNo=1,
)
url = api.pd_gen_url(endpoint, **query)
print(url)

# Smoke test for pd_fetch_tourspot_visitor: print every value it yields.
visitor_batches = api.pd_fetch_tourspot_visitor(district1='서울특별시',
                                                year=2017,
                                                month=7)
for items in visitor_batches:
    print(items)

# Smoke test for pd_fetch_foreign_visitor: print the single returned value.
item = api.pd_fetch_foreign_visitor(112, 2017, 7)
print(item)
Exemplo n.º 4
0
request.get_method = lambda: 'GET'
response_body = urlopen(request).read()
print(response_body)
'''
'''
#url 어떻게 만드는지 테스트

url = pdapi.pd_gen_url(
'http://openapi.tour.go.kr/openapi/service/TourismResourceStatsService/getPchrgTrrsrtVisitorList',
    YM='{0:04d}{1:02d}'.format(2017, 1),
    SIDO='부산광역시',
    GUNGU='해운대구',
    RES_NM='부산시립미술관',
    numOfRows=10,
    _type='json',
    pageNo=1
)

print(url)
'''
# Print every value yielded for the district in January 2012.
busan_batches = pdapi.pd_fetch_tourspot_visitor(district1='부산광역시',
                                                year=2012,
                                                month=1)
for items in busan_batches:
    print(items)

#tesst=ttt.html_request(url='http://openapi.tour.go.kr/openapi/service/TourismResourceStatsService/getPchrgTrrsrtVisitorList')
#print(tesst)

#for items in pdapi.pd_fetch_tourspot_visitor(district1='서울특별시', year=2012, month=7):
#print(items)
Exemplo n.º 5
0
import analysis_pd.collect.api.api as pdapi

# test for pd_gen_url
# url = pdapi.pd_gen_url(
#     'http://openapi.tour.go.kr/openapi/service/TourismResourceStatsService/getPchrgTrrsrtVisitorList',
#     YM='{0:04d}{1:02d}'.format(2017, 1),
#     SIDO='서울특별시',
#     GUNGU='',
#     RES_NM='',
#     numOfRows=10,
#     _type='json',
#     pageNo=1)
#
# print(url)

# Smoke test for pd_fetch_tourspot_visitor: walk every yielded batch and
# print its entries one per line.
visitor_batches = pdapi.pd_fetch_tourspot_visitor(district1="서울특별시",
                                                  year=2012,
                                                  month=7)
for batch in visitor_batches:
    for record in batch:
        print(record)
# item = pdapi.pd_fetch_tourspot_visitor(district1="서울특별시", year=2012, month=7)
# print(item)
#     for i in item.get('response').get('body').get('items').get('item'):
#         print("item : ", i)


# test for pd_fetch_foreign_visitor
# item = pdapi.pd_fetch_foreign_visitor(112, 2012, 7)
# print(item)