def readConfig():
    """Read the JSON config file while holding the mutex.

    Returns:
        The parsed JSON data, or None if the mutex could not be
        acquired within the 1-second timeout.
    """
    # Bail out rather than block forever if another holder wedges the lock.
    if not mutex.acquire(timeout=1):
        log.error('mutex timeout')
        return
    try:
        # NOTE(review): assumes readJsonFile returns the parsed payload — confirm.
        return utils.readJsonFile(filepath, log)
    finally:
        # BUG FIX: release in finally so the mutex is not leaked when
        # readJsonFile raises (the original released only on success).
        mutex.release()
def startCrawlingAll():
    """Crawl every dong listed in seoul_dong.json, skipping already-crawled ones.

    For each dong: create its output directory (if creation fails the dong
    was crawled in a previous run and is skipped), run the Naver crawler,
    integrate the crawled data, then wait `timeTerm` minutes before moving
    on to the next dong.
    """
    json_data = utils.readJsonFile("../../dong_json/seoul_dong.json")
    utils.logging("크롤링 시작!")
    timeTerm = 30  # minutes to wait between dongs
    for i, dong in enumerate(json_data):
        dongName = dong["DONG"]
        dirPath = "../../restaurants/" + dongName + "_restaurants_json"
        # createDir returns False when the directory already exists,
        # i.e. this dong was handled by a previous run.
        if not createDir(dirPath):
            utils.logging(dongName, "은 이미 크롤링을 한 동이기에 다음 동으로 크롤링을 넘깁니다.")
            continue
        utils.logging("메인에서", dongName, "크롤링 시작.")
        naverCrawl_auto.startCrawling(dongName, dirPath)
        utils.logging("쿼리문 데이터 통합 시작")
        integrateData.makeIntegrateData(dirPath)
        utils.logging("요청한", dongName, "에 대한 크롤링 완료")
        # BUG FIX: the total was logged as len(json_data)+1 — an off-by-one;
        # the progress message should report i+1 out of len(json_data).
        utils.logging("전체", len(json_data), "중", i + 1, "번째 동 크롤링 완료")
        utils.logging("다음 크롤링을 위해", timeTerm, "분 대기 중")
        time.sleep(timeTerm * 60)
def main(event, context):
    """Serverless-style entry point: set up logging, load config, fetch the subscribe URL.

    Args:
        event: invocation event payload (not used in this function).
        context: runtime context object (not used in this function).
    """
    # Initialize the log file for this run.
    utils.initLog('log.txt')
    utils.clearLog()
    # NOTE(review): `config` is loaded but never used below — confirm whether
    # readJsonFile has a needed side effect or the binding can be dropped.
    config = utils.readJsonFile('config.json')
    # artUrl = getArticle()
    # getSubscribeUrl(artUrl)
    getSubscribeUrl()
def getYahooFinanceStockUrlWithoutTicker(
        yahooFinanceURLsPath="./yahooFinanceURLs.json"):
    """Return the base Yahoo Finance stock URL (no ticker appended).

    Args:
        yahooFinanceURLsPath: path to the JSON file holding the URL table.

    Returns:
        The value stored under the 'stockURL' key of that file.
    """
    return utils.readJsonFile(yahooFinanceURLsPath)['stockURL']
import os
import sys
import fileinput
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# BUG FIX: `pandas.io.json.json_normalize` was deprecated in 0.25 and removed
# in pandas 1.0 (ImportError on modern pandas); the public name is
# `pandas.json_normalize`.
from pandas import json_normalize
from sklearn import preprocessing

from utils import readJsonFile


def plot_corr(df, size=8):
    """Show a matrix plot of the pairwise column correlations of *df*.

    Args:
        df: pandas DataFrame of numeric columns.
        size: figure width and height in inches.
    """
    corr = df.corr()
    fig, ax = plt.subplots(figsize=(size, size))
    ax.matshow(corr)
    plt.xticks(range(len(corr.columns)), corr.columns)
    plt.yticks(range(len(corr.columns)), corr.columns)
    plt.show()


# NOTE(review): readJsonFile is called with no arguments here, while other
# call sites pass a path — confirm it has a usable default.
df = readJsonFile()
print(df.head())
# plot_corr(df)
print('END')
def loadData():
    """Load and return the JSON data at the module-level `filepath`, logging via `log`."""
    data = readJsonFile(filepath, log)
    return data