Пример #1
0
def extractData():
	url = request.args.get('url', 0, type=str)
	if url.startswith('http://') is False:
		url = 'http://' + url
	crawler = NaverBlog.post_crawling(url)
		
	data = crawler.write_data_as_dict()
	dataCreater = DataCreater()
	dataCreater.loadData(data)
	dataCreater.loadNgramData()
	dataCreater.postAnalysis()
	# return dataCreater.createDataSet()
	data = json.loads(dataCreater.createDataSet())
	data_0_jsonFormat = str(json.dumps(data[0], ensure_ascii=False, indent=4))
	return data_0_jsonFormat
Пример #2
0
def autoUpdate():
    dateAndTime_current = time.strftime("%Y%m%d%H%M%S")
    MODELPATH = "./model/" + dateAndTime_current
    MODELRECORD = "./model/model_record.txt"
    DATAPATH = "./data/" + dateAndTime_current
    DATARECORD = "./data/data_record.txt"
    DATAFILE_Before = "data_before.json"
    DATAFILE = "data.json"

    # read newData and file reset
    f_feedBack_DATA = open("./db/userFeedBack.json", "r")
    feedBackData = json.loads(f_feedBack_DATA.read())
    f_feedBack_DATA.close()

    if len(feedBackData) < 1:
        return "fail : there is no data"

    f_feedBack_DATA = open("./db/userFeedBack.json", "w")
    f_feedBack_DATA.write("[]")
    f_feedBack_DATA.close()

    # make new directory of model
    if not os.path.exists(MODELPATH):
        os.makedirs(MODELPATH)

        # make new directory of data
    if not os.path.exists(DATAPATH):
        os.makedirs(DATAPATH)

        # append new record to model record file
    f_MODELRECORD = open(MODELRECORD, "a+")
    f_MODELRECORD.write("\n" + dateAndTime_current)
    f_MODELRECORD.close()

    # read recent data and append new record to data record file
    f_DATARECORD = open(DATARECORD, "r")
    recent_DATARECORD = f_DATARECORD.read().splitlines()[-1]
    f_DATARECORD.close()
    f_DATARECORD = open(DATARECORD, "a+")
    f_DATARECORD.write("\n" + dateAndTime_current)
    f_DATARECORD.close()

    # get recent data
    f_recent_DATA = open("./data/" + recent_DATARECORD + "/" + DATAFILE, "r")
    recentData = json.loads(f_recent_DATA.read())
    f_recent_DATA.close()

    # merge
    f_new_DATA = open(DATAPATH + "/" + DATAFILE_Before, "w")
    newData = recentData + feedBackData
    json.dump(newData, f_new_DATA, ensure_ascii=False, indent=4)
    f_new_DATA.close()

    # dataCreater
    dataCreater = DataCreater()

    f_in = open(DATAPATH + "/" + DATAFILE_Before, "r")
    dataCreater.loadData(json.loads(f_in.read()))
    dataCreater.postAnalysis()
    print("*****텍스트 분석 및 Feature 추가 완료*****")
    print("# 텍스트 분석 소요시간 : {} sec".format(dataCreater.time_textAnalysis))
    print("# Feature 추가 소요시간 : {} sec".format(dataCreater.time_addFeature))
    # dataCreater.loadLabelData()
    # print("*****Label 추가 완료*****")
    # print("# Label 추가 소요시간 : {} sec".format(dataCreater.time_loadLabelData))
    f_out = open(DATAPATH + "/" + DATAFILE, "w")
    f_out.write(dataCreater.createDataSet())
    print("create json file")

    f_in.close()
    f_out.close()

    f_featureList = open(DATAPATH + "/featureList.json", "w")
    f_featureList.write(dataCreater.getFeatureList())
    print("create list of feature file")
    dataCreater.createFeatureNgram()
    print("create featureData file")
    # make model temp
    print("** start to make model **")
    f_data = open(DATAPATH + "/" + DATAFILE, "r")
    f_featureList = open(DATAPATH + "/featureList.json", "r")

    data = json.loads(f_data.read())
    featureList = json.loads(f_featureList.read())

    modelMaker = make_model(data, featureList, MODELPATH)
    modelMaker.play()
    print("** complete to make model **")
# insert crawler data to ./crawlingData/data.json



from DataCreater import DataCreater



dataCreater = DataCreater()



f_in = open("./crawlingData/data.json", "r")
dataCreater.loadData(f_in.read())
dataCreater.postAnalysis()
print("*****텍스트 분석 및 Feature 추가 완료*****")
print("# 텍스트 분석 소요시간 : {} sec".format(dataCreater.time_textAnalysis))
print("# Feature 추가 소요시간 : {} sec".format(dataCreater.time_addFeature))
# dataCreater.loadLabelData(path)
print("*****Label 추가 완료*****")
print("# Label 추가 소요시간 : {} sec".format(dataCreater.time_loadLabelData))
f_out = open("./data/data.json", "w")
f_out.write(dataCreater.createDataSet())
print("create json file")

f_in.close()
f_out.close()



f_featureList = open("./data/featureList.json", "w")