def store_data_to_db(self, data_collection=None, label_collection=None):
    """
    Store the data and labels held by the Stata objects in the database.

    For every key ``year`` of ``self._stata_object`` the following documents
    are written:

    * one document per data record (tagged with ``year``) into
      ``data_collection``;
    * one "value labels" document and one "variable labels" document
      (both tagged with ``year`` and ``type``) into ``label_collection``.

    :param data_collection: collection receiving the data records; when None,
        the ``cgssdata`` collection of the ``surveydata`` database is used
    :param label_collection: collection receiving the label documents; when
        None, the ``cgsslabel`` collection of the ``surveydata`` database is
        used
    :return: self
    """
    if data_collection is None:
        data_collection = MonCollection(
            database=MonDatabase(mongodb=MongoDB(),
                                 database_name='surveydata'),
            collection_name='cgssdata').collection
    if label_collection is None:
        label_collection = MonCollection(
            database=MonDatabase(mongodb=MongoDB(),
                                 database_name='surveydata'),
            collection_name='cgsslabel').collection

    for year in self._stata_object:
        # Look the per-year object up once instead of on every access.
        stata = self._stata_object[year]

        # 1. Data records: one document per row, tagged with its year.
        for record in stata.read().to_dict("records"):
            record["year"] = year
            print(record)
            data_collection.insert_one(record)

        # 2. Value labels: the inner keys are converted to str before being
        #    stored (presumably because document keys must be strings).
        value_labels = stata.value_labels
        str_value_labels = {
            key: {str(inn_key): label
                  for inn_key, label in value_labels[key].items()}
            for key in value_labels
        }
        str_value_labels["year"] = year
        str_value_labels["type"] = "value labels"
        print(str_value_labels)
        label_collection.insert_one(str_value_labels)

        # 3. Variable labels: work on a copy so that the "year"/"type" tags
        #    are not written into the dict owned by the Stata object.
        variable_labels = dict(stata.variable_labels)
        variable_labels["year"] = year
        variable_labels["type"] = "variable labels"
        print(variable_labels)
        label_collection.insert_one(variable_labels)

    return self
def __init__(self):
    """
    Initialise the interface to the China city statistics database.

    Binds ``self.conn`` to the ``citystatistics`` collection of the
    ``regiondata`` database on ``localhost:27017``.
    """
    city_statistics = MonCollection(
        MongoDB(conn_str='localhost:27017'),
        database='regiondata',
        collection_name='citystatistics')
    self.conn = city_statistics.collection
def __init__(self):
    """
    Open the college collections of the ``webdata`` database and set the
    request headers.

    ``self._college_info`` and ``self._college_intro`` are bound to the
    ``college_info`` and ``college_introduction`` collections on
    ``localhost:27017``; ``self._headers`` holds a fixed User-Agent header.
    """
    client = MongoDB(conn_str='localhost:27017')

    def webdata_collection(name):
        # All collections used here live in the same 'webdata' database.
        return MonCollection(client,
                             database='webdata',
                             collection_name=name).collection

    self._college_info = webdata_collection('college_info')
    self._college_intro = webdata_collection('college_introduction')

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    self._headers = {'User-Agent': user_agent}
def __init__(self, data_collection=None, label_collection=None):
    """
    Set up the database connections.

    :param data_collection: collection used for the data; when None, the
        ``cgssdata`` collection of the ``surveydata`` database on
        ``localhost:27017`` is used
    :param label_collection: collection used for the labels; when None, the
        ``cgsslabel`` collection of the ``surveydata`` database on
        ``localhost:27017`` is used
    """
    # Fall back to the default data collection only when none was supplied.
    if data_collection is None:
        data_db = MonDatabase(mongodb=MongoDB(conn_str='localhost:27017'),
                              database_name='surveydata')
        data_collection = MonCollection(
            database=data_db, collection_name='cgssdata').collection
    self._data_collection = data_collection

    # Same fallback for the label collection.
    if label_collection is None:
        label_db = MonDatabase(mongodb=MongoDB(conn_str='localhost:27017'),
                               database_name='surveydata')
        label_collection = MonCollection(
            database=label_db, collection_name='cgsslabel').collection
    self._label_collection = label_collection
def __init__(self):
    """
    Open every collection used by this object on ``localhost:27017``.

    Three collections come from the ``cache`` database (``gaokaoweb``,
    ``gaokaodataweb``, ``gaokaouniversityweb``) and two from the ``webdata``
    database (``gaokao_entrancescore``, ``gaokaouniversityweb``).
    """
    client = MongoDB(conn_str='localhost:27017')

    def open_collection(database, name):
        # Every attribute below stores the underlying ``.collection`` handle.
        return MonCollection(client,
                             database=database,
                             collection_name=name).collection

    # Collections in the 'cache' database.
    self._web_conn = open_collection('cache', 'gaokaoweb')
    self._data_web_conn = open_collection('cache', 'gaokaodataweb')
    self._university_web_conn = open_collection('cache',
                                                'gaokaouniversityweb')

    # Collections in the 'webdata' database.
    self._data_conn = open_collection('webdata', 'gaokao_entrancescore')
    self._copy_data_web_conn = open_collection('webdata',
                                               'gaokaouniversityweb')
def store_label_to_db(self, label_collection=None):
    """
    Store the variable-to-value-label association in the database.

    For every key ``year`` of ``self._stata_label_object`` one document is
    written that maps each entry of the ``name`` column to the matching entry
    of the ``vallab`` column, tagged with ``year`` and ``type``.

    :param label_collection: collection receiving the documents; when None,
        the ``cgsslabel`` collection of the ``surveydata`` database is used
    :return: self
    """
    if label_collection is None:
        default_db = MonDatabase(mongodb=MongoDB(),
                                 database_name='surveydata')
        label_collection = MonCollection(
            database=default_db, collection_name='cgsslabel').collection

    for year in self._stata_label_object:
        label_table = self._stata_label_object[year].read()
        variable_names = label_table.loc[:, "name"]
        value_label_names = label_table.loc[:, "vallab"]

        document = {
            name: vallab
            for name, vallab in zip(variable_names, value_label_names)
        }
        document["year"] = year
        # NOTE(review): "lables" is misspelled, but this is the value that is
        # stored, so it is kept exactly; confirm with readers of the
        # collection before changing it.
        document["type"] = "variable value lables"
        print(document)
        label_collection.insert_one(document)

    return self
# coding = UTF-8 import os import pickle import numpy as np import pandas as pd from lib.base.database.class_mongodb import MongoDB, MonDatabase, MonCollection mongo = MongoDB( conn_str= 'mongodb://*****:*****@dds-bp162bb74b8184e41658-pub.mongodb.rds.aliyuncs.com:3717' ) mdb = MonDatabase(mongodb=mongo, database_name='enterprise') mcon = MonCollection(mongo, mdb, 'cross_holding_data') PROJECT_DATA_PATH = r'E:\datahouse\projectdata\shareholder' file_path = os.path.join(PROJECT_DATA_PATH, 'cross_holding_main_table.xls') cross_holding_data_table = pd.read_excel(file_path) vars = list(cross_holding_data_table.columns) var_dtype = dict(zip(vars, [str] * len(vars))) print(var_dtype) cross_holding_data_table = pd.read_excel(file_path, dtype=var_dtype) #cross_holding_data_table = cross_holding_data_table.replace('nan',None) records = cross_holding_data_table.to_dict('records')
# coding = UTF-8 import re import pysal from pymongo import ASCENDING import pandas as pd from lib.base.database.class_mongodb import MongoDB, MonCollection from application.dataworld.admindivision.class_admindivision import AdminDivision # 1. 数据库连接 mongo = MongoDB(conn_str='localhost:27017') college_info_con = MonCollection(mongo, database='webdata', collection_name='college_info').collection entrance_score_con = MonCollection( mongo, database='webdata', collection_name='gaokao_entrancescore').collection # 2. 步骤参数设置 # a. 导出每年的高考分数数据 IS_EXPORT_RAW_EXAM_SCORE = False # b. 导出高校信息数据 IS_EXPORT_RAW_COLLEGE_INFO = False # c. 2011-2013年面板数据 IS_MERGE_INTO_PANEL = False # d. 合并高校信息数据 IS_MERGE_COLLEGE_INFO = False # e. 合并大学排名信息 IS_MERGE_COLLEGE_RATE = False # f. 合并省级经济信息 IS_MERGE_PROVINCE_PERGDP = False
def __init__(self):
    """
    Connect to the ``admindivision`` collection of the ``region`` database.

    ``self.collection`` holds the ``MonCollection`` wrapper itself.
    """
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    self.collection = MonCollection(database=region_db,
                                    collection_name='admindivision')
def __init__(self, journals_webs=None):
    """
    Remember the journal websites and open the two ``papers`` collections.

    :param journals_webs: stored unchanged as ``self._journal_websites``
    """
    self._journal_websites = journals_webs

    # Each collection wrapper gets its own MongoDB() instance.
    website_client = MongoDB()
    self._pre_conn = MonCollection(mongodb=website_client,
                                   database='papers',
                                   collection_name='econpaperwebsites')

    paper_client = MongoDB()
    self._paper_conn = MonCollection(mongodb=paper_client,
                                     database='papers',
                                     collection_name='econpapers')
def __init__(self, database='papers', collection='econpapers'):
    """
    Start with no literatures loaded and open the paper collection.

    :param database: name of the database holding the papers
    :param collection: name of the collection holding the papers
    """
    self.literatures = None

    client = MongoDB()
    self._paper_conn = MonCollection(mongodb=client,
                                     database=database,
                                     collection_name=collection)
], type=1) abstract = item.get('abstract') if abstract is not None: abstract = escape_latex(abstract) doc.document.append(abstract) #doc.document.generate_tex(r'E:\github\latexdoc\latexdoc\template\academicjournal\wlscirep\plutopaper.tex') doc.document.generate_pdf( r'D:\github\pluto\lib\base\pylatex\template\output\{}'.format( file_name)) if __name__ == '__main__': report = LiteratureReport() conn = MonCollection(mongodb=MongoDB(), database='papers', collection_name='econpapers') journals = conn.collection.find().distinct('journal') for journal in journals: print(journal) if journal == 'Econometrica': report.load_record_from_db(query={ 'journal': journal, 'year': { '$gte': 2012 } }, sort=[('journal', ASCENDING), ('year', DESCENDING)])
def __init__(self, database='proxy', collection_name='proxys'):
    """
    Set up the database connection.

    :param database: name of the database to use
    :param collection_name: name of the collection to use
    """
    client = MongoDB()
    self._conn = MonCollection(mongodb=client,
                               database=database,
                               collection_name=collection_name)