Пример #1
0
def SetupProject(git_repo):
    """Generate and run the git setup bat file for *git_repo*.

    Copies the 'setup_git_stuff.bat' template, substituting the %HOST%,
    %ORGANIZATION% and %PROJECT% placeholders, executes the result, then
    removes it.

    :param git_repo: object exposing host/organization/project attributes.
    :return: True on success, False on any failure (errors are logged).
    """
    error_log = ErrorLog()
    setup_bat_file_path = join(common_file_path, 'setup_git_stuff.bat')
    dst_file_path = join(main_dir_path, 'setup_git_stuff.bat')
    if not os.path.exists(setup_bat_file_path):
        error_log.LogError("Setup Git Stuff file template not found")
        return False
    try:
        # Nested context managers: the destination file is now closed even
        # when a read/write fails part-way through (it previously leaked).
        with open(setup_bat_file_path) as setup_file, \
                open(dst_file_path, 'w+') as dst_file:
            for line in setup_file:
                temp_line = line.replace('%HOST%', git_repo.host)
                temp_line = temp_line.replace('%ORGANIZATION%',
                                              git_repo.organization)
                temp_line = temp_line.replace('%PROJECT%', git_repo.project)
                dst_file.write(temp_line)
    except OSError:
        error_log.LogError("Unable to generate bat file for git")
        return False
    try:
        subprocess.call([dst_file_path])
    except Exception:  # narrowed from bare except: let KeyboardInterrupt/SystemExit propagate
        error_log.LogError("Git setup bat file execution failed for " +
                           git_repo.project)
        return False
    os.remove(dst_file_path)
    return True
Пример #2
0
def RemoveFile(path):
    """Delete the file at *path* if it exists; log (never raise) on failure."""
    log = ErrorLog()
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError:
        log.LogError("Unable to delete " + path)
Пример #3
0
def FixStupidSolutionFile(proj_name):
    """Strip the 'src' solution-folder entries from *proj_name*'s .sln file.

    Renames the solution to a temp file, rewrites it skipping the
    '"src", "src",' line (plus its follower) and the NestedProjects global
    section header (plus its two followers), then deletes the temp file.
    Errors are logged, not raised.

    :param proj_name: project directory / solution base name.
    """
    error_log = ErrorLog()
    temp_sln_file_path = join(main_dir_path, proj_name,
                              proj_name + "_temp.sln")
    sln_file_path = join(main_dir_path, proj_name, proj_name + ".sln")

    os.rename(sln_file_path, temp_sln_file_path)

    try:
        # Nested 'with': output_file is now closed even when a write fails
        # (previously it leaked on the OSError path).
        with open(temp_sln_file_path) as sln_file, \
                open(sln_file_path, 'w+') as output_file:
            for line in sln_file:
                if line.find('"src", "src",') != -1:
                    # Drop this line and the one after it.
                    sln_file.readline()
                elif line.find(
                        'GlobalSection(NestedProjects) = preSolution') != -1:
                    # Drop this line and the next two.
                    sln_file.readline()
                    sln_file.readline()
                else:
                    output_file.write(line)
    except OSError:
        error_log.LogError("Unable to fix " + proj_name + " solution file")
        return
    os.remove(temp_sln_file_path)
Пример #4
0
    def __init__(self, my_database):
        """Initialize the book-collection wrapper.

        :param my_database: database handle providing a ``book`` collection.
        """
        self._db = my_database        # database handle
        self._books = self._db.book   # the 'book' collection
        self._error_log = ErrorLog()  # error-log writer
Пример #5
0
    def __init__(self, my_database):
        """Initialize the URL-collection wrapper.

        :param my_database: database handle providing a ``urls`` collection.
        """
        self._db = my_database        # database handle
        self._urls = self._db.urls    # the 'urls' collection
        self._error_log = ErrorLog()  # error-log writer
Пример #6
0
def AddSharedSubmodule(mapping_dir_path):
    """Run the generated 'add_shared.bat' in *mapping_dir_path*, then remove it.

    Failures are logged; the bat file is removed regardless.

    :param mapping_dir_path: directory containing the generated bat file.
    """
    error_log = ErrorLog()
    bat_file_path = join(mapping_dir_path, 'add_shared.bat')
    try:
        subprocess.call([bat_file_path])
    except Exception:  # narrowed from bare except so Ctrl-C still propagates
        error_log.LogError("Unable to add shared submodule")
    os.remove(bat_file_path)
Пример #7
0
def RemoveNugetExe(path):
    """Delete the first nuget.exe found under *path*; prune its dir if empty."""
    log = ErrorLog()
    for root, _dirs, file_names in os.walk(path):
        for name in file_names:
            if name.lower() != "nuget.exe":
                continue
            os.remove(join(root, name))
            if not os.listdir(root):
                # Directory is now empty: remove it (and empty parents).
                os.removedirs(root)
            return
    log.LogError("Could not find nuget.exe in dir " + path)
Пример #8
0
def AddMapperProjToSolution(proj_name, root_dir):
    """Add the mapper project to *proj_name*'s solution via add_project.bat.

    Generates the bat file from the 'replaceable' template, runs it, fixes
    the resulting solution file, and removes the bat file again.

    :param proj_name: project name substituted into the template.
    :param root_dir: directory holding the solution and the generated bat.
    """
    error_log = ErrorLog()
    bat_file_path = join(root_dir, 'add_project.bat')
    AddCustomFile(join(common_file_path, 'replaceable'), 'add_project.bat',
                  proj_name, root_dir)
    try:
        subprocess.call([bat_file_path])
    except Exception:  # narrowed from bare except so Ctrl-C still propagates
        error_log.LogError("Unable to fix sln for " + proj_name + " Project")
    FixStupidSolutionFile(proj_name, root_dir)
    os.remove(bat_file_path)
Пример #9
0
def MoveInMappingFiles(proj_name):
    """Copy every non-'.map.cpp' file from the mapper cpp dir into the project."""
    log = ErrorLog()
    src_dir = join(main_dir_path, proj_name + '.Mapper', 'cpp')
    dst_dir = join(main_dir_path, proj_name, 'src', proj_name + '.Mapper')
    for name in os.listdir(src_dir):
        if '.map.cpp' in name:
            continue  # generated mapping sources stay behind
        src_file = join(src_dir, name)
        try:
            copy(src_file, dst_dir)
        except OSError:
            log.LogError("Failed to move mapper file: " + src_file)
Пример #10
0
    def __init__(self, thread_count):
        """Set up the crawler's DB wrappers and bookkeeping objects.

        :param thread_count: shared thread-count tracker.
        """
        self._conn = MyDatabase()             # DB connection wrapper
        self._db = self._conn.database        # database handle
        self._book_coll = BookColl(self._db)  # book-collection wrapper
        self._url_coll = UrlColl(self._db)    # URL-collection wrapper
        self._thread_count = thread_count     # shared thread counter
        self._error_log = ErrorLog()          # error-log writer
Пример #11
0
def main():
    """Drive the crawl loop: feed URLs to the crawler, throttling threads."""
    thread_count = ThreadCount()  # shared thread counter
    error_log = ErrorLog()        # error-log writer
    error_log.clear_error_log()   # create the log file if missing, clear it
    crawler = Crawler(thread_count)
    url = get_one_url()
    while url is not None:        # PEP 8 idiom (was 'not url is None')
        if thread_count.total < 5:  # below the thread cap: crawl this URL
            print("加载:" + url)
            crawler.get_book(url)
            url = get_one_url()   # fetch the next URL
        else:                     # too many live threads: wait for some to finish
            sleep(10)
Пример #12
0
def AddMainDirCommonFile(dst_path):
    """Copy every regular file from the 'main_dir' template dir into *dst_path*."""
    log = ErrorLog()
    src_dir_path = join(common_file_path, 'main_dir')
    for entry in os.listdir(src_dir_path):
        entry_path = join(src_dir_path, entry)
        if isfile(entry_path):
            try:
                copy(entry_path, dst_path)
            except OSError:
                log.LogError("Unable to copy file " + entry + " to " +
                             dst_path)
        elif not isdir(entry_path):
            # Neither a file nor a directory (e.g. broken link): log it too.
            log.LogError("Unable to copy file " + entry + " to " + dst_path)
 def __init__(self, inputdata, outputdata):
     """Wire up the geo-search, logging and progress helpers.

     :param inputdata: input data source (stored on ``self.input``).
     :param outputdata: output target (stored on ``self.output``).
     """
     data_access = DAOPsql('furman')
     self.geo = GeoSearch(data_access)
     self.error_log = ErrorLog(self.__class__.__name__)
     self.progress = Progress()
     self.input = inputdata
     self.output = outputdata
Пример #14
0
def AddCustomFile(src_dir, file_name, replace_value, dst_dir):
    """Instantiate the template *file_name* from *src_dir* into *dst_dir*.

    Replaces the '%REPLACE%' token in the file body with *replace_value*;
    an 'AppName' token in the file name itself is replaced the same way.
    Errors are logged rather than raised.

    :param src_dir: directory holding the template.
    :param file_name: template file name.
    :param replace_value: substitution value for the tokens above.
    :param dst_dir: directory the instantiated file is written to.
    """
    error_log = ErrorLog()
    orig_file_path = join(src_dir, file_name)
    if not os.path.exists(orig_file_path):
        error_log.LogError("Common file template not found")
        return
    dst_file_path = join(dst_dir, file_name)
    if file_name.find('AppName') != -1:
        dst_file_path = join(dst_dir, file_name.replace('AppName', replace_value))
    try:
        # Nested 'with': the destination handle is now closed on error too
        # (previously it leaked when the write raised OSError).
        with open(orig_file_path) as orig_file, \
                open(dst_file_path, 'w+') as dst_file:
            dst_file.write(orig_file.read().replace('%REPLACE%', replace_value))
    except OSError:
        error_log.LogError("Unable to create custom file " + file_name + " with replace value " + replace_value)
 def __init__(self, database):
     """Create a DB connection wrapper and open a cursor immediately.

     :param database: settings object; get_connection() reads its
         user/pswrd/db_name attributes.
     """
     self.connection = None  # live DB connection, set by get_connection()
     self.db = database  # connection settings
     self.sgbd = None  # DB driver module, chosen by settings()
     self.dbAtrib = None  # driver-specific connection attributes
     self.settings()  # subclass hook that fills sgbd/dbAtrib
     self.error_log = ErrorLog(self.__class__.__name__)
     self.cursor = self.get_connection().cursor()  # NOTE(review): raises AttributeError if the connect failed and get_connection() returned None
Пример #16
0
def GenerateMappingEnvironment(dir_path="c:/Starcounter/AppName", git_repo=None):
    """Create the <app>.Mapper project skeleton inside *dir_path*.

    :param dir_path: project root; its last path segment is the app name.
    :param git_repo: GitRepo to configure; a fresh one is created when None.
        (The old default ``git_repo=GitRepo()`` was the mutable-default bug:
        one instance was built at import time, shared and mutated by
        SetProject across every call.)
    """
    if git_repo is None:
        git_repo = GitRepo()
    error_log = ErrorLog()
    app_name = dir_path.split('/')[-1]
    mapping_dir_path = join(dir_path, 'src', app_name + ".Mapper")
    replaceable_dir_path = join(common_file_path, 'replaceable')

    git_repo.SetProject(app_name)

    if not os.path.exists(mapping_dir_path):
        try:
            os.makedirs(mapping_dir_path)
        except OSError:
            error_log.LogError("Creation of Mapper directory failed.  Check permissions and try again")
            return

    AddMainDirCommonFile(mapping_dir_path)
    AddCustomFile(replaceable_dir_path, 'AppName.Mapper.csproj', app_name, mapping_dir_path)
    AddSharedSubmodule(mapping_dir_path)
    AddMapperProjToSolution(app_name, dir_path)
class ConnectionFactory:
    """Base DB connection wrapper; subclasses configure the driver in settings()."""

    def __init__(self, database):
        """Create the wrapper and open a cursor immediately.

        :param database: settings object; get_connection() reads its
            user/pswrd/db_name attributes.
        """
        self.connection = None  # live connection, set by get_connection()
        self.db = database      # connection settings
        self.sgbd = None        # driver module, chosen by settings()
        self.dbAtrib = None     # driver-specific connect attributes
        self.settings()         # subclass hook
        self.error_log = ErrorLog(self.__class__.__name__)
        self.cursor = self.get_connection().cursor()

    def get_connection(self):
        """Open and return a connection; on failure log and return None."""
        try:
            self.connection = self.sgbd.connect(user=self.db.user, passwd=self.db.pswrd, db=self.db.db_name)
            return self.connection
        except Exception as e:
            self.close()
            self.error_log.open()
            self.error_log.write(str(e))  # str(e): Exception.message is Python-2-only
            self.error_log.close()

    def close(self):
        """Close the connection if one was ever opened.

        The None-guard fixes get_connection()'s error path, which called
        close() while self.connection was still None and crashed with
        AttributeError inside the except handler.
        """
        if self.connection is not None:
            self.connection.close()

    def settings(self):
        """Subclass hook: populate self.sgbd / self.dbAtrib / self.db fields."""
        pass
Пример #18
0
class PSQL:
    # NOTE(review): Python 2 code ('print e.message' statement, Exception.message)
    # -- this class will not compile under Python 3 as-is.
    def __init__(self, database):
        """Build the PostgreSQL wrapper and open a cursor immediately.

        :param database: settings object mutated in place by settings().
        """
        self.connection = None  # live psycopg2 connection
        self.db = database  # connection settings, filled in by settings()
        self.sgbd = None  # driver module (psycopg2 after settings())
        self.dbAtrib = None  # DSN string passed to psycopg2.connect
        self.settings()
        self.error_log = ErrorLog(self.__class__.__name__)
        self.cursor = self.get_connection().cursor()  # NOTE(review): crashes if connect failed and get_connection() returned None

    def settings(self):
        """Hard-code the connection settings and build the psycopg2 DSN.

        NOTE(review): credentials are hard-coded (some values look masked
        with '******' by the snippet source) -- move them to config/env.
        """
        try:
            self.db.db_name = "nycgisdb"
            self.db.user = "******"
            self.db.host = "localhost"
            self.db.pswrd = "m2a3rcio"
            self.sgbd = psycopg2
            self.dbAtrib = ("dbname='" + self.db.db_name + "' user='******' host='"
                            + self.db.host + "' password='******'")
        except TypeError as e:
            print e.message

    def select(self, arg):
        """Execute *arg* as SQL, commit, and return all fetched rows."""
        self.cursor.execute(arg)
        self.connection.commit()
        return self.cursor.fetchall()

    def get_connection(self):
        """Connect via the DSN; on failure log the error and return None."""
        try:
            self.connection = self.sgbd.connect(self.dbAtrib)
            return self.connection
        except Exception as e:
            self.error_log.open()
            self.error_log.write(e.message)
            self.error_log.close()
Пример #19
0
class BookColl(object):
    """Wrapper around the 'book' MongoDB collection."""

    def __init__(self, my_database):
        """Initialize the wrapper.

        :param my_database: database handle providing a ``book`` collection.
        """
        self._error_log = ErrorLog()  # error-log writer
        self._db = my_database  # database handle
        self._books = self._db.book  # the 'book' collection

    def insert_to_db(self, data):
        """Insert one document into the book collection.

        :type data dict
        :param data: document to insert.
        :return: None
        """
        try:
            self._books.insert_one(data)
        except Exception as e:
            # 'except Exception': the old '(errors, Exception)' tuple held a
            # module, which itself raises TypeError at match time.
            # str(e): concatenating the exception object raised TypeError.
            self._error_log.write_error('BookColl插入错误' + str(e))

    def get_book_name(self):
        """Print every book name and append each to book_name.txt."""
        # Open the output file once instead of re-opening it per row.
        with open('book_name.txt', 'a', encoding='utf-8') as f:
            for result in self._books.find():
                print(result['book_name'])
                f.write(result['book_name'] + '\n')
Пример #20
0
def error_txt_to_csv(ms_name):
    """Convert the pipe-delimited error txt for *ms_name* into a CSV file."""
    in_path = './error-logs/esb-' + ms_name + '-errors.txt'
    out_path = './error-logs/esb-' + ms_name + '-errors.csv'

    with open(in_path, 'r', encoding='utf-8-sig') as csv_in_file:
        reader = csv.reader(csv_in_file, delimiter='|')
        print('Creating esb-' + ms_name + '-errors.csv file...')
        print('Writing into esb-' + ms_name + '-errors.csv file...')

        with open(out_path, 'w', newline='') as csv_out_file:
            writer = csv.writer(csv_out_file, delimiter=',')
            # Header row: the attribute names of a blank ErrorLog.
            writer.writerow(ErrorLog().__dict__.keys())
            # One output row per parsed input row.
            writer.writerows(get_error_log_from_txt(row) for row in reader)

        print('Finished processing esb-' + ms_name + '-errors.csv file... \n')
Пример #21
0
def get_error_log_from_txt(data):
    """Map a pipe-split error-log row onto a fresh ErrorLog instance.

    :param data: indexable row; only the listed positions are consumed.
    :return: populated ErrorLog.
    """
    field_to_index = (
        ('date', 0), ('log_level', 1), ('log_id', 2), ('log_event', 3),
        ('route', 4), ('transaction_id', 6), ('user_email', 9),
        ('package', 13), ('error_message', 14),
    )
    record = ErrorLog()
    for attr, idx in field_to_index:
        setattr(record, attr, data[idx])
    return record
Пример #22
0
def get_error_log_from_dict(data):
    """Map a dict with underscore-prefixed keys onto a fresh ErrorLog.

    :param data: mapping with '_date', '_log_level', ... keys.
    :return: populated ErrorLog.
    """
    record = ErrorLog()
    for attr in ('date', 'log_level', 'log_id', 'log_event', 'route',
                 'transaction_id', 'user_email', 'package', 'error_message'):
        setattr(record, attr, data['_' + attr])
    return record
Пример #23
0
class UrlColl(object):
    """Wrapper around the 'urls' MongoDB collection."""

    def __init__(self, my_database):
        """Initialize the wrapper.

        :param my_database: database handle providing a ``urls`` collection.
        """
        self._error_log = ErrorLog()  # error-log writer
        self._db = my_database  # database handle
        self._urls = self._db.urls  # the 'urls' collection

    def add_url(self, url):
        """Insert *url* (marked not-yet-crawled) unless it is already stored.

        :param url: URL string.
        :return: None
        """
        try:
            if not self.is_exist_url(url):
                self._urls.insert_one({'url': url, 'isExist': 'false'})
        except Exception as e:
            # 'except Exception': the old '(errors, Exception)' tuple held a
            # module, which itself raises TypeError at match time.
            # str(e): concatenating the exception object raised TypeError.
            self._error_log.write_error('UrlColl添加错误' + str(e))

    def is_exist_url(self, url):
        """Return True if *url* is already in the collection.

        :param url: URL string.
        :return: bool, or None when the lookup itself failed (logged).
        """
        try:
            return self._urls.find_one({"url": url}) is not None
        except Exception as e:
            self._error_log.write_error('UrlColl查找错误' + str(e))

    def get_url(self):
        """Return a random not-yet-crawled URL, or None on lookup failure."""
        num = randint(1, 100)  # random skip offset
        try:
            result = self._urls.find({
                'isExist': 'false'
            }).skip(num).limit(1)
            return result[0]['url']
        except Exception as e:
            self._error_log.write_error('UrlColl获取url错误' + str(e))

    def update_url(self, url):
        """Mark *url* as crawled (isExist -> 'true').

        :param url: URL to update.
        :return: None
        """
        try:
            # NOTE(review): pymongo's Collection.update() is deprecated;
            # update_one() is the modern API -- left as-is to match the driver
            # version this project pins.
            self._urls.update({'url': url}, {'$set': {
                'isExist': 'true'
            }})
        except Exception as e:
            self._error_log.write_error('UrlColl更新URl数据错误' + str(e))
class RealEstateSettings:
    # NOTE(review): Python 2 code ('print' statements) -- will not run on
    # Python 3 without conversion.  Geocodes ACRIS real-estate CSV rows
    # through a cascade of geocoding providers.

    def __init__(self, inputdata, outputdata):
        """Wire up the geo-search, logging and progress helpers.

        :param inputdata: input data source read by preprocess().
        :param outputdata: output target the geocoded rows are appended to.
        """
        dao = DAOPsql('furman')
        self.geo = GeoSearch(dao)
        self.error_log = ErrorLog(self.__class__.__name__)
        self.progress = Progress()
        self.input = inputdata
        self.output = outputdata

    def fix_acris(self):
        """Normalize ACRIS tuples into (bbl, address, date, price) rows."""
        tuples = self.preprocess()
        real_estates = []
        while tuples:
            try:
                t = tuples.pop(0)
                bbl = Normalizer.set_bbl(t[0], t[1], t[2])
                address = t[3]+" "+t[4]
                address = Normalizer.set_address(address, bbl)
                date = Normalizer.set_str_to_epoch(t[5])
                price = t[6]
                real_estates.append((bbl, address, date, price))
            except ValueError:
                # Bad row: log its identifying fields and keep going.
                self.error_log.open()
                self.error_log.write(t[1]+", "+str(t[0]))
                self.error_log.close()
            except KeyboardInterrupt:
                # Ctrl-C: flush what was collected so far before stopping.
                print ""
                print "Stopped"
                CsvManager.append_geo_codes(real_estates, self.output)
        CsvManager.append_geo_codes(real_estates, self.output)

    def preprocess(self):
        """Load input rows, restore saved progress, return remaining tuples."""
        tuples = CsvManager.read(self.input)
        num = CsvManager.read_progress()
        print num
        if num == 0:
            # Fresh run: reset the output file and the progress marker.
            CsvManager.write_geo_codes([], self.output)
            CsvManager.write_progress('0')
        self.progress.set_size(len(tuples))
        self.progress.update_progress(num)
        Normalizer.set_tuple(num, tuples)
        return tuples

    def build_geocodings(self):
        """Instantiate every geocoding backend with shared progress/log/geo."""
        nominatim = NominatimGeocode(self.progress, self.error_log, self.geo)
        google = GoogleGeocode(self.progress, self.error_log, self.geo)
        opencage = OpenCageGeocode(self.progress, self.error_log, self.geo)
        bing = BingGeocode(self.progress, self.error_log, self.geo)
        tiger = TIGERGeocode(self.progress, self.error_log, self.geo)
        return nominatim, google, opencage, bing, tiger

    def search_lat_long(self):
        """Geocode every tuple, falling back across providers on failure.

        Status codes appear to mean: -1 = not found, -2 = rate-limited,
        -3 = fatal -- TODO confirm against the geocode classes.
        """
        tuples = self.preprocess()
        count = 1
        nominatim, google, opencage, bing, tiger = self.build_geocodings()
        while tuples:
            t = tuples.pop(0)
            status, found = self.geocode_process(t, nominatim)
            if not found:
                if status == -1:
                    # Nominatim miss: try Bing, then TIGER as a last resort.
                    status, found = self.geocode_process(t, bing)
                    if not found and status == -1:
                        self.geocode_process(t, tiger)
                elif status == -2:
                    # Rate-limited: wait 45 minutes, retry up to twice.
                    i = 1
                    while i < 3:
                        print "Waiting 45' for the "+Normalizer.set_order(str(i))+" time"
                        time.sleep(2700)
                        status, found = self.geocode_process(t, nominatim)
                        if found:
                            # NOTE(review): 'continue' re-enters the wait loop
                            # on success; 'break' was probably intended.
                            continue
                        elif status == -2:
                            i += 1
                        elif status == -3:
                            return
                if count % 100 == 0:
                    # Every 100th miss: spread lookups over Google/OpenCage.
                    for i in range(3):
                        t = tuples.pop(0)
                        status, found = self.geocode_process(t, google)
                        time.sleep(3)
                        if not found:
                            self.geocode_process(t, opencage)
                            time.sleep(3)
                        else:
                            t = tuples.pop(0)
                            self.geocode_process(t, opencage)
                            time.sleep(3)
            count += 1

    def geocode_process(self, t, geocode):
        """Run one geocode lookup; append the hit or just advance progress.

        :return: (result, num) as returned by the backend's get_coordinates.
        """
        # NOTE(review): local 're' shadows the regex module if imported here.
        re, num = geocode.get_coordinates(t)
        if num:
            CsvManager.append_geo_codes([re], self.output)
            self.progress.update_progress(num)
        else:
            val = CsvManager.read_progress()
            self.progress.update_progress(val+1)
        return re, num
Пример #25
0
def FixStupidSolutionFile(proj_name, root_dir):
    """Strip the 'src' solution-folder entries from the .sln in *root_dir*.

    Renames the solution to a temp file, rewrites it skipping the
    '"src", "src",' line (plus its follower) and the NestedProjects section
    header (plus its two followers), then removes the temp file.

    :param proj_name: solution base name.
    :param root_dir: directory holding the solution file.
    """
    error_log = ErrorLog()
    temp_sln_file_path = join(root_dir, proj_name + "_temp.sln")
    sln_file_path = join(root_dir, proj_name + ".sln")
    os.rename(sln_file_path, temp_sln_file_path)
    try:
        # Nested 'with': output_file is now closed even when a write fails
        # (previously it leaked on the OSError path).
        with open(temp_sln_file_path) as sln_file, \
                open(sln_file_path, 'w+') as output_file:
            for line in sln_file:
                if line.find('"src", "src",') != -1:
                    sln_file.readline()  # drop the following line too
                elif line.find('GlobalSection(NestedProjects) = preSolution') != -1:
                    sln_file.readline()  # drop the next two lines
                    sln_file.readline()
                else:
                    output_file.write(line)
    except OSError:
        error_log.LogError("Unable to fix " + proj_name + " solution file")
        return
    os.remove(temp_sln_file_path)

# Script entry point: reset the error log, then build the mapping
# environment with default arguments.
error_log = ErrorLog()
error_log.ClearErrorLog()  # start each run with a clean log file
GenerateMappingEnvironment()




Пример #26
0
                    )
                elif line.find('mapperOutput') != -1:
                    output_file.write(
                        '  "mapperOutput": "..\\\$app$\\\\bin\\\$config$\\\$app$.map.cpp",\n'
                    )
                else:
                    output_file.write(line)
            output_file.close()
    except OSError:
        error_log.LogError("Unable to fix " + proj_name + " mgen file")
        return
    os.remove(temp_file_path)


# MAIN
error_log = ErrorLog()
subfolders = [f.path for f in os.scandir(main_dir_path) if f.is_dir()]
for dir in subfolders:
    if dir.find(".Mapper") != -1 and dir.find("Blending") == -1:
        proj_name = dir.split('\\')[-1].split('.')[0]
        git_repo = GitRepo()
        git_repo.SetProject(proj_name)
        if SetupProject(git_repo):
            # Remove unneccessary dirs
            RemoveFile(join(main_dir_path, proj_name, 'Rebracer.xml'))
            RemoveNugetExe(join(main_dir_path, proj_name))
            # Add new dirs not added by setup bat
            try:
                os.mkdir(join(main_dir_path, proj_name, '%STAR_NUGET%'))
            except OSError:
                error_log.LogError("%STAR_NUGET% folder already exists for " +
Пример #27
0
class Crawler(object):
    """Scrapes book pages with Selenium/Firefox and stores them via BookColl."""

    def __init__(self, thread_count):
        """Set up the crawler's DB wrappers and bookkeeping objects.

        :param thread_count: shared thread-count tracker.
        """
        self._conn = MyDatabase()
        self._db = self._conn.database
        self._book_coll = BookColl(self._db)  # collection wrappers
        self._url_coll = UrlColl(self._db)
        self._thread_count = thread_count
        self._error_log = ErrorLog()  # error-log writer

    def get_book(self, url):
        """Fetch the book page at *url*, parse it and persist the fields.

        :param url: book page URL to scrape.
        :return: None
        """
        book = {}  # accumulates the parsed book fields
        # Start the browser driver (Firefox via geckodriver).
        driver = webdriver.Firefox(
            executable_path='E:\DevelopTools\Python\geckodriver')
        # driver = webdriver.Ie(executable_path='E:\DevelopTools\Python\IEDriverServer')
        try:
            driver.set_page_load_timeout(12)  # page-load timeout (seconds)
            driver.set_script_timeout(30)  # script-response timeout (seconds)
            driver.get(url)  # navigate to the book page
            js = "var q=document.documentElement.scrollTop=100000"  # scroll to bottom to trigger lazy loading
            driver.execute_script(js)
            time.sleep(1)  # let the browser catch up
            js = "var q=document.documentElement.scrollTop=0"  # back to top
            driver.execute_script(js)
            time.sleep(2)  # let the browser catch up
            js = "var q=document.documentElement.scrollTop=100000"  # bottom again to finish lazy loading
            driver.execute_script(js)
            time.sleep(1)  # simulated scrolling complete
            soup = BeautifulSoup(driver.page_source,
                                 "lxml")  # parse the rendered page
        except Exception as e:
            print(e)  # surface the failure
            self._error_log.write_error(e)  # record it in the error log
            return  # give up on this URL
        finally:
            driver.close()  # always shut the browser down
        # target = driver.find_element_by_id("footer")
        # driver.execute_script("arguments[0].scrollIntoView();", target)  # scroll the element into view (unused)

        # Field extraction from the parsed page starts here.
        null_wrap = soup.find("div", {"class": "null_wrap"})
        if not null_wrap is None:
            # The page is a 'gone' placeholder: mark the URL done and bail.
            self._url_coll.update_url(url)
            return
        book['url'] = url
        book_name = soup.find("div", {"class": "name_info"})
        if book_name is None:
            # No title block: treat as unusable and mark the URL done.
            self._url_coll.update_url(url)
            return
        book['book_name'] = book_name.h1.get_text(strip=True)
        book['image_url'] = soup.find("div", {"class": "big_pic"}).img['src']
        book['book_type'] = soup.find("div", {
            "class": "breadcrumb"
        }).get_text(strip=True)
        book['introduction'] = soup.find("span", {
            "class": "head_title_name"
        }).get_text(strip=True)
        author = soup.find("span", {"id": "author"})
        if author is None:
            book['author'] = ""
        else:
            book['author'] = soup.find("span", {"id": "author"}).text
        messbox = soup.find("div", {"class": "messbox_info"})
        for item in messbox:
            # "出版社" = publisher, "出版时间" = publication date.
            if "出版社" in str(item):
                book['publishing'] = item.get_text(strip=True)
            elif "出版时间" in str(item):
                book['publishing_time'] = item.get_text(strip=True)
        book['price'] = soup.find("p", {
            "id": "dd-price"
        }).get_text(strip=True).split("¥")[1]
        editors_choice = soup.find("div", {"id": "abstract"})
        if editors_choice is None:
            book['editors_choice'] = ""
        else:
            book['editors_choice'] = editors_choice.contents[1].get_text()
        content_validity = soup.find("div", {"id": "content"})
        if content_validity is None:
            book['content_validity'] = ""
        else:
            book['content_validity'] = content_validity.contents[1].get_text()
        about_author = soup.find("div", {"id": "authorIntroduction"})
        if about_author is None:
            book['about_author'] = ""
        else:
            book['about_author'] = about_author.contents[1].get_text()
        catalog = soup.find("textarea", {"id": "catalog-textarea"})
        if catalog is None:
            # Fall back to the non-textarea catalog container.
            catalog2 = soup.find("div", {"id": "catalog"})
            if catalog2 is None:
                book['catalog'] = ""
            else:
                book['catalog'] = catalog2.contents[1].get_text()
        else:
            book['catalog'] = catalog.get_text(strip=True)
        media_reviews = soup.find("div", {"id": "mediaFeedback"})
        if media_reviews is None:
            book['media_reviews'] = ""
        else:
            book['media_reviews'] = media_reviews.get_text()
        # All fields gathered: persist the book and release the connection.
        self._book_coll.insert_to_db(book)
        self._conn.close_conn()
        print(url + "完成")
        try:
            self._thread_count.add_one()  # bump the live-thread counter
            thread = MyThread(soup, self._thread_count)  # follow-up worker thread
            thread.start()
        except Exception as e:
            self._error_log.write_error(e)  # record the failure
            print("Error: 无法启动线程" + e)  # NOTE(review): str + Exception raises TypeError here; should be str(e)