def buildMusicList(self):
    '''
    Visit the top-100 song URLs and extract the actual song file addresses.
    '''
    mList = codecs.open(self.currentPath + "/download.txt")
    lines = mList.readlines()
    mList.close()
    downloadClass = DownloadHTMLParser()
    # Only the first 100 entries are fetched.
    for line in lines[:100]:
        request = urllib2.Request(line.strip())
        listFile = urllib2.urlopen(request)
        downloadClass.feed(listFile.read())

    if downloadClass.mp3_128kbpsFiles:
        self.rowCount = len(downloadClass.mp3_128kbpsFiles)
        self.songAddresses = downloadClass.mp3_128kbpsFiles

        mp3UrlList = codecs.open(self.currentPath + "/musicFile.txt", "w+", "utf-8")
        for mp3File in downloadClass.mp3_128kbpsFiles:
            mp3UrlList.write(mp3File + "\r\n")
        mp3UrlList.close()
Example #2
def download(
    url,
    gently=False,
    refresh=False,
    no_user_agent=False,
    expect_xml=False,
):
    if not os.path.isdir(_CACHE):
        os.mkdir(_CACHE)
    key = hashlib.md5(url).hexdigest()
    if no_user_agent:
        key += 'nouseragent'
    fp = os.path.join(_CACHE, key)
    if os.path.isfile(fp):
        age = time.time() - os.stat(fp).st_mtime
        if age > 60 * 60 * 24:
            os.remove(fp)
    if not os.path.isfile(fp) or refresh:
        print "* requesting", url
        r = realistic_request(url, no_user_agent=no_user_agent)
        if r.status_code == 200:
            if expect_xml and not (
                'xml' in r.headers['Content-Type'] or '<rss' in r.text
            ):
                raise NotXMLResponse(r.headers['Content-Type'])
            with codecs.open(fp, 'w', 'utf8') as f:
                f.write(r.text)
            if gently:
                time.sleep(random.randint(1, 4))
        else:
            raise Exception(r.status_code)

    with codecs.open(fp, 'r', 'utf8') as f:
        return f.read()
Example #3
    def test_scanner_uses_sha256_by_default(self):
        dirname = self.create_sample_bag()

        # single-level
        output = codecs.open(os.path.join(dirname, 'default_checkm.txt'), encoding='utf-8', mode="w")
        output = self.reporter.create_checkm_file(scan_directory=dirname,
                                         checkm_filename='default_checkm.txt',
                                         checkm_file=output)
        input = open(os.path.join(dirname, 'default_checkm.txt'), 'r')
        lines = self.checkm_p.parse(input)
        for line in lines:
            if not( line[1] == 'dir' ):
                self.assertEqual(line[1], 'sha256')
                break

        # multilevel
        output = codecs.open(os.path.join(dirname, 'default_checkm.txt'), encoding='utf-8', mode="w")
        output = self.reporter.create_multilevel_checkm(top_directory=dirname,
                                                        checkm_filename='default_checkm.txt')
        input = open(os.path.join(dirname, 'default_checkm.txt'), 'r')
        lines = self.checkm_p.parse(input)
        for line in lines:
            if not( line[1] == 'dir' ):
                self.assertEqual(line[1], 'sha256')
                break
Example #4
    def send_file_content():
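        # Rebuild question/answer HTML from each partial dump file, e-mail it, and save a copy as <filename>.html.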
        import re
        import codecs
        import time
        file_pattern = r'E:\github\zhihu_spider\data\question_answer_content_2015_05_24.txt.partial_%s'
        # filename = os.path.join(PATH, 'data', 'question_answer_content_2015_05_24.txt.partial_10.html')
        # filename = r'E:\github\zhihu\text_data\zhihu_v500_l0-100_d20150228_r1272-1910.html'
        # file_content = codecs.open(filename).read()
        for file_index in range(1, 11):
            filename = file_pattern % file_index
            print file_index
            new_line_list = []
            with codecs.open(filename, encoding='utf-8') as f:
                index = 0
                for line in f.readlines():
                    line = re.sub(r'\<div width="\d+"\>', '', line)
                    question_subject_match = re.search(r'<table><tr><td><font color="#4EABF9"><u>(.*?)</u></font>', line)
                    answer_list = re.findall(r'A:.*?<br>', line)
                    if (not question_subject_match):
                        print line.strip()
                        continue

                    for answer in answer_list:
                        if not answer.replace('A:', '').strip():
                            print line.strip()
                            continue
                    index += 1
                    new_line = question_subject_match.group().strip().replace('<u>', '<u>%s' % index) + "\n<br>" + "\n".join(answer_list) + '</tr></td></table>'  # rendered in a web page, so the "\n" characters do not matter
                    new_line_list.append(new_line)

            file_content = '\n'.join(new_line_list)
            send_to_163_mail(file_content, mail_to)
            time.sleep(3)
            codecs.open(filename + '.html', mode='wb', encoding='utf-8').write('<html><head><meta charset=\'utf-8\'></head><body>'+'\n'.join(new_line_list)+'</body></html>')
Example #5
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
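    # Write a synthetic CSV: random two-character UTF-8 strings per cell, or a fixed UTF-16 row when UTF16 is set.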
    r1 = random.Random(SEED)
    if UTF8 or UTF8_MULTIBYTE:
        dsf = codecs.open(csvPathname, encoding='utf-8', mode='w+')
    elif UTF16:
        dsf = codecs.open(csvPathname, encoding='utf-16', mode='w+')
    else:
        dsf = open(csvPathname, "w+")

    for i in range(rowCount):
        if UTF16:
            u = unichr(0x2018) + unichr(6000) + unichr(0x2019)
            rowDataCsv = u
        else: # both ascii and utf-8 go here?
            rowData = []
            for j in range(colCount):
                r = generate_random_utf8_string(length=2)
                rowData.append(r)
            rowDataCsv = ",".join(rowData)
        if UTF16:
            # we're already passing it unicode. no decoding needed
            print "utf16:", repr(rowDataCsv), type(rowDataCsv)
            decoded = rowDataCsv
        else:
            print "str:", repr(rowDataCsv), type(rowDataCsv)
            decoded = rowDataCsv.decode('utf-8')
            # this has the right length..multibyte utf8 are decoded 
            print "utf8:" , repr(decoded), type(decoded)
        
        # dsf.write(rowDataCsv + "\n")
        dsf.write(decoded + "\n")
    dsf.close()
Example #6
 def _run(self, *cmdargs):
     cmdargs = [str(x) for x in cmdargs]
     p1 = self.tmpdir.join("stdout")
     p2 = self.tmpdir.join("stderr")
     print_("running:", ' '.join(cmdargs))
     print_("     in:", str(py.path.local()))
     f1 = codecs.open(str(p1), "w", encoding="utf8")
     f2 = codecs.open(str(p2), "w", encoding="utf8")
     try:
         now = time.time()
         popen = self.popen(cmdargs, stdout=f1, stderr=f2,
             close_fds=(sys.platform != "win32"))
         ret = popen.wait()
     finally:
         f1.close()
         f2.close()
     f1 = codecs.open(str(p1), "r", encoding="utf8")
     f2 = codecs.open(str(p2), "r", encoding="utf8")
     try:
         out = f1.read().splitlines()
         err = f2.read().splitlines()
     finally:
         f1.close()
         f2.close()
     self._dump_lines(out, sys.stdout)
     self._dump_lines(err, sys.stderr)
     return RunResult(ret, out, err, time.time()-now)
Example #7
 def compile_html(self, source, dest):
     """Compile reSt into HTML."""
     if not has_docutils:
         raise Exception('To build this site, you need to install the '
                         '"docutils" package.')
     try:
         os.makedirs(os.path.dirname(dest))
     except:
         pass
     error_level = 100
     with codecs.open(dest, "w+", "utf8") as out_file:
         with codecs.open(source, "r", "utf8") as in_file:
             data = in_file.read()
             output, error_level, deps = rst2html(
                 data, settings_overrides={
                     'initial_header_level': 2,
                     'record_dependencies': True,
                     'stylesheet_path': None,
                     'link_stylesheet': True,
                     'syntax_highlight': 'short',
                 })
             out_file.write(output)
         deps_path = dest + '.dep'
         if deps.list:
             with codecs.open(deps_path, "wb+", "utf8") as deps_file:
                 deps_file.write('\n'.join(deps.list))
         else:
             if os.path.isfile(deps_path):
                 os.unlink(deps_path)
     if error_level < 3:
         return True
     else:
         return False
Example #8
def process_slides():
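  # Render slides.md (slides separated by '---') through the _base.html Jinja2 template into presentation.html.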
  with codecs.open('presentation.html', 'w', encoding='utf8') as outfile:
    md = codecs.open('slides.md', encoding='utf8').read()
    md_slides = md.split('\n---\n')
    print 'Compiled %s slides.' % len(md_slides)

    slides = []
    # Process each slide separately.
    for md_slide in md_slides:
      slide = {}
      sections = md_slide.split('\n\n')
      # Extract metadata at the beginning of the slide (look for key: value)
      # pairs.
      metadata_section = sections[0]
      metadata = parse_metadata(metadata_section)
      slide.update(metadata)
      remainder_index = metadata and 1 or 0
      # Get the content from the rest of the slide.
      content_section = '\n\n'.join(sections[remainder_index:])
      html = markdown.markdown(content_section)
      slide['content'] = postprocess_html(html, metadata)

      slides.append(slide)

    template = jinja2.Template(open('_base.html').read())

    outfile.write(template.render(locals()))
Example #9
def cdrom():
    # now parse the cd-rom raw texts as we got in corrupt TEI-XML from the INL
    for textN in os.listdir("../data/original/unannotated/cdrom_xml/"):
        if not textN.endswith(".xml"):
            continue
        print textN
        with open("../data/original/unannotated/cdrom_xml/"+textN) as oldF:
            try:
                text = text_tag.split(oldF.read(), maxsplit=1)[1]
                soup = Soup(text)
                text = soup.get_text()
                text = clean_text(text)
                if not text.startswith("voor de tekst zie"):
                    with codecs.open("../data/uniform/unannotated/cdrom/"+str(textN)+".txt", "w+", "utf-8") as newF:
                        newF.write(text)
            except:
                pass
    # now parse the cd-rom raw texts from Brill (which we didn't get via the INL)
    # in the format as Lisanne downloaded them from the Cd-rom
    for textN in os.listdir("../data/original/unannotated/cdrom_txt/"):
        if not textN.endswith(".txt"):
            continue
        print textN
        with codecs.open("../data/original/unannotated/cdrom_txt/"+textN, "r+", "utf-8-sig") as oldF:
            words = [clean_token(w) for w in oldF.read().strip().split()]
            with codecs.open("../data/uniform/unannotated/cdrom/"+textN, "w+", "utf-8-sig") as newF:
                newF.write(" ".join(words))
    return
Example #10
 def __call__(self, file, config):
     """ Compare expected output to actual output and report result. """
     cfg_section = get_section(file, config)
     if config.get(cfg_section, 'skip'):
         raise nose.plugins.skip.SkipTest, 'Test skipped per config.'
     input_file = file + config.get(cfg_section, 'input_ext')
     with codecs.open(input_file, encoding="utf-8") as f:
         input = f.read()
     output_file = file + config.get(cfg_section, 'output_ext') 
     with codecs.open(output_file, encoding="utf-8") as f:
         expected_output = f.read()
     output = markdown.markdown(input, **get_args(file, config))
     if tidy and config.get(cfg_section, 'normalize'):
         # Normalize whitespace before comparing.
         expected_output = normalize(expected_output)
         output = normalize(output)
     elif config.get(cfg_section, 'normalize'):
         # Tidy is not available. Skip this test.
         raise nose.plugins.skip.SkipTest, 'Test skipped. Tidy not available in system.'
     diff = [l for l in difflib.unified_diff(expected_output.splitlines(True),
                                             output.splitlines(True), 
                                             output_file, 
                                             'actual_output.html', 
                                             n=3)]
     if diff:
         raise util.MarkdownSyntaxError('Output from "%s" failed to match expected '
                                        'output.\n\n%s' % (input_file, ''.join(diff)))
Example #11
def split_linkedin_dump():
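    # Split the LinkedIn CSV dump into one file per record, named by the quoted id that starts each row; the first `skip` rows are skipped.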
    skip = 2100000
    count = 0
    log = codecs.open("C:\\data\\log"+str(skip)+".txt",'w', encoding="utf-8") 
    id_map = codecs.open("C:\\data\\idmap"+str(skip)+".txt",'w', encoding="utf-8") 
    linkedin_dump = codecs.open('D:\\result.csv', encoding="utf-8")
    out = ""
    linkedin_dump.next()
    for line in linkedin_dump:
        x = 0
        if count < skip:
            count+=1
            if count % 10000 == 0:
                print count
            continue
        print str(count)+':'+str(len(line))
        log.write(str(count)+' '+str(len(line)))
        if line[0] == '"':
            x = line.find('",')
            log.write(str(count)+' '+line[1:x]+'\n')
            verbose.debug(str(count)+' '+line[1:x])
            id_map.write(str(count)+' '+line[1:x]+'\n')
            count+=1
            try:
                out = codecs.open("C:\\data\\linkedin\\"+line[1:x].strip().replace('"'," ").split('?')[0],'w', encoding="utf-8")
            except Exception, e:
                print e
        else:
            log.write("[EXCEPTION]"+str(count)+":"+line+'\n')
        out.write(line[x:])
Example #12
def manual_convert():
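    # Walk the tree under argv[1] and re-encode each file from gb18030 to UTF-8; writes to '_<name>' copies unless --convert is given.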
    root = upath.abspath(sys.argv[1])
    if len(sys.argv) == 3 and sys.argv[2] == '--convert':
        dry_run = False
    else:
        dry_run = True
    log = codecs.open('../convert.log', 'w', 'utf-8')
    for curdir, subdirs, filenames in os.walk(root, topdown=True):
        for name in filenames:
            try:
                src = os.path.join(curdir, name)
                print('Converting %s' % src)
                with open(src, 'rb') as f:
                    data = f.read()
                if dry_run:
                    dst = os.path.join(curdir, '_%s' % name)
                else:
                    dst = src
                utf8_data = compat.to_text(data, encoding='gb18030')
                with codecs.open(dst, 'w', 'utf-8') as f:
                    f.write(utf8_data)
            except Exception as e:
                traceback.print_exc()
                log.write(src + '\n')
Example #13
def generate_theme_file(theme_file_path, dict_seq, new_theme_file_path):
    """Appends `dict_seq` to `new_theme_file_path`, converting it to plist format.

    :param theme_file_path: path to the theme file to read from
    :param dict_seq: list of dictionaries with color definitions
    :param new_theme_file_path: path to the created theme file

    """

    with codecs.open(theme_file_path, 'r', 'utf-8') as f:
        # parse dict objects to plist format
        tempate_to_write = (dict_to_plist(d) for d in dict_seq)
        # fix codecs.StreamReaderWriter.read inaccuracy
        f = StringIO.StringIO(f.read())
        # find the end of the colors definition
        end_pos = seek_until(f, '</array>')
        # text up to the insertion point
        f.seek(0)
        begin_text = f.read(end_pos)
        # new colors definition plus end of file
        f.seek(end_pos)
        end_text = '\n'.join(tempate_to_write) + f.read()
        new_theme_text = begin_text + end_text

    with codecs.open(new_theme_file_path, 'w', 'utf-8') as f:
        f.write(new_theme_text)
Example #14
    def _parse_hosts_inventory(self, inventory_path):
        """
        Read all the available hosts inventory information into one big list
        and parse it.
        """
        hosts_contents = []
        if os.path.isdir(inventory_path):
            self.log.debug("Inventory path {} is a dir. Looking for inventory files in that dir.".format(inventory_path))
            for fname in os.listdir(inventory_path):
                # Skip .git folder
                if fname == '.git':
                    continue
                path = os.path.join(inventory_path, fname)
                if os.path.isdir(path):
                    continue
                with codecs.open(path, 'r', encoding='utf8') as f:
                    hosts_contents += f.readlines()
        else:
            self.log.debug("Inventory path {} is a file. Reading as inventory.".format(inventory_path))
            with codecs.open(inventory_path, 'r', encoding='utf8') as f:
                hosts_contents = f.readlines()

        # Parse inventory and apply it to the hosts
        hosts_parser = parser.HostsParser(hosts_contents)
        for hostname, key_values in hosts_parser.hosts.items():
            self.update_host(hostname, key_values)
Example #15
def _translate_document(path):
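    # Translate each document only once; results are cached in _translated_documents keyed by source path.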
    if path not in _translated_documents:
        with codecs.open(path, "r", "utf-8") as infile:
            with codecs.open(_compiled_path(path), "w", "utf-8") as outfile:
                _translated_documents[path] = DocumentTranslator(infile, outfile, path)
                _translated_documents[path].translate()
    return _translated_documents[path]
Example #16
def copy_static_entry(source, targetdir, builder, context={},
                      exclude_matchers=(), level=0):
    """Copy a HTML builder static_path entry from source to targetdir.

    Handles all possible cases of files, directories and subdirectories.
    """
    if exclude_matchers:
        relpath = relative_path(builder.srcdir, source)
        for matcher in exclude_matchers:
            if matcher(relpath):
                return
    if path.isfile(source):
        target = path.join(targetdir, path.basename(source))
        if source.lower().endswith('_t') and builder.templates:
            # templated!
            fsrc = open(source, 'r', encoding='utf-8')
            fdst = open(target[:-2], 'w', encoding='utf-8')
            fdst.write(builder.templates.render_string(fsrc.read(), context))
            fsrc.close()
            fdst.close()
        else:
            copyfile(source, target)
    elif path.isdir(source):
        if level == 0:
            for entry in os.listdir(source):
                if entry.startswith('.'):
                    continue
                copy_static_entry(path.join(source, entry), targetdir,
                                  builder, context, level=1,
                                  exclude_matchers=exclude_matchers)
        else:
            target = path.join(targetdir, path.basename(source))
            if path.exists(target):
                shutil.rmtree(target)
            shutil.copytree(source, target)
Example #17
def tf_idf_predict_v(dicts, filename, filename1, filename2):
    t = CHI.dict_df(filename)
    file1 = codecs.open(filename1, 'r', 'utf-8')
    readlists = file1.readlines()
    l = len(readlists)
    print(len(dicts))
    fileHandle = codecs.open(filename2, 'a', "utf-8")
    for i in range(l):
        texts = readlists[i].strip().split(' ')
        data = [0 for _ in range(len(dicts))]
        l_word = len(texts)
        mid_dict = dict()  # term frequencies for the current line only
        for text in texts:
            if text in mid_dict:
                mid_dict[text] += 1
            else:
                mid_dict[text] = 1
        for k, x in mid_dict.items():
            if k in dicts:
                tf = float(x) / l_word
                idf = math.log(8000 / float(t[k]))
                data[int(dicts[k]) - 1] = tf * idf
        for k, x in enumerate(data):
            fileHandle.write(str(k) + ':' + str(x) + ' ')
        fileHandle.write('\n')
    fileHandle.close()
    file1.close()
Example #18
def _add_license(root, license_path="include-license.txt", whitelist=[]):
    """
    Read a license from license_path and append it to all files under root
    whose extension is in _license_exts.
    """
    if not os.path.isfile(license_path):
        return

    lfile = codecs.open(license_path, "r", encoding="utf_8_sig")
    license = lfile.read()
    lfile.close()

    license_files = []

    for base, dirs, files in os.walk(root):
        if whitelist:
            bl = [d for d in dirs if not d in whitelist]
            while bl:
                dirs.pop(dirs.index(bl.pop()))

        license_files.extend([os.path.join(base, f) for f in files if f.endswith(_license_exts)])

    for f in license_files:
        source = codecs.open(f, "r", encoding="utf_8_sig")
        tmpfd, tmppath = tempfile.mkstemp(".tmp", "dfbuild.")
        tmpfile = os.fdopen(tmpfd, "w")
        wrapped = codecs.getwriter("utf_8_sig")(tmpfile)
        wrapped.write(license)
        wrapped.write("\n")
        wrapped.write(source.read())
        source.close()
        tmpfile.close()
        shutil.copy(tmppath, f)
        os.unlink(tmppath)
Example #19
def _ansi2utf8(path):
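    # Re-write the file as UTF-8 with a BOM (utf_8_sig).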
    f = codecs.open(path, "r", "utf-8")
    c = f.read()
    f.close()
    f = codecs.open(path, "w", "utf_8_sig")
    f.write(c)
    f.close()
Example #20
    def load_json(self, name, config_old, save=False):
        config_new = config_old
        path = './res_mods/configs/spoter_mods/%s/' % self.name
        if not os.path.exists(path):
            os.makedirs(path)
        new_path = '%s%s.json' % (path, name)
        if save:
            with codecs.open(new_path, 'w', encoding='utf-8-sig') as json_file:
                data = json.dumps(config_old, sort_keys=True, indent=4, ensure_ascii=False, encoding='utf-8-sig', separators=(',', ': '))
                json_file.write('%s' % self.byte_ify(data))
                json_file.close()
                config_new = config_old
        elif os.path.isfile(new_path):
            try:
                with codecs.open(new_path, 'r', encoding='utf-8-sig') as json_file:
                    data = self.json_comments(json_file.read().decode('utf-8-sig'))
                    config_new = self.byte_ify(json.loads(data))
                    json_file.close()
            except Exception as e:
                self.sys_mess()
                print '%s%s' % (self.sys_mes['ERROR'], e)

        else:
            self.sys_mess()
            print '%s[%s, %s %s]' % (self.sys_mes['ERROR'], self.code_pa(self.description), self.version, self.sys_mes['MSG_RECREATE_CONFIG'])
            with codecs.open(new_path, 'w', encoding='utf-8-sig') as json_file:
                data = json.dumps(config_old, sort_keys=True, indent=4, ensure_ascii=False, encoding='utf-8-sig', separators=(',', ': '))
                json_file.write('%s' % self.byte_ify(data))
                json_file.close()
                config_new = config_old
            print '%s[%s, %s %s]' % (self.sys_mes['INFO'], self.code_pa(self.description), self.version, self.sys_mes['MSG_RECREATE_CONFIG_DONE'])
        return config_new
Example #21
 def _readTrainFile(self, inputFile, outputSeg, outputPos, tagNum):
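     # Convert annotated "word/tag" training data into a segmentation file (outputSeg) and a POS file (outputPos).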
     outSeg = codecs.open(outputSeg, 'w', self._textEncoding)
     outPos = codecs.open(outputPos, 'w', self._textEncoding)
     with codecs.open(inputFile, 'r', self._textEncoding) as inFile:
         for line in inFile:
             ret = line.strip().split()
             if not ret:
                 continue
             for item in ret[1:]:
                 if not item:
                     continue
                 index1 = item.find(u'[')
                 if index1 >= 0:
                     item = item[index1+1:]
                 index2 = item.find(u']')
                 if index2 > 0:
                     item = item[:index2]
                 word, tag = item.split(u'/')
                 if tag == 'w' and word in [u'。', u',']:
                     outSeg.write('\n')
                     outPos.write('\n')
                     continue
                 outPos.write('%s %s\n' % (word, tag))
                 if word:
                     if tagNum == 4:
                         self._write4Tag(word, outSeg)
                     elif tagNum == 6:
                         self._write6Tag(word, outSeg)
             outSeg.write('\n')
             outPos.write('\n')
     outSeg.close()
     outPos.close()
Example #22
def GetLatestNews(input_file):
#	latest_news = []
#	if os.path.exists(input_file):
#		lines = GetLastLines(input_file)
#		for line in lines:
#			latest_news.append(line.split('\t')[1])
#	return latest_news
	latest_news = {}
	if os.path.exists(input_file):
		input = codecs.open(input_file, encoding = 'utf-8')
		lines = input.readlines()
		for line in lines:
			latest_news[line.split('\t')[1]] = 0
		input.close()

	folder = input_file.split('/')[0]
	date = input_file.split('/')[1].split('.')[0]
	today = datetime.datetime.strptime(date, '%Y-%m-%d').date()
	yesterday = str(today - datetime.timedelta(days=1))
	input_file = folder + '/' + yesterday + '.txt'

	if os.path.exists(input_file):
		input = codecs.open(input_file, encoding = 'utf-8')
		lines = input.readlines()
		for line in lines:
			latest_news[line.split('\t')[1]] = 0
		input.close()

	return latest_news
Example #23
 def run(self, input, output, tokenizer=False, pathBioModel=None):
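     # Run the two-stage BLLIP parser as a pipeline: first-stage parseIt feeds second-stage best-parses.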
     if pathBioModel != None:
         assert os.path.exists(pathBioModel), pathBioModel
     if tokenizer:
         print >> sys.stderr, "Running BLLIP parser with tokenization"
         firstStageArgs = ["first-stage/PARSE/parseIt", "-l999", "-N50"]
     else:
         print >> sys.stderr, "Running BLLIP parser without tokenization"
         firstStageArgs = ["first-stage/PARSE/parseIt", "-l999", "-N50" , "-K"]
     secondStageArgs = ["second-stage/programs/features/best-parses", "-l"]
     if pathBioModel != None:
         firstStageArgs += [pathBioModel+"/parser/"]
         secondStageArgs += [pathBioModel+"/reranker/features.gz", pathBioModel+"/reranker/weights.gz"]
     else:
         firstStageArgs += ["first-stage/DATA/EN/"]
         secondStageArgs += ["second-stage/models/ec50spfinal/features.gz", "second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz"]
     print >> sys.stderr, "1st Stage arguments:", firstStageArgs
     print >> sys.stderr, "2nd Stage arguments:", secondStageArgs 
     firstStage = subprocess.Popen(firstStageArgs,
                                   stdin=codecs.open(input, "rt", "utf-8"),
                                   stdout=subprocess.PIPE)
     secondStage = subprocess.Popen(secondStageArgs,
                                    stdin=firstStage.stdout,
                                    stdout=codecs.open(output, "wt", "utf-8"))
     return ProcessUtils.ProcessWrapper([firstStage, secondStage])
Example #24
def mksnt(voc_f,txt_f,ext=True,verbose=0):
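    # Map every word in txt_f to its id from voc_f, assign fresh ids to out-of-vocabulary words, and print the id sequences.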
    voc = open(voc_f,'r',encoding='utf-8')
    data = []
    dic = {}
    n_snt = 0
    n_wrd = 0
    for ii,txt in  enumerate(open(txt_f,'r',encoding='utf-8')):
        n_snt += 1
        words = txt.strip().split()
        for word in words:
            n_wrd += 1
            dic[word] = 0
        data.append(words)

    if verbose:
        sys.stderr.write(repr(n_snt)+' sents, '+repr(n_wrd)+' words, '+repr(len(dic))+' vocab\n')

    n_voc = 0
    max_voc = 0
    for ii,txt in enumerate(open(voc_f,'r',encoding='utf-8')):
        n_voc += 1
        a,w = txt.split()[:2]
        a = int(a)
        if a > max_voc:
            max_voc = a
        v = dic.get(w)
        if v is None:
            continue
        if v == 0:
            dic[w] = int(a)
            continue
        
        if verbose > 1:
            sys.stderr.write('collision: dic '+repr(v)+' vs. voc '+repr(a)+"\n")

    if verbose:
        sys.stderr.write('vsz = '+repr(n_voc)+"\n")
    
    oov = set()
    i_ext = max_voc + 1
    for w in dic:
        if dic[w] == 0:
            dic[w] = i_ext
            i_ext += 1
            oov.add(w)

    for words in data:
        for word in words:
            v = dic.get(word)
            print(v, end=' ')
        print()

    if verbose:
        sys.stderr.write('oov = '+repr(len(oov))+"\n")

    if ext:
        for w in oov:
            sys.stderr.write(repr(dic[w])+' '+w+' 1\n')

    return 0
Example #25
def main():
    if len(argv) < 3:
        exit('usage: %s quran_file trans_file' % argv[0])
    charset = 'utf-8'
    
    quran_file = argv[1]
    #prepare surah numbers to be split
    surah_numbers = set()
         
    # read quran translation file and split each surah in a list    
    surah = []
    description = []
    surah_trans = {}
    trans_lines = codecs.open(quran_file, 'r', charset).read().split('\n')
    current = 1
    for line in trans_lines:
        #line = str(line).strip().replace('\xef\xbb\xbf', '')
        if line=='' or line.startswith('#'): 
            description.append(line)
            continue 
        parts = line.rpartition('|')
        surah.append(parts[2])
    
    #dest = ''.join( [ quran_file, ".trans"])
    dest = argv[2]
    codecs.open(dest, 'w', charset).write(linesep.join(surah))
Example #26
 def wrap_encrypt(path, password):
     """Wrap a post with encryption."""
     with codecs.open(path, 'rb+', 'utf8') as inf:
         data = inf.read() + "<!--tail-->"
     data = CRYPT.substitute(data=rc4(password, data))
     with codecs.open(path, 'wb+', 'utf8') as outf:
         outf.write(data)
Example #27
def chg_sys_time(request):
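    # Django view: switch the system date via the external `chgtime` helper and prepend an audit entry to record.log.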
    current_time = time.strftime('%Y-%m-%d %H:%M:%S')
    d = time.strftime('{"Y": "%Y", "M": "%m", "D": "%d", "H": "%H", "I": "%M", "S": "%S", "W": "%w"}')
    if request.POST:
        flag = request.POST.get('flag', None)
        if not flag:
            tname = request.POST['name_who']
            tchoice = request.POST['date_choice']
            tcomment = request.POST['comment']
            if com_date(tchoice, current_time):
                s = subprocess.call("chgtime 'date -d \"{0}\"' >>/mnt/lisp/djchgtiem/templates/data/djchgtiem.log 2>&1".format(tchoice), shell=True)
                if not s.real:
                    logline = u"{0} 在{1} 把时间切到了{2},理由是:{3}".format(tname, current_time, tchoice, tcomment)
                    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
                    with codecs.open(base_dir + '/templates/data/record.log', 'r', 'utf-8') as original:
                        data = original.read()
                    with codecs.open(base_dir + '/templates/data/record.log', 'w', 'utf-8') as fw:
                        fw.write("<p>" + logline + "</p>\n")
                        fw.write(data)
                    return HttpResponse("<p>" + logline + "</p>")
                else:
                    return HttpResponse(u"<p> 日切失败!请联系管理员~ </p>")
            else:
                return HttpResponse(u"<p> 您选择的时间小于当前时间! </p>")
        else:
            return HttpResponse(d)
Example #28
def cross_texts(pText1, pText2, pOutput):

	FILE_TEXT1 = pText1
	FILE_TEXT2 = pText2
	FILE_OUTPUT = pOutput

	file1 = codecs.open(FILE_TEXT1, "r", "utf-8")
	file2 = codecs.open(FILE_TEXT2, "r", "utf-8")
	file3 = codecs.open(FILE_OUTPUT, "wb", "utf-8")

	content_file1 = file1.readlines()
	content_file2 = file2.readlines()

	#exclude the first seven elements because they do not contribute anything
	list1 = remove_garbage(7, content_file1)
	list2 = remove_garbage(7, content_file2)

	#Cross each term from text1 with all terms from text2
	for x in list1:
		for y in list2:
			file3.write(x + ": \n".encode("utf-8"))
			file3.write(y + ": \n".encode("utf-8"))
			file3.write(x + "_" + y + ":\n".encode("utf-8"))

	file1.close()
	file2.close()
	file3.close()
Example #29
    def file_changed(self, path):
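        # The watched file changed on disk: ask whether to save it under a new name, reload it, or cancel.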
        response = False
        SAVE = u"名前をつけて保存"
        RELOAD = u"ファイルを再読込"
        CANCEL = u"キャンセル"
        message = QtGui.QMessageBox(self)
        message.setText(u"ファイルは変更されています")
        message.setWindowTitle("Notepad")
        message.setIcon(QtGui.QMessageBox.Warning)
        message.addButton(SAVE, QtGui.QMessageBox.AcceptRole)
        message.addButton(RELOAD, QtGui.QMessageBox.DestructiveRole)
        message.addButton(CANCEL, QtGui.QMessageBox.RejectRole)
        message.setDetailedText(str(path) + u" は、他のアプリケーションで内容を変更されたか削除されました。どうしますか?")
        message.exec_()
        response = message.clickedButton().text()

        if response == SAVE:
            fd = QtGui.QFileDialog(self)
            newfile = fd.getSaveFileName()
            if newfile:
                s = codecs.open(newfile, "w", "utf-8")
                s.write(unicode(self.ui.editor_window.toPlainText()))
                s.close()
                self.ui.button_save.setEnabled(False)
                if self.filename and str(newfile) != str(self.filename):
                    self.watcher.removePath(self.filename)
                    self.watcher.addPath(newfile)
                    self.filename = newfile
        elif response == RELOAD:
            s = codecs.open(self.filename, "r", "utf-8").read()
            self.ui.editor_window.setPlainText(s)
            self.ui.button_save.setEnabled(False)
Example #30
 def prepare(self):
     try:
         cssFile = codecs.open(self._cssFilename, "r", "utf-8")
     except IOError:
         self._outLogFunc("** Warning: Could not open stylesheet file; the output HTML will be ugly.")
         cssContent = ""
     else:
         cssContent = cssFile.read()
         cssFile.close()
     
     self._outfile = codecs.open(self._filename + ".html", "w", "utf-8")
     self._outfile.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
     self._outfile.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
     self._outfile.write('\t<head>\n')
     self._outfile.write('\t\t<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
     self._outfile.write((b'\t\t<title>' + self._displayName.encode("utf-8") + b'\'s plurk Backup</title>\n').decode("utf-8"))
     self._outfile.write('\t\t<style type="text/css">\n')
     self._outfile.write(cssContent)
     self._outfile.write('\t\t</style>\n')
     self._outfile.write('\t</head>\n')
     self._outfile.write('\t<body>\n')
     self._outfile.write((b'\t\t<h1>' + self._displayName.encode("utf-8") + b'\'s plurk Backup</h1>\n').decode("utf-8"))
     self._outfile.write('\t\t<p class="smallnote">\n')
     self._outfile.write('\t\t\tClick on a plurk\'s timestamp to go to its page on plurk.com .\n')
     self._outfile.write('\t\t</p>\n')
Example #31
def process_config(config_path, py3_wrapper=None):
    """
    Parse i3status.conf so we can adapt our code to the i3status config.
    """
    def notify_user(error):
        if py3_wrapper:
            py3_wrapper.notify_user(error)
        else:
            print(error)

    def parse_config(config):
        """
        Parse text or file as a py3status config file.
        """

        if hasattr(config, "readlines"):
            config = "".join(config.readlines())
        parser = ConfigParser(config, py3_wrapper)
        parser.parse()
        parsed = parser.config
        del parser
        return parsed

    def parse_config_error(e, config_path):
        # There was a problem use our special error config
        error = e.one_line(config_path)
        notify_user(error)
        # to display correctly in i3bar we need to do some substitutions
        for char in ['"', "{", "|"]:
            error = error.replace(char, "\\" + char)
        error_config = Template(ERROR_CONFIG).substitute(error=error)
        return parse_config(error_config)

    config = {}

    # get the file encoding this is important with multi-byte unicode chars
    try:
        encoding = check_output(
            ["file", "-b", "--mime-encoding", "--dereference", config_path])
        encoding = encoding.strip().decode("utf-8")
    except CalledProcessError:
        # bsd does not have the --mime-encoding so assume utf-8
        encoding = "utf-8"
    try:
        with codecs.open(config_path, "r", encoding) as f:
            try:
                config_info = parse_config(f)
            except ParseException as e:
                config_info = parse_config_error(e, config_path)
    except LookupError:
        with codecs.open(config_path) as f:
            try:
                config_info = parse_config(f)
            except ParseException as e:
                config_info = parse_config_error(e, config_path)

    # update general section with defaults
    general_defaults = GENERAL_DEFAULTS.copy()
    if "general" in config_info:
        general_defaults.update(config_info["general"])
    config["general"] = general_defaults

    config["py3status"] = config_info.get("py3status", {})
    modules = {}
    on_click = {}
    i3s_modules = []
    py3_modules = []
    module_groups = {}

    def process_onclick(key, value, group_name):
        """
        Check on_click events are valid.  Store if they are good
        """
        button_error = False
        button = ""
        try:
            button = key.split()[1]
            if int(button) not in range(1, 20):
                button_error = True
        except (ValueError, IndexError):
            button_error = True

        if button_error:
            err = "Invalid on_click for `{}`. Number not in range 1-20: `{}`."
            notify_user(err.format(group_name, button))
            return False
        clicks = on_click.setdefault(group_name, {})
        clicks[button] = value
        return True

    def get_module_type(name):
        """
        i3status or py3status?
        """
        if name.split()[0] in I3S_MODULE_NAMES:
            return "i3status"
        return "py3status"

    def process_module(name, module, parent):
        if parent:
            modules[parent]["items"].append(name)
            mg = module_groups.setdefault(name, [])
            mg.append(parent)
            if get_module_type(name) == "py3status":
                module[".group"] = parent

        # check module content
        for k, v in list(module.items()):
            if k.startswith("on_click"):
                # on_click event
                process_onclick(k, v, name)
                # on_click should not be passed to the module via the config.
                del module[k]
            if isinstance(v, ModuleDefinition):
                # we are a container
                module["items"] = []
        return module

    def get_modules(data, parent=None):
        for k, v in data.items():
            if isinstance(v, ModuleDefinition):
                module = process_module(k, v, parent)
                modules[k] = module
                get_modules(v, parent=k)

    get_modules(config_info)

    config["order"] = []

    def remove_any_contained_modules(module):
        """
        takes a module definition and returns a dict without any modules that
        may be defined with it.
        """
        fixed = {}
        for k, v in module.items():
            if not isinstance(v, ModuleDefinition):
                fixed[k] = v
        return fixed

    def append_modules(item):
        module_type = get_module_type(item)
        if module_type == "i3status":
            if item not in i3s_modules:
                i3s_modules.append(item)
        else:
            if item not in py3_modules:
                py3_modules.append(item)

    def add_container_items(module_name):
        module = modules.get(module_name, {})
        items = module.get("items", [])
        for item in items:
            if item in config:
                continue

            append_modules(item)
            module = modules.get(item, {})
            config[item] = remove_any_contained_modules(module)
            # add any children
            add_container_items(item)

    # create config for modules in order
    for name in config_info.get("order", []):
        module_name = name.split(" ")[0]
        if module_name in RETIRED_MODULES:
            notify_user(
                "Module `{}` is no longer available".format(module_name) +
                ". Alternative modules are: {}.".format(", ".join(
                    "`{}`".format(x) for x in RETIRED_MODULES[module_name])))
            continue
        module = modules.get(name, {})
        config["order"].append(name)
        add_container_items(name)
        append_modules(name)

        config[name] = remove_any_contained_modules(module)

    config["on_click"] = on_click
    config["i3s_modules"] = i3s_modules
    config["py3_modules"] = py3_modules
    config[".module_groups"] = module_groups

    # time and tztime modules need a format for correct processing
    for name in config:
        if name.split()[0] in TIME_MODULES and "format" not in config[name]:
            if name.split()[0] == "time":
                config[name]["format"] = TIME_FORMAT
            else:
                config[name]["format"] = TZTIME_FORMAT

    if not config["order"]:
        notify_user("Your configuration file does not list any module"
                    ' to be loaded with the "order" directive.')
    return config
Example #32
    def _parse_in_more_detail_XML(self):
        """Parse unimod xml.

        Returns:
            list: list of dicts with information regarding a unimod
        """
        data_list = []
        for xml_path in self.unimod_xml_names:
            xml_path = Path(xml_path)
            if os.path.exists(xml_path) is False:
                logger.warning(f"{xml_path} does not exist")
                continue

            logger.info("Parsing mod xml file ({0})".format(xml_path))
            unimodXML = ET.iterparse(
                codecs.open(xml_path, "r", encoding="utf8"),
                events=(b"start", b"end"),
            )
            for event, element in unimodXML:
                if event == b"start":
                    if element.tag.endswith("}mod"):
                        tmp = {
                            "Name": element.attrib["title"],
                            "Accession":
                            str(element.attrib.get("record_id", "")),
                            "Description": element.attrib.get("full_name", ""),
                            "elements": {},
                            "specificity": [],
                            "PSI-MS approved": False,
                        }
                        if element.attrib.get("approved", "0") == "1":
                            tmp["PSI-MS approved"] = True
                            tmp["PSI-MS Name"] = element.attrib["title"]
                    elif element.tag.endswith("}delta"):
                        tmp["mono_mass"] = float(element.attrib["mono_mass"])
                    elif element.tag.endswith("}alt_name"):
                        tmp["Alt Description"] = element.text
                    else:
                        pass
                else:
                    # end mod

                    if element.tag.endswith("}delta"):
                        tmp["elements"] = self._extract_elements(element)

                    elif element.tag.endswith("}specificity"):
                        amino_acid = element.attrib["site"]
                        classification = element.attrib["classification"]
                        if classification == "Artefact":
                            continue

                        neutral_loss_elements = {}
                        neutral_loss_mass = 0
                        if len(element) > 0:
                            for sub_element in element.iter():
                                if (sub_element.tag.endswith("}NeutralLoss")
                                        and len(sub_element) > 0):

                                    neutral_loss_elements = self._extract_elements(
                                        sub_element)
                                    neutral_loss_mass = float(
                                        sub_element.attrib["mono_mass"])
                        tmp["specificity"].append(
                            f"{amino_acid}<|>{classification}<|>{neutral_loss_elements}<|>{neutral_loss_mass}"
                        )

                    elif element.tag.endswith("}mod"):
                        data_list.append(tmp)
                    else:
                        pass
        return data_list
Example #33
table_list = ['nodes', 'nodes_tags', 'ways', 'ways_tags', 'ways_nodes']

con = sqlite3.connect("safety_harbor.db")
cur = con.cursor()

#drop tables if they exist so we do not insert duplicate data
for tablename in table_list:
    stmt = "DROP TABLE IF EXISTS " + tablename
    cur.execute(stmt)
    con.commit()

# create nodes table
cur.execute("CREATE TABLE IF NOT EXISTS nodes (id, lat, lon, user, uid, version, changeset, timestamp);")

# load table
with codecs.open('nodes.csv', encoding='utf-8-sig') as fin:
    dr = csv.DictReader(fin)
    pprint.pprint(dr.fieldnames)
    to_db = [(i['id'], i['lat'], i['lon'], i['user'], i['uid'], i['version'], i['changeset'], i['timestamp']) for i in dr]

cur.executemany("INSERT INTO nodes (id, lat, lon, user, uid, version, changeset, timestamp) \
                VALUES (?, ?, ?, ?, ?, ?, ?, ?);", to_db)


# create nodes_tags table
cur.execute("CREATE TABLE IF NOT EXISTS nodes_tags (id, key, value, type);")

# load table
with codecs.open('nodes_tags.csv', encoding='utf-8-sig') as fin:
    dr = csv.DictReader(fin)
    pprint.pprint(dr.fieldnames)
Example #34
    def writeaveragemaxminarray(file, average, maxsetspec, minsetspec):
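        # Write CM delta max/min limit tables (average widened by maxsetspec/minsetspec), plus optional key-row limits.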
        output = codecs.open(file, 'w')
        linenumber = 0
        havekey = 0

        if len(average) == 1:
            averagemax = (np.array(average[0])) * (1 + maxsetspec)
            averagemin = (np.array(average[0])) * (1 - minsetspec)
            havekey = 0

        elif len(average) == 2:
            havekey = 1
            averagemax = (np.array(average[0])) * (1 + maxsetspec)
            averagemin = (np.array(average[0])) * (1 - minsetspec)
            keymax = (np.array(average[1])) * (1 + maxsetspec)
            keymin = (np.array(average[1])) * (1 - minsetspec)
            '''print(averagemax)
            print(averagemin)
            print(keymax)
            print(keymin)'''

        for line in averagemax:
            output.write('CM_DELTA_MAX_ROW' + str("%02d" % linenumber) + ' ' +
                         '=' + '  ')
            inumber = 0
            for avdata in line:
                if avdata == 0:
                    avdata = 5
                else:
                    pass

                if inumber == (len(line) - 1):
                    output.write(str(int(avdata)) + '\n')
                else:
                    output.write(str(int(avdata)) + ',' + '   ')
                inumber += 1
            linenumber += 1

        if havekey == 1:
            output.write('CM_DELTA_MAX_KEY' + '   ' + '=' + '  ')
            inumber = 0
            for i in keymax:
                if i == 0:
                    i = 5
                else:
                    pass
                if inumber == (len(keymax) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1

        output.write('\n' + '\n' + '; cm delta min' + '\n')
        linenumber = 0

        for line in averagemin:
            output.write('CM_DELTA_MIN_ROW' + str("%02d" % linenumber) + ' ' +
                         '=' + '  ')
            inumber = 0
            for i in line:
                if inumber == (len(line) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1
            linenumber += 1

        if havekey == 1:
            inumber = 0
            output.write('CM_DELTA_MIN_KEY' + '   ' + '=' + '  ')
            for i in keymin:
                if i == 0:
                    i = 5
                else:
                    pass
                if inumber == (len(keymin) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1
        output.close()
Example #35
    def filedirectorycatchdata(
            filedirectory):  # read the log csv files; output format is [[all panel data][all key data]]
        global L, usefuldatafile
        listfile = os.listdir(filedirectory)
        L = [
            filename for filename in listfile
            if filename[-4:] == '.csv' and not filename.find('summary') != -1
        ]
        print('   ' + '-' * 19 + '导入文件' + '-' * 20)

        alldata = []
        allsampledata = []
        allsamplekeydata = []
        allsamplecptestdata = []
        allsamplecpshortdata = []
        nodatafile = []
        usefuldatafile = []

        for fileadr in L:
            try:  # handle files whose invalid encoding breaks linecache
                linecache.updatecache(filedirectory + fileadr)
                linefile = linecache.getlines(filedirectory + fileadr)
            except Exception:
                print(str(fileadr) + '该文件数据存在非法字符')
                newfile = codecs.open(filedirectory + fileadr, 'r', 'gbk',
                                      'ignore')
                text = newfile.read()
                newfile.close()
                with codecs.open(filedirectory + fileadr, 'w') as newfile2:
                    newfile2.write(text)
                linecache.updatecache(filedirectory + fileadr)
                linefile = linecache.getlines(filedirectory + fileadr)
            '''print(filedirectory+fileadr)'''
            linenumber = 0
            starline = 0
            endline = 0
            sampledata = []
            keyarray = []

            cpteststartline = 0
            cptestendline = 0
            cpshortstartline = 0
            cpshortendline = 0
            sampledata = []
            keyarray = []
            samplecpselfdata = []
            samplecpshortdata = []

            for line in linefile:
                linenumber += 1
                if line.find('CMDelta Test Start') != -1:
                    starline = linenumber

                if line.find('CMDelta Test End') != -1:
                    endline = linenumber

                if line.find('Self Cp Test Start') != -1:  # Self CP test section
                    cpteststartline = linenumber

                if line.find('Self Cp Test End') != -1:
                    cptestendline = linenumber

                if line.find('CP_SHORT Test Start') != -1:  # CP Short test section
                    cpshortstartline = linenumber
                    #print(cpshortstartline)

                if line.find('CP_SHORT Test End') != -1:
                    cpshortendline = linenumber
            datanumber = 0

            if starline != 0 and endline != 0:
                dataline = linefile[starline:endline]

                for data in dataline:
                    if data.find('[Row00]') != -1:
                        datastar = datanumber
                    if data.find('CM Delta Key') != -1:
                        dataend = datanumber
                    datanumber += 1
                keydata = dataline[dataend:endline]
                del keydata[0]
                del keydata[-1]
                keyarray = []

                for k in keydata:
                    if k == '\n':
                        pass
                    else:
                        keyread = k.split(',')
                        keyrealdata = keyread[:-1]
                        for i in keyrealdata:
                            if i == '' or i == '\n':
                                pass
                            else:
                                newkey = (((((i.replace('[', '')).replace(
                                    ']', '')).replace('{', '')).replace(
                                        '}',
                                        '')).replace('\n',
                                                     '')).replace('\t', '')
                                keyarray.append(int(newkey))

                data = dataline[datastar:dataend - 1]
                for datare in data:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (((((s.replace('[', '')).replace(
                                    ']', '')).replace('{', '')).replace(
                                        '}',
                                        '')).replace('\n',
                                                     '')).replace('\t', '')
                                slist.append(int(news))
                        if len(slist) != 0:
                            sampledata.append(slist)
                usefuldatafile.append(str(fileadr))
            else:
                nodatafile.append(str(fileadr))

            if (len(sampledata) != 0):
                allsampledata.append(sampledata)
            if (len(keyarray) != 0):
                allsamplekeydata.append(keyarray)

            if cpteststartline != 0 and cptestendline != 0:  # extract the Self CP test data
                #print('try to catch self cp data')
                selfcpdatanumber = 0
                selfcpdataline = linefile[cpteststartline:cptestendline]

                for selfcpdata in selfcpdataline:
                    if selfcpdata.find('Row00') != -1:
                        selfdatastart = selfcpdatanumber
                    if selfcpdata.find(' Self Cp Test End') != -1:
                        selfdataend = selfcpdatanumber
                    selfcpdatanumber += 1

                selfcpdatafile = selfcpdataline[selfdatastart:selfdataend]

                for datare in selfcpdatafile:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist2 = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (((((s.replace('[', '')).replace(
                                    ']', '')).replace('{', '')).replace(
                                        '}',
                                        '')).replace('\n',
                                                     '')).replace('\t', '')
                                slist2.append(int(news))
                        if len(slist2) != 0:
                            samplecpselfdata.append(slist2)
            if (len(samplecpselfdata) != 0):
                #print(samplecpselfdata)
                allsamplecptestdata.append(samplecpselfdata)

            if cpshortstartline != 0 and cpshortendline != 0:  # extract the CP SHORT test data
                #print('try to catch SHORT data')
                selfshortnumber = 0
                cpshortline = linefile[cpshortstartline:cpshortendline]
                #print(cpshortline)

                for cpshortdata in cpshortline:
                    if cpshortdata.find('Row00') != -1:
                        cpshortstart = selfshortnumber
                    if cpshortdata.find(' CP_SHORT Test End') != -1:
                        cpshortend = selfshortnumber
                    selfshortnumber += 1

                cpshortfile = cpshortline[cpshortstart:cpshortend]

                #print(cpshortfile)
                for datare in cpshortfile:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist3 = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (((((s.replace('[', '')).replace(
                                    ']', '')).replace('{', '')).replace(
                                        '}',
                                        '')).replace('\n',
                                                     '')).replace('\t', '')
                                slist3.append(int(news))
                        if len(slist3) != 0:
                            samplecpshortdata.append(slist3)
            if (len(samplecpshortdata) != 0):
                #print(samplecpshortdata)
                allsamplecpshortdata.append(samplecpshortdata)

        print('*' * 19 + '数据不存在样品' + '*' * 19)
        print(nodatafile)
        print('\n')
        '''print('-'*19+'Valid files'+'-'*19)
        print(usefuldatafile)'''
        alldata.append(allsampledata)
        if (len(allsamplekeydata) != 0):
            alldata.append(allsamplekeydata)
        return alldata
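
# The bracket-stripping / int-parsing chain above is repeated for every data
# section. A minimal helper that would perform the same cleanup (a sketch,
# assuming brackets only wrap the ends of a cell) could look like this:
def parse_log_row(row):
    """Split a CSV log row and return the numeric cells as ints."""
    values = []
    for cell in row.split(',')[1:]:           # drop the leading row label
        cleaned = cell.strip().strip('[]{}')  # remove whitespace and bracket characters
        if cleaned:
            values.append(int(cleaned))
    return values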
Пример #36
0
def Maketestdataspec(inputfiledirectory, vatargetpercent, keytargetpercent,
                     outputaveragefile, alldataoutputfile, outputmaxsetspec,
                     outputminsetspec):
    def filedirectorycatchdata(filedirectory):  # read the log csv files; returns [[all panel (VA) data], [all key data]]
        global L, usefuldatafile
        listfile = os.listdir(filedirectory)
        L = [
            filename for filename in listfile
            if filename[-4:] == '.csv' and not filename.find('summary') != -1
        ]
        print('   ' + '-' * 19 + 'Imported files' + '-' * 20)

        alldata = []
        allsampledata = []
        allsamplekeydata = []
        allsamplecptestdata = []
        allsamplecpshortdata = []
        nodatafile = []
        usefuldatafile = []

        for fileadr in L:
            try:  # work around files whose invalid encoding breaks linecache
                linecache.updatecache(filedirectory + fileadr)
                linefile = linecache.getlines(filedirectory + fileadr)
            except Exception:
                print(str(fileadr) + ' contains invalid characters')
                newfile = codecs.open(filedirectory + fileadr, 'r', 'gbk',
                                      'ignore')
                text = newfile.read()
                newfile.close()
                with codecs.open(filedirectory + fileadr, 'w') as newfile2:
                    newfile2.write(text)
                linecache.updatecache(filedirectory + fileadr)
                linefile = linecache.getlines(filedirectory + fileadr)
            '''print(filedirectory+fileadr)'''
            linenumber = 0
            starline = 0
            endline = 0
            sampledata = []
            keyarray = []
            samplecpselfdata = []
            samplecpshortdata = []

            cpteststartline = 0
            cptestendline = 0
            cpshortstartline = 0
            cpshortendline = 0

            for line in linefile:
                linenumber += 1
                if line.find('CMDelta Test Start') != -1:
                    starline = linenumber

                if line.find('CMDelta Test End') != -1:
                    endline = linenumber

                if line.find('Self Cp Test Start') != -1:  # also capture the Self Cp test section
                    cpteststartline = linenumber

                if line.find('Self Cp Test End') != -1:
                    cptestendline = linenumber

                if line.find('CP_SHORT Test Start') != -1:  # also capture the CP_SHORT test section
                    cpshortstartline = linenumber
                    #print(cpshortstartline)

                if line.find('CP_SHORT Test End') != -1:
                    cpshortendline = linenumber
            datanumber = 0

            if starline != 0 and endline != 0:
                dataline = linefile[starline:endline]

                for data in dataline:
                    if data.find('[Row00]') != -1:
                        datastar = datanumber
                    if data.find('CM Delta Key') != -1:
                        dataend = datanumber
                    datanumber += 1
                keydata = dataline[dataend:endline]
                del keydata[0]
                del keydata[-1]
                keyarray = []

                for k in keydata:
                    if k == '\n':
                        pass
                    else:
                        keyread = k.split(',')
                        keyrealdata = keyread[:-1]
                        for i in keyrealdata:
                            if i == '' or i == '\n':
                                pass
                            else:
                                newkey = (i.replace('[', '').replace(']', '')
                                          .replace('{', '').replace('}', '')
                                          .replace('\n', '').replace('\t', ''))
                                keyarray.append(int(newkey))

                data = dataline[datastar:dataend - 1]
                for datare in data:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (s.replace('[', '').replace(']', '')
                                        .replace('{', '').replace('}', '')
                                        .replace('\n', '').replace('\t', ''))
                                slist.append(int(news))
                        if len(slist) != 0:
                            sampledata.append(slist)
                usefuldatafile.append(str(fileadr))
            else:
                nodatafile.append(str(fileadr))

            if (len(sampledata) != 0):
                allsampledata.append(sampledata)
            if (len(keyarray) != 0):
                allsamplekeydata.append(keyarray)

            if cpteststartline != 0 and cptestendline != 0:  # extract the Self Cp test data
                #print('try to catch self cp data')
                selfcpdatanumber = 0
                selfcpdataline = linefile[cpteststartline:cptestendline]

                for selfcpdata in selfcpdataline:
                    if selfcpdata.find('Row00') != -1:
                        selfdatastart = selfcpdatanumber
                    if selfcpdata.find(' Self Cp Test End') != -1:
                        selfdataend = selfcpdatanumber
                    selfcpdatanumber += 1

                selfcpdatafile = selfcpdataline[selfdatastart:selfdataend]

                for datare in selfcpdatafile:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist2 = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (s.replace('[', '').replace(']', '')
                                        .replace('{', '').replace('}', '')
                                        .replace('\n', '').replace('\t', ''))
                                slist2.append(int(news))
                        if len(slist2) != 0:
                            samplecpselfdata.append(slist2)
            if (len(samplecpselfdata) != 0):
                #print(samplecpselfdata)
                allsamplecptestdata.append(samplecpselfdata)

            if cpshortstartline != 0 and cpshortendline != 0:  # extract the CP_SHORT test data
                #print('try to catch SHORT data')
                selfshortnumber = 0
                cpshortline = linefile[cpshortstartline:cpshortendline]
                #print(cpshortline)

                for cpshortdata in cpshortline:
                    if cpshortdata.find('Row00') != -1:
                        cpshortstart = selfshortnumber
                    if cpshortdata.find(' CP_SHORT Test End') != -1:
                        cpshortend = selfshortnumber
                    selfshortnumber += 1

                cpshortfile = cpshortline[cpshortstart:cpshortend]

                #print(cpshortfile)
                for datare in cpshortfile:
                    if datare == '\n':
                        pass
                    else:
                        dataread = datare.split(',')
                        d = dataread[1:]
                        slist3 = []
                        for s in d:
                            if s == '' or s == '\n':
                                pass
                            else:
                                news = (s.replace('[', '').replace(']', '')
                                        .replace('{', '').replace('}', '')
                                        .replace('\n', '').replace('\t', ''))
                                slist3.append(int(news))
                        if len(slist3) != 0:
                            samplecpshortdata.append(slist3)
            if (len(samplecpshortdata) != 0):
                #print(samplecpshortdata)
                allsamplecpshortdata.append(samplecpshortdata)

        print('*' * 19 + 'Samples with no data' + '*' * 19)
        print(nodatafile)
        print('\n')
        '''print('-'*19+'Valid files'+'-'*19)
        print(usefuldatafile)'''
        alldata.append(allsampledata)
        if (len(allsamplekeydata) != 0):
            alldata.append(allsamplekeydata)
        return alldata

    def makespec(testsampledata, targetpercent):
        def makeaverage(sampledata2):
            b = (np.array(sampledata2[0])) * 0
            for i in sampledata2:
                j = np.array(i)
                b = b + j
            average = b // (len(sampledata2))
            return average

        havengsample = 1
        ngfileadr = []

        while havengsample == 1:
            print('-' * 19 + 'Screening good samples' + '-' * 19)
            print('Sample count:', len(testsampledata))
            print('\n')
            sampleaverage = makeaverage(testsampledata)
            percentarray = []
            diffvaluearray = []

            for data in testsampledata:
                specvalue = abs(((np.array(data)) / sampleaverage) - 1)
                percentarray.append(specvalue)

                diffvalue = abs((np.array(data) - sampleaverage))
                diffvaluearray.append(diffvalue)

            testsamplenumber = 0
            samplenumber = 0
            ngsamplenumber = []
            havengsample = 0
            percentarray = np.nan_to_num(percentarray)
            diffvaluearray = np.nan_to_num(diffvaluearray)

            for samplepercent in percentarray:
                maxpercent = np.max(samplepercent)
                if maxpercent >= targetpercent:

                    # flatten the sample data from 2-D to 1-D for easier comparison
                    singellinepercent = samplepercent.flatten()
                    # flatten the differences from the average from 2-D to 1-D as well
                    singellinediff = diffvaluearray[testsamplenumber].flatten()
                    b = np.arange(len(singellinepercent))
                    # c holds the indices where this sample exceeds targetpercent
                    c = b[singellinepercent >= targetpercent]

                    for i in range(len(c)):
                        if singellinediff[c[i]] > 5:
                            havengsample = 1
                            ngsamplenumber.append(testsamplenumber)
                            del testsampledata[samplenumber]
                            samplenumber -= 1
                            break

                testsamplenumber += 1
                samplenumber += 1

            if havengsample == 1:
                for ng in ngsamplenumber:
                    ngfileadr.append(L[ng])
        print('*' * 19 + 'NG samples in the VA area' + '*' * 19)
        print(ngfileadr)
        print('Total NG samples in the VA area:', len(ngfileadr))
        print('\n')
        '''print(sampleaverage)'''
        return sampleaverage

    def makekeyspec(samplekeydata, targetpercent):
        def makekeyaverage(data):
            b = np.array(data[0]) * 0
            for i in data:
                j = np.array(i)
                b = b + j
            average = b // len(data)
            return average

        havengsample = 1
        ngfileadr = []

        while havengsample == 1:
            print('-' * 19 + 'Screening good key samples' + '-' * 19)
            print('Sample count:', len(samplekeydata))
            samplekeyaverage = makekeyaverage(samplekeydata)
            percentarray = []
            diffvaluearray = []

            for data in samplekeydata:
                specvalue = abs((((np.array(data)) / samplekeyaverage) - 1))
                percentarray.append(specvalue)

                diffvalue = abs((np.array(data)) - samplekeyaverage)
                diffvaluearray.append(diffvalue)

            testsamplenumber = 0
            samplenumber = 0
            ngsamplenumber = []
            havengsample = 0

            percentarray = np.nan_to_num(percentarray)
            diffvaluearray = np.nan_to_num(diffvaluearray)

            for samplepercent in percentarray:
                maxpercent = np.max(samplepercent)

                if maxpercent >= targetpercent:
                    maxlocation = np.where(
                        samplepercent == np.max(samplepercent))

                    maxdatanumbers = len(maxlocation)
                    diffarray = []

                    while (maxdatanumbers >= 1):
                        x = 0
                        row = maxlocation[x]
                        diff = diffvaluearray[testsamplenumber][row]
                        diffarray.append(diff)
                        maxdatanumbers -= 1
                        x += 1

                    maxdiff = np.max(diffarray)
                    if (maxdiff <= 5):
                        samplenumber += 1
                        break
                    else:
                        havengsample = 1
                        ngsamplenumber.append(testsamplenumber)
                        del samplekeydata[samplenumber]

                    testsamplenumber += 1

                else:
                    samplenumber += 1
                    testsamplenumber += 1

            if havengsample == 1:
                for ng in ngsamplenumber:
                    ngfileadr.append(L[ng])
        print('*' * 19 + 'NG key samples' + '*' * 19)
        print(ngfileadr)
        print('\n')
        return samplekeyaverage

    def writeaveragearray(file, average):
        output = codecs.open(file, 'w')
        linenumber = 0
        for line in average:
            output.write('CM_DELTA_ROW' + str("%02d" % linenumber) + ' ' +
                         '=' + '  ')
            inumber = 0
            for i in line:
                if inumber == (len(line) - 1):
                    output.write(str(i) + '\n')
                else:
                    output.write(str(i) + ',' + '   ')
                inumber += 1
            linenumber += 1
        output.close()

    def writeaveragemaxminarray(file, average, maxsetspec, minsetspec):
        output = codecs.open(file, 'w')
        linenumber = 0
        havekey = 0

        if len(average) == 1:
            averagemax = (np.array(average[0])) * (1 + maxsetspec)
            averagemin = (np.array(average[0])) * (1 - minsetspec)
            havekey = 0

        elif len(average) == 2:
            havekey = 1
            averagemax = (np.array(average[0])) * (1 + maxsetspec)
            averagemin = (np.array(average[0])) * (1 - minsetspec)
            keymax = (np.array(average[1])) * (1 + maxsetspec)
            keymin = (np.array(average[1])) * (1 - minsetspec)
            '''print(averagemax)
            print(averagemin)
            print(keymax)
            print(keymin)'''

        for line in averagemax:
            output.write('CM_DELTA_MAX_ROW' + str("%02d" % linenumber) + ' ' +
                         '=' + '  ')
            inumber = 0
            for avdata in line:
                if avdata == 0:
                    avdata = 5
                else:
                    pass

                if inumber == (len(line) - 1):
                    output.write(str(int(avdata)) + '\n')
                else:
                    output.write(str(int(avdata)) + ',' + '   ')
                inumber += 1
            linenumber += 1

        if havekey == 1:
            output.write('CM_DELTA_MAX_KEY' + '   ' + '=' + '  ')
            inumber = 0
            for i in keymax:
                if i == 0:
                    i = 5
                else:
                    pass
                if inumber == (len(keymax) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1

        output.write('\n' + '\n' + '; cm delta min' + '\n')
        linenumber = 0

        for line in averagemin:
            output.write('CM_DELTA_MIN_ROW' + str("%02d" % linenumber) + ' ' +
                         '=' + '  ')
            inumber = 0
            for i in line:
                if inumber == (len(line) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1
            linenumber += 1

        if havekey == 1:
            inumber = 0
            output.write('CM_DELTA_MIN_KEY' + '   ' + '=' + '  ')
            for i in keymin:
                if i == 0:
                    i = 5
                else:
                    pass
                if inumber == (len(keymin) - 1):
                    output.write(str(int(i)) + '\n')
                else:
                    output.write(str(int(i)) + ',' + '   ')
                inumber += 1
        output.close()

    averagedata = []
    sampleplotdata = []

    #def makecpselfspec(samplecpdata,maxspec,minspec):

    d = filedirectorycatchdata(inputfiledirectory)  # parse the logs once and reuse the result

    if len(d) == 1 and len(usefuldatafile) != 0:
        dataoutput = codecs.open(alldataoutputfile + 'alldata.csv', 'w+')
        for i in range(len(d[0])):
            dataoutput.write(str(usefuldatafile[i]) + ',')
            dataoutput.write(
                ((str((np.array(d[0][i]).flatten()).tolist())).replace(
                    '[', '')).replace(']', ''))
            dataoutput.write('\n')
            sampleplotdata.append((np.array(d[0][i]).flatten()).tolist())
        dataoutput.close()
        averagedata.append(makespec(d[0], vatargetpercent))

    elif len(d) == 2 and len(usefuldatafile) != 0:
        dataoutput = codecs.open(alldataoutputfile + 'alldata.csv', 'w+')
        for i in range(len(d[0])):
            dataoutput.write(str(usefuldatafile[i]) + ',')
            dataoutput.write(
                ((str(((np.array(d[0][i]).flatten()).tolist()) +
                      ((np.array(d[1][i]).flatten()).tolist()))).replace(
                          '[', '')).replace(']', ''))
            dataoutput.write('\n')
            sampleplotdata.append(((np.array(d[0][i]).flatten()).tolist()) +
                                  ((np.array(d[1][i]).flatten()).tolist()))
        dataoutput.close()
        averagedata.append(makespec(d[0], vatargetpercent))
        averagedata.append(makekeyspec(d[1], keytargetpercent))

    writeaveragemaxminarray(outputaveragefile, averagedata, outputmaxsetspec,
                            outputminsetspec)
    print('<<<<<<<<< Sample data has been saved to the Tarnsitdata folder >>>>>>>>>')
    return sampleplotdata
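
# A minimal usage sketch for Maketestdataspec; every path and threshold below
# is a hypothetical placeholder, not a value from the original project:
if __name__ == '__main__':
    plot_data = Maketestdataspec(
        inputfiledirectory='./logs/',        # folder holding the *.csv test logs (trailing slash required)
        vatargetpercent=0.25,                # allowed relative deviation for the VA area
        keytargetpercent=0.25,               # allowed relative deviation for the keys
        outputaveragefile='./Tarnsitdata/cm_delta_spec.ini',
        alldataoutputfile='./Tarnsitdata/',  # prefix used for the 'alldata.csv' dump
        outputmaxsetspec=0.3,                # upper spec = average * (1 + 0.3)
        outputminsetspec=0.3)                # lower spec = average * (1 - 0.3)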
Пример #37
0
    return res


def get_comments(post_id, offset=0):
    r = requests.get(
        COMMENTS_ENDPOINT.format(owner_id=OWNER_ID,
                                 post_id=post_id,
                                 offset=offset))
    try:
        res = r.json()['response'][1:]
    except KeyError:
        res = []
    return res


f = codecs.open('out.txt', 'w', encoding='utf-8')

count = 0
while count < TOTAL_POSTS:
    posts = get_posts(offset=count)
    for p in posts:
        count += 1
        dt = datetime.fromtimestamp(p['date'])
        post_id = p['id']
        print(count, file=f)
        print(dt.strftime('%Y-%m-%d'), file=f)
        print(p['text'], file=f)
        print(POST_URL_TEPMPLATE.format(owner_id=OWNER_ID, post_id=post_id),
              file=f)
        comments = get_comments(post_id)
        for c in comments:
Пример #38
0
            case = 3
    else:
        print("Invalid")
        inputpath=""
while (outputpath==""):
    outputpath = input("Output Path: ")
    outputpath = outputpath.replace('"','')
    outputpath = Path(outputpath)
    if not outputpath.exists():
        outputpath.mkdir()

#Input file Generator 
words = []
uniquewords = []
if case == 1 :
    with codecs.open(inputpath, "r",encoding='utf-8-sig') as labfile:
        contents = labfile.read()
        contents = contents.split(" ")
        for i in range(len(contents)):
            if contents[i] not in punctuation:
                words.append(contents[i])
elif case == 2 :
    with codecs.open(inputpath, "r",encoding='utf-8-sig') as txtfile:
        for contents in txtfile:
            contents = contents.rstrip("\n")
            words.append(contents) 
elif case == 3 :
    for entry in inputpath.iterdir():
        if entry.suffix == '.lab':
            with codecs.open(entry, "r",encoding='utf-8-sig') as labfile:
                contents = labfile.read()
Пример #39
0
from codecs import open
from setuptools import setup

try:
    from azure_bdist_wheel import cmdclass
except ImportError:
    from distutils import log as logger

    logger.warn("Wheel is not available, disabling bdist_wheel hook")
    cmdclass = {}

VERSION = "2.0.46"
# If we have source, validate that our version numbers match
# This should prevent uploading releases with mismatched versions.
try:
    with open('azure/cli/core/__init__.py', 'r', encoding='utf-8') as f:
        content = f.read()
except OSError:
    pass
else:
    import re
    import sys

    m = re.search(r'__version__\s*=\s*[\'"](.+?)[\'"]', content)
    if not m:
        print('Could not find __version__ in azure/cli/core/__init__.py')
        sys.exit(1)
    if m.group(1) != VERSION:
        print('Expected __version__ = "{}"; found "{}"'.format(VERSION, m.group(1)))
        sys.exit(1)
Пример #40
0
import codecs
from setuptools import setup

with codecs.open('README.rst', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name="shadowsocks",
    version="2.6.9",
    license='http://www.apache.org/licenses/LICENSE-2.0',
    description="A fast tunnel proxy that help you get through firewalls",
    author='clowwindy',
    author_email='*****@*****.**',
    url='https://github.com/shadowsocks/shadowsocks',
    packages=['shadowsocks', 'shadowsocks.crypto'],
    package_data={'shadowsocks': ['README.rst', 'LICENSE']},
    install_requires=[],
    entry_points="""
    [console_scripts]
    sslocal = shadowsocks.local:main
    ssserver = shadowsocks.server:main
    """,
    classifiers=[
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: Implementation :: CPython',
Пример #41
0
# (C) Datadog, Inc. 2019-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from codecs import open  # To use a consistent encoding
from os import path

from setuptools import setup

HERE = path.dirname(path.abspath(__file__))

# Get version info
ABOUT = {}
with open(path.join(HERE, 'datadog_checks', 'clickhouse', '__about__.py')) as f:
    exec(f.read(), ABOUT)

# Get the long description from the README file
with open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()


def get_dependencies():
    dep_file = path.join(HERE, 'requirements.in')
    if not path.isfile(dep_file):
        return []

    with open(dep_file, encoding='utf-8') as f:
        return f.readlines()


CHECKS_BASE_REQ = 'datadog-checks-base>=15.0.0'
Пример #42
0
    def getResourceFor(self, request):
        uri = request.uri
        uri = uri.split("?", 1)[0]
        uri = uri.split("#", 1)[0]

        if uri == '/':
            # This serves the message, but also throws an exception; can't understand why...
            result = static.Data(
                '<html>In production you would now be on https://clipperz.is/</html>',
                'text/html')
        elif uri.startswith('/json') or uri.startswith('/dump'):
            resource.prepath = ['app']
            result = resource.getChildForRequest(self.resource, request)
        elif uri.startswith('/payment'):
            resource.prepath = ['payment']
            result = resource.getChildForRequest(self.resource, request)
        elif uri == '/favicon.ico':
            return
        else:
            pathParts = uri.split('/')
            version = pathParts[1]

            if pathParts[2].startswith('index.'):
                print("-> index")
                contentType = 'text/html'
                absoluteFilePath = os.path.join(projectTargetDir(), 'dev',
                                                version, pathParts[2])
                #				print("INDEX.HTML absolute path " + str(absoluteFilePath))
                result = static.File(absoluteFilePath, contentType)
            elif pathParts[2].endswith('.webapp'):
                print("-> webapp")
                contentType = 'application/x-web-app-manifest+json'
                absoluteFilePath = os.path.join(projectBaseDir(), 'frontend',
                                                version, 'properties',
                                                pathParts[2])
                result = static.File(absoluteFilePath, contentType)
#			elif pathParts[2].endswith('.appcache'):
            elif pathParts[2].endswith('.appcache_disabled'):
                print("-> appcache")
                contentType = 'text/cache-manifest'
                absoluteFilePath = os.path.join(projectBaseDir(), 'frontend',
                                                version, 'properties',
                                                pathParts[2])
                fileContent = codecs.open(absoluteFilePath, 'r',
                                          'utf-8').read()
                #				fileContent = fileContent.replace('@application.version@', str(uuid.uuid1()))
                fileContent = fileContent.replace('@application.version@',
                                                  str(round(time.time())))
                result = static.Data(str(fileContent), contentType)
            else:
                #	http://homer.local:8888/beta/css/clipperz/images/loginInfoBackground.png
                #	pathParts: ['', 'beta', 'css', 'clipperz', 'images', 'loginInfoBackground.png']
                try:
                    imagePathIndex = pathParts.index('images')
                    resourceType = 'images'
                    for _ in range(2, imagePathIndex):
                        del pathParts[2]
                except ValueError:  # no 'images' component in the path
                    resourceType = pathParts[2]

                basePath = projectBaseDir() + '/frontend'
                if resourceType == 'images':
                    fileExtension = os.path.splitext(uri)[1]
                    if fileExtension == '.png':
                        #						print("-> image - png")
                        contentType = 'image/png'
                    elif fileExtension == '.jpg':
                        #						print("-> image - jpg")
                        contentType = 'image/jpeg'
                    elif fileExtension == '.gif':
                        #						print("-> image - gif")
                        contentType = 'image/gif'
                    else:
                        print "ERROR - unknown image extension: " + fileExtension

                    absoluteFilePath = basePath + '/'.join(pathParts)
                else:
                    resourceType = pathParts[2]

                    if resourceType == 'css':
                        #						print("-> css")
                        contentType = 'text/css'
                    elif resourceType == 'js':
                        #						print("-> js")
                        contentType = 'text/javascript'
                    else:
                        #						print("-> text/html")
                        contentType = 'text/html'

                    absoluteFilePath = basePath + uri

                result = static.File(absoluteFilePath, contentType)


#		print("RESULT\n" + str(result))
        return result
Пример #43
0
# (C) Datadog, Inc. 2018
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

from codecs import open  # To use a consistent encoding
from os import path

from setuptools import setup

HERE = path.abspath(path.dirname(__file__))

# Get version info
ABOUT = {}
with open(path.join(HERE, "datadog_checks", "kube_proxy", "__about__.py")) as f:
    exec(f.read(), ABOUT)

# Get the long description from the README file
with open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()


CHECKS_BASE_REQ = 'datadog_checks_base'

setup(
    name='datadog-kube-proxy',
    version=ABOUT["__version__"],
    description='The kube_proxy Check',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords='datadog agent kube_proxy check',
    # The project's main homepage.
Пример #44
0
import codecs

from setuptools import find_packages, setup

VERSION_FILE = "pytest_never_sleep/_version.py"

with codecs.open("README.md", "r", "utf-8") as fh:
    long_description = fh.read()

setup(
    name="pytest-never-sleep",
    # use_scm_version={
    #     "write_to": VERSION_FILE,
    #     "local_scheme": "dirty-tag",
    # },
    # setup_requires=["setuptools_scm==5.0.2"],
    author="Denis Korytkin",
    author_email="*****@*****.**",
    description=
    "pytest plugin helps to avoid adding tests without mock `time.sleep`",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/DKorytkin/pytest-never-sleep",
    keywords=["py.test", "pytest", "without sleep", "mock time.sleep"],
    py_modules=[
        "pytest_never_sleep.plugin",
        "pytest_never_sleep.hooks",
        "pytest_never_sleep.never_sleep",
    ],
    packages=find_packages(exclude=["tests*"]),
    install_requires=["pytest>=3.5.1"],
def tagging():
    # load the configuration file
    with open('./config.yml') as file_config:
        config = yaml.load(file_config)

    feature_names = config['model_params']['feature_names']  # read the feature names
    use_char_feature = config['model_params']['use_char_feature']

    # initialize the embedding shapes and dropouts (pre-trained embeddings are also initialized here)
    feature_weight_shape_dict, feature_weight_dropout_dict, \
        feature_init_weight_dict = dict(), dict(), dict()
    for feature_name in feature_names:
        feature_weight_shape_dict[feature_name] = \
            config['model_params']['embed_params'][feature_name]['shape']
        feature_weight_dropout_dict[feature_name] = \
            config['model_params']['embed_params'][feature_name]['dropout_rate']
        path_pre_train = config['model_params']['embed_params'][feature_name]['path']
        if path_pre_train:  # this feature has a pre-trained embedding
            with open(path_pre_train, 'rb') as file_r:
                feature_init_weight_dict[feature_name] = pickle.load(file_r)

    # char embedding shape
    if use_char_feature:
        feature_weight_shape_dict['char'] = \
            config['model_params']['embed_params']['char']['shape']
        conv_filter_len_list = config['model_params']['conv_filter_len_list']
        conv_filter_size_list = config['model_params']['conv_filter_size_list']
    else:
        conv_filter_len_list = None
        conv_filter_size_list = None

    # load the vocabularies
    print "Loading the vocabularies......"
    path_vocs = []
    if use_char_feature:
        path_vocs.append(config['data_params']['voc_params']['char']['path'])
    for feature_name in feature_names:
        path_vocs.append(config['data_params']['voc_params'][feature_name]['path'])
    path_vocs.append(config['data_params']['voc_params']['label']['path'])
    vocs = load_vocs(path_vocs)


    # load the data
    print "Loading the test set......"
    sep_str = config['data_params']['sep']

    assert sep_str in ['table', 'space']
    sep = '\t' if sep_str == 'table' else ' '
    max_len = config['model_params']['sequence_length']
    word_len = config['model_params']['word_length']
    data_dict = init_data(
        path=config['data_params']['path_test'], feature_names=feature_names, sep=sep,
        vocs=vocs, max_len=max_len, model='test', use_char_feature=use_char_feature,
        word_len=word_len)

    # load the model
    model = SequenceLabelingModel(
        sequence_length=config['model_params']['sequence_length'],
        nb_classes=config['model_params']['nb_classes'],
        nb_hidden=config['model_params']['bilstm_params']['num_units'],
        num_layers=config['model_params']['bilstm_params']['num_layers'],
        feature_weight_shape_dict=feature_weight_shape_dict,
        feature_init_weight_dict=feature_init_weight_dict,
        feature_weight_dropout_dict=feature_weight_dropout_dict,
        dropout_rate=config['model_params']['dropout_rate'],
        nb_epoch=config['model_params']['nb_epoch'], feature_names=feature_names,
        batch_size=config['model_params']['batch_size'],
        train_max_patience=config['model_params']['max_patience'],
        use_crf=config['model_params']['use_crf'],
        l2_rate=config['model_params']['l2_rate'],
        rnn_unit=config['model_params']['rnn_unit'],
        learning_rate=config['model_params']['learning_rate'],
        use_char_feature=use_char_feature,
        conv_filter_size_list=conv_filter_size_list,
        conv_filter_len_list=conv_filter_len_list,
        word_length=word_len,
        path_model=config['model_params']['path_model'])
    saver = tf.train.Saver()
    saver.restore(model.sess, config['model_params']['path_model'])

    # run tagging
    viterbi_sequences = model.predict(data_dict)

    # write the results to file
    label_voc = dict()
    for key in vocs[-1]:
        label_voc[vocs[-1][key]] = key
    with codecs.open(config['data_params']['path_test'], 'r', encoding='utf-8') as file_r:
        sentences = file_r.read().strip().split('\n\n')
    file_result = codecs.open(
        config['data_params']['path_result'], 'w', encoding='utf-8')
    for i, sentence in enumerate(sentences):
        for j, item in enumerate(sentence.split('\n')):
            if j < len(viterbi_sequences[i]):
                file_result.write('%s\t%s\n' % (item, label_voc[viterbi_sequences[i][j]]))
            else:
                file_result.write('%s\tO\n' % item)
        file_result.write('\n')

    file_result.close()
Пример #46
0
from setuptools import setup, find_packages
from codecs import open
from os import path

__version__ = '0.0.21'

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

# get the dependencies and installs
with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
    all_reqs = f.read().split('\n')

install_requires = [x.strip() for x in all_reqs if 'git+' not in x]
dependency_links = [
    x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')
]

setup(
    name='biocircuits',
    version=__version__,
    description=
    'Python utilities for the Caltech course BE 150: Design Principles of Genetic Circuits',
    long_description=long_description,
    url='https://github.com/justinbois/biocircuits',
    download_url='https://github.com/justinbois/biocircuits/tarball/' +
    __version__,
    license='BSD',
Пример #47
0
from sklearn import metrics
import glob
import errno
import codecs
path1 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/Hasan Mahbub/*.doc'
path2 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/MZI/MZI/*.doc'
path3 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/Nir Shondhani/Nir Shondhani/*.doc'
labels, texts = [], []
val_x, val_y = [], []
files = glob.glob(path1)

for name in files:
    try:
        with codecs.open(name, 'r', encoding='utf-8') as f:
            text = f.read()  # avoid shadowing the built-in str
            # text = re.sub(' +', ' ', text)
            text = " ".join(text.split())
            labels.append("hm")
            texts.append(text)

    except IOError as exc:
        if exc.errno != errno.EISDIR:
            raise

files = glob.glob(path2)
for name in files:
    try:
        with codecs.open(name, 'r', encoding='utf-8') as f:
            text = f.read()  # avoid shadowing the built-in str
            #  text = re.sub(' +', ' ', text)
            text = " ".join(text.split())
Пример #48
0

objectnames = gatherList(objectnames, objectlist, name='objects', required=False)

# authenticate
apiauth(vip=vip, username=username, domain=domain, password=password, useApiKey=useApiKey)

cluster = api('get', 'cluster')

print('\nGathering Job Info from %s...\n' % cluster['name'])

# outfile
now = datetime.now()
dateString = now.strftime("%Y-%m-%d")
outfile = 'protectedObjectReport-%s-%s.csv' % (cluster['name'], dateString)
f = codecs.open(outfile, 'w')

# gather info
sources = api('get', 'protectionSources?includeVMFolders=true')
policies = api('get', 'data-protect/policies', v=2)['policies']
jobs = api('get', 'data-protect/protection-groups?includeTenants=true', v=2)

# headings
f.write('Cluster Name,Job Name,Environment,Object Name,Object Type,Object Size (MiB),Parent,Policy Name,Policy Link,Archive Target,Direct Archive,Frequency (Minutes),Last Backup,Last Status,Last Run Type,Job Paused,Indexed,Start Time,Time Zone,QoS Policy,Priority,Full SLA,Incremental SLA\n')

report = []

for job in sorted(jobs['protectionGroups'], key=lambda j: j['name']):

    objects = {}
Пример #49
0
# d = path.dirname(__file__)
d = "./source/word_cloud-master/examples"

stopwords_path = d + '/wc_cn/stopwords_cn_en.txt'
# Chinese fonts must be set
font_path = d + '/fonts/SourceHanSerif/SourceHanSerifK-Light.otf'

# paths where the word cloud images will be saved
imgname1 = d + '/wc_cn/LuXun.jpg'
imgname2 = d + '/wc_cn/LuXun_colored.jpg'
# read the mask / color image taken from
back_coloring = imread(d + '/wc_cn/LuXun_color.jpg')

# Read the whole text.
text = codecs.open(d + '/wc_cn/CalltoArms.txt', "r", "utf-8").read()

#
userdict_list = ['孔乙己']


# The function for processing text with HanLP
def pyhanlp_processing_txt(text, isUseStopwordsOfHanLP=True):
    CustomDictionary = JClass("com.hankcs.hanlp.dictionary.CustomDictionary")
    for word in userdict_list:
        CustomDictionary.add(word)

    mywordlist = []
    HanLP.Config.ShowTermNature = False
    CRFnewSegment = HanLP.newSegment("viterbi")
Пример #50
0
def get_precision():
    """
    Precision calculation based on person names
    """
    with open('./config.yml') as file_config:
        config = yaml.load(file_config)
    f_answer = codecs.open(config["data_params"]["path_answer"], encoding="utf-8")
    f_result = codecs.open(config["data_params"]["path_result"], encoding="utf-8")
    data = f_answer.read()
    f_answer.close()
    rows_answer = data.split("\n")
    items_answer = [[i.split("\t")[0], i.split("\t")[-1]] for i in rows_answer]
    data = f_result.read()
    f_result.close()
    rows_result = data.split("\n")
    items_result = [[i.split("\t")[0], i.split("\t")[-1]] for i in rows_result]
    precision_num = 0.0
    recall_num = 0.0
    correct_num = 0.0
    for items in items_result:
        # print items[0],items[1]
        try:
            if items[1][0] == "B" and items[1][2:]=="nrn":
                precision_num += 1
        except:
            pass
    for items in items_answer:
        # print items
        try:
            if items[1][0] == "B" and items[1][2:]=="nrn":
                recall_num += 1
        except:
            pass
    i = 0
    while i < len(items_answer):
        if items_result[i][1]!="" and items_answer[i][1]!="":
            if items_result[i][1][0] == "B" and items_result[i][1][0] == items_answer[i][1][0]:
                j = i
                while j < len(items_answer):

                    if items_answer[j][1][0] != "E":
                        j+=1
                    else:
                        break
                if items_result[j][1][0] == "E" and items_answer[j][1][2:]=="nrn":
                    # print items_answer[j][1], items_result[j][1]
                    correct_num += 1
                    i = j
        i += 1
    p = correct_num/precision_num
    r = correct_num/recall_num

    print("nrn")
    print ("p:")
    print(p)
    print ("r:")
    print (r)
    print ("f:")
    print(2*p*r/(p+r))

# if __name__ == '__main__':
#     create_testset()
#     tagging()  # 标记测试集
#     get_precision()
    # get_indicator()
Пример #51
0
def read(*parts):
    with codecs.open(join(here, *parts), 'r') as fp:
        return fp.read()
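
# The helper above relies on `here` and `join` being defined by the enclosing
# setup.py. A self-contained sketch of that typical context (file names are
# illustrative only):
import codecs
from os.path import abspath, dirname, join

here = abspath(dirname(__file__))

def read(*parts):
    with codecs.open(join(here, *parts), 'r') as fp:
        return fp.read()

long_description = read('README.rst')  # e.g. feed the README into setup(long_description=...)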
Пример #52
0
import codecs
from setuptools import setup, find_packages

with codecs.open('README.md', 'r', 'utf8') as reader:
    long_description = reader.read()

with codecs.open('requirements.txt', 'r', 'utf8') as reader:
    install_requires = list(map(lambda x: x.strip(), reader.readlines()))

setup(
    name='keras-gpt-2',
    version='0.13.0',
    packages=find_packages(),
    url='https://github.com/CyberZHG/keras-gpt-2',
    license='MIT',
    author='CyberZHG',
    author_email='*****@*****.**',
    description='GPT-2',
    long_description=long_description,
    long_description_content_type='text/markdown',
    install_requires=install_requires,
    classifiers=(
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ),
)
Пример #53
0
See:
https://packaging.python.org/en/latest/distributing.html
https://github.com/pypa/sampleproject
"""

# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

setup(name='scipion-em-empiar',
      version='3.0.1',
      description='A Scipion plugin to make depositions to EMPIAR',
      long_description=long_description,
      url='https://github.com/scipion-em/scipion-em-empiar',
      author='I2PC',
      author_email='*****@*****.**',
      keywords='scipion empiar scipion-3.0',
      packages=find_packages(),
      install_requires=['empiar-depositor', 'jsonschema', 'scipion-em'],
      package_data={
          'empiar': [
              'EMPIAR_logo.png', 'empiar_deposition.schema.json',
Пример #54
0
def main():
    global SUBJECT

    content = ""
    test_counts = []
    attachments = {}

    updateproc = subprocess.Popen(
        "cd /opt/sqlmap/ ; python /opt/sqlmap/sqlmap.py --update",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout, stderr = updateproc.communicate()

    if stderr:
        failure_email("Update of sqlmap failed with error:\n\n%s" % stderr)

    regressionproc = subprocess.Popen(
        "python /opt/sqlmap/sqlmap.py --live-test",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=False)
    stdout, stderr = regressionproc.communicate()

    if stderr:
        failure_email("Execution of regression test failed with error:\n\n%s" %
                      stderr)

    failed_tests = re.findall(
        "running live test case: (.+?) \((\d+)\/\d+\)[\r]*\n.+test failed (at parsing items: (.+))?\s*\- scan folder: (\/.+) \- traceback: (.*?)( - SQL injection not detected)?[\r]*\n",
        stdout, re.M)

    for failed_test in failed_tests:
        title = failed_test[0]
        test_count = int(failed_test[1])
        parse = failed_test[3] if failed_test[3] else None
        output_folder = failed_test[4]
        traceback = False if failed_test[5] == "False" else bool(
            failed_test[5])
        detected = False if failed_test[6] else True

        test_counts.append(test_count)

        console_output_file = os.path.join(output_folder, "console_output")
        log_file = os.path.join(output_folder, TARGET, "log")
        traceback_file = os.path.join(output_folder, "traceback")

        if os.path.exists(console_output_file):
            console_output_fd = codecs.open(console_output_file, "rb", "utf8")
            console_output = console_output_fd.read()
            console_output_fd.close()
            attachments[test_count] = str(console_output)

        if os.path.exists(log_file):
            log_fd = codecs.open(log_file, "rb", "utf8")
            log = log_fd.read()
            log_fd.close()

        if os.path.exists(traceback_file):
            traceback_fd = codecs.open(traceback_file, "rb", "utf8")
            traceback = traceback_fd.read()
            traceback_fd.close()

        content += "Failed test case '%s' (#%d)" % (title, test_count)

        if parse:
            content += " at parsing: %s:\n\n" % parse
            content += "### Log file:\n\n"
            content += "%s\n\n" % log
        elif not detected:
            content += " - SQL injection not detected\n\n"
        else:
            content += "\n\n"

        if traceback:
            content += "### Traceback:\n\n"
            content += "%s\n\n" % str(traceback)

        content += "#######################################################################\n\n"

    end_string = "Regression test finished at %s" % time.strftime(
        "%H:%M:%S %d-%m-%Y", time.gmtime())

    if content:
        content += end_string
        SUBJECT = "Failed %s (%s)" % (SUBJECT, ", ".join(
            "#%d" % count for count in test_counts))

        msg = prepare_email(content)

        for test_count, attachment in attachments.items():
            attachment = MIMEText(attachment)
            attachment.add_header("Content-Disposition",
                                  "attachment",
                                  filename="test_case_%d_console_output.txt" %
                                  test_count)
            msg.attach(attachment)

        send_email(msg)
    else:
        SUBJECT = "Successful %s" % SUBJECT
        msg = prepare_email("All test cases were successful\n\n%s" %
                            end_string)
        send_email(msg)
Пример #55
0
ITEM_PIPELINES = {
    'FirmCrawler.pipelines.FirmcrawlerPipeline':300,
}
LOG_LEVEL = 'INFO'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'FirmCrawler (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

#get mongodb info
import codecs
import ConfigParser
config = ConfigParser.ConfigParser()
configfile = r'./scrapy.cfg'
config.readfp(codecs.open(configfile,'r','utf-8'))
MONGO_URI = config.get('mongo_cfg',"MONGO_IP")
MONGO_PORT = config.get('mongo_cfg',"MONGO_PORT")
MONGO_DATABASE = config.get('mongo_cfg',"MONGO_DATABASE")
MONGO_COLLECTION = config.get('mongo_cfg',"MONGO_SCRAPY_COLLECTION_NAME")
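# A matching [mongo_cfg] section in scrapy.cfg might look like this (values are
# illustrative placeholders; only the key names come from the code above):
#
#   [mongo_cfg]
#   MONGO_IP = 127.0.0.1
#   MONGO_PORT = 27017
#   MONGO_DATABASE = firmware
#   MONGO_SCRAPY_COLLECTION_NAME = firm_items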

# #edit by @zhangguodong
# dirs_root = config.get('mogo_cfg',"FIRMWARE_STORE_PATH")
# #file_sile = config.get('mongo_cfg',"")
# configfile =r'./CONFIG.cfg'


# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
Пример #56
0
from django.conf import settings
from django_bootstrap import bootstrap
bootstrap(__file__)

from application.models import SubmissionInfo, Applicant, Major, MajorPreference, PersonalInfo
from confirmation.models import AdmissionMajorPreference, AdmissionWaiver

from utils import get_submitted_applicant_dict

applicants = get_submitted_applicant_dict({
        'preference': MajorPreference,
        'personal_info': PersonalInfo,
        'submission_info': SubmissionInfo,
        })

f = codecs.open(file_name, encoding="utf-8", mode="w")

pref = {}

uses_nat_id =  '--nat' in sys.argv

SUBMISSION_RANK = {1: 2, # doc by mail
                   2: 1, # online
                   3: 3} # all by mail

for applicantion_id in sorted(applicants.keys()):
    applicant = applicants[applicantion_id]
    #if not applicant.submission_info.doc_reviewed_complete:
    #    continue

    if AdmissionWaiver.is_waived(applicant):
Пример #57
0
## List of words to be removed from each court case: word_list
## TODO: Q. What was the criterion for splitting word_list? Is starting from a human-chosen split really the right preprocessing for word2vec?
with open(fileName_dnusing_wordSet, 'r') as infile:
    word_list = [line.rstrip() for line in infile]

## Build a list from the collected titles
with open(fileName_title, 'r') as infile2:
    title_list = [line.rstrip() for line in infile2]

## Build a list of the collected keyword numbers
# Do we know why an error occurs at this point, and what the reason is?
try:
    with open(fileName_keyNum) as infile3:
        keyNum_list = [line.rstrip() for line in infile3]
except UnicodeDecodeError:  ## TODO: Why does loading this text file raise an error, and why do we therefore need codecs?
    with codecs.open(fileName_keyNum, "r", "utf-8") as infile3:
        keyNum_list = [line.rstrip() for line in infile3]


## Extract only the nouns from the parsed corpus, build a per-word count dictionary, and append it
## TODO: Is extracting only nouns the right approach for this data? Can it correctly judge the relations and similarity between nouns?
def append_noun_words(corpus):
    noun_words = ['NNG', 'NNB', 'NP']  # learn only common nouns, proper nouns and pronouns  // why this choice?
    results = []
    for text in corpus:
        for noun_word in noun_words:
            if noun_word in text[1]:
                results.append(text[0])
    return results

Пример #58
0
# plt.imshow(wc)  # show the word cloud
# plt.axis('off')  # turn off the axes
# plt.show()  # display the image

# import extension libraries
import re  # regular expressions
import collections  # word frequency counting
import numpy as np  # numpy data processing
import jieba  # jieba Chinese word segmentation
import wordcloud  # word cloud rendering
from PIL import Image  # image processing
import matplotlib.pyplot as plt  # plotting

# read the file
text = codecs.open(r'C:\Users\think\Desktop\情感分析\doc\answer.txt',
                   'r',
                   encoding='utf-8').read()
print(text)
#fn = open(r'C:\Users\think\Desktop\情感分析\answer.txt')  # open the file
#string_data = fn.read()  # read the whole file
#fn.close()  # close the file

# text preprocessing
pattern = re.compile(u'\t|\n|\.|-|:|;|\)|\(|\?|"')  # regex of characters to strip
string_data = re.sub(pattern, '', text)  # remove characters matching the pattern

# word segmentation on the preprocessed text
seg_list_exact = jieba.cut(string_data, cut_all=False)  # precise-mode segmentation
object_list = []
remove_words = [
    u'的', u',', u'和', u'是', u'随着', u'对于', u'对', u'等', u'能', u'都', u'。', u' ',
Пример #59
0
 def from_file(self, filepath, encoding="utf-8"):
     """Read TAP file using `filepath` as source."""
     with codecs.open(filepath, encoding=encoding) as fp:
         for line in fp.readlines():
             self.parse_line(line.rstrip("\n\r"))
Пример #60
0
def extract_header_body(filename):
    """Extract the text of the headers and body from a yaml headed file."""
    import codecs
    with codecs.open(filename, 'rb', 'utf-8') as f:
        metadata, content = frontmatter.parse(f.read())
    return metadata, content
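
# A short usage sketch (the file name is hypothetical; assumes the
# python-frontmatter package is available as `frontmatter`):
if __name__ == '__main__':
    metadata, body = extract_header_body('post.md')
    print(metadata.get('title'))  # YAML header fields come back as a dict
    print(body[:80])              # the remaining document text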