Example #1
# TODO: We should introduce a high-level tracker/resolver (maybe as
# catalogue) that does caching. It should basically maintain
# a mapping of the graph:
#
# - (type,name) → path
# - path + provides  → [ (type,name) ]
# - path + requires  → [ (type,name) ]
#
# - find(name|(type,name))
# - depends|requires(path|name|item)
# - provides(path|item)
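# A minimal sketch of what such a tracker/catalogue could look like. This is
# purely illustrative (the class and its names are not part of deparse): it
# caches the three mappings listed above and answers find/requires/provides
# queries from them.
class CatalogueSketch(object):

    def __init__(self):
        self.paths    = {}  # (type, name) → path
        self.provided = {}  # path → [(type, name)]
        self.required = {}  # path → [(type, name)]

    def add(self, path, provides=(), requires=()):
        for item in provides:
            self.paths[item] = path
        self.provided.setdefault(path, []).extend(provides)
        self.required.setdefault(path, []).extend(requires)

    def find(self, item):
        return self.paths.get(item)

    def requires(self, path):
        return self.required.get(path, [])

    def provides(self, path):
        return self.provided.get(path, [])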

import sys

try:
    import reporter
    logging = reporter.bind("deparse", template=reporter.TEMPLATE_COMMAND)
except ImportError as e:
    import logging

if sys.version_info.major >= 3:
    unicode = str

__doc__ = """
*deparse* extracts/lists and resolves dependencies from a variety of files.
Dependencies are listed as pairs `(<type>, <name>)`, where the type is a string
like `<language>:<type>`, for instance `js:file`, `js:module`, etc.

The `deparse` module features both an API and a command-line interface.
"""

Example #2
# -----------------------------------------------------------------------------
# Project           :   PAML
# -----------------------------------------------------------------------------
# Author            :   Sebastien Pierre                  <*****@*****.**>
# License           :   Lesser GNU Public License
# -----------------------------------------------------------------------------
# Creation date     :   10-May-2007
# Last mod.         :   13-Sep-2016
# -----------------------------------------------------------------------------

import os, sys, re, string, json, time, glob, tempfile, argparse
IS_PYTHON3 = sys.version_info[0] > 2

try:
	import reporter
	logging = reporter.bind("paml")
except ImportError:
	import logging

__version__    = "0.8.2"
PAMELA_VERSION = __version__

# TODO: Add an option to start a sugar compilation server and directly query
# it, maybe using ZMQ.

def ensure_unicode( t, encoding="utf8" ):
	if IS_PYTHON3:
		return t if isinstance(t, str) else str(t, encoding)
	else:
		return t if isinstance(t, unicode) else t.decode(encoding)
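# Illustrative usage, not from the original file: already-decoded text passes
# through unchanged, while byte strings are decoded with the given encoding
# (UTF-8 by default).
assert ensure_unicode(u"caf\u00e9")           == u"caf\u00e9"
assert ensure_unicode(b"caf\xc3\xa9", "utf8") == u"caf\u00e9"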
Example #3
#     GNU General Public License for more details.
# 
#     You should have received a copy of the GNU General Public License
#     along with Broken Promises.  If not, see <http://www.gnu.org/licenses/>.


from brokenpromises          import Article, settings
from brokenpromises.channels import Channel, channel
import brokenpromises.utils  as utils
import datetime
import reporter
import requests
import time
from bs4 import BeautifulSoup

debug, trace, info, warning, error, fatal = reporter.bind(__name__)

# TODO
#   [ ] handle pagination

@channel("The New-York Times")
class NewYorkTimes(Channel):
	"""
	based on NYT ARTICLE SEARCH API VERSION 2

		doc: http://developer.nytimes.com/docs/read/article_search_api_v2#building-search

		restrictions for `articlesearch`:
			10      Calls per second
			10,000  Calls per day
	"""
Example #4
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
# 
#     You should have received a copy of the GNU General Public License
#     along with Broken Promises.  If not, see <http://www.gnu.org/licenses/>.

from   brokenpromises.operations import CollectArticles
from   bson.json_util            import dumps
import optparse
import brokenpromises.channels
import sys
import reporter

reporter.REPORTER.register(reporter.StderrReporter())

debug, trace, info, warning, error, fatal = reporter.bind("script_collect_articles")

oparser = optparse.OptionParser(usage ="\n./%prog [options] year \n./%prog [options] year month\n./%prog [options] year month day")
# oparser.add_option("-C", "--nocache", action="store_true", dest="nocache",
# 	help = "Prevents from using the cache", default=False)
oparser.add_option("-f", "--channelslistfile", action="store", dest="channels_file",
	help = "Use this that as channels list to use", default=None)
oparser.add_option("-c", "--channels", action="store", dest="channels_list",
	help = "channels list comma separated", default=None)
oparser.add_option("-s", "--storage", action="store_true", dest="storage",
	help = "Save the result with the default storage", default=False)
oparser.add_option("-d", "--drop", action="store_true", dest="mongodb_drop",
	help = "drop the previous articles from database before", default=False)
oparser.add_option("--force", action="store_true", dest="force_collect",
	help = "Force the scrap. If --storage is enable, the scrap could be escape b/c of a previous similar scrap", default=False)
oparser.add_option("-o", "--output", action="store", dest="output_file",
Example #5
"""

try:
	import http.server  as SimpleHTTPServer
	import socketserver as SocketServer
	import urllib.parse as urlparse
	BaseHTTPServer = SimpleHTTPServer
except ImportError:
	import SimpleHTTPServer, SocketServer, BaseHTTPServer, urlparse

from .core import ensureUnicode

import sys, socket, errno, time, traceback, io, threading, re
try:
	import reporter
	logging = reporter.bind("retro")
except ImportError:
	import logging
	reporter = None

from . import core, web

# Jython has no signal module
try:
	import signal
	HAS_SIGNAL = True
except ImportError:
	HAS_SIGNAL = False

# ------------------------------------------------------------------------------
#
Example #6
#
#     You should have received a copy of the GNU General Public License
#     along with Broken Promises.  If not, see <http://www.gnu.org/licenses/>.

from brokenpromises import settings, Report
from brokenpromises.storage import Storage
import brokenpromises.channels
import brokenpromises.utils
import nltk
import os
import dateparser
import datetime
import calendar
import reporter

debug, trace, info, warning, error, fatal = reporter.bind(__name__)


class Collector(object):
    def __init__(self):
        self.report = None

    @classmethod
    def retrieve_referenced_dates(cls, text, filters_on_text=None):
        references = []
        # filters
        # remove all tags
        if filters_on_text:
            text = filters_on_text(text)
        # search and add dates to `references`
        for date_obj, date_row, date_position in dateparser.find_dates(text):
Example #7
# Project           : NAME
# -----------------------------------------------------------------------------
# Author            : FFunction
# License           : BSD License
# -----------------------------------------------------------------------------
# Creation date     : YYYY-MM-DD
# Last modification : YYYY-MM-DD
# -----------------------------------------------------------------------------

import sys
from .catalogue import Catalogue
from .dedup import Dedup

try:
	import reporter
	logging = reporter.bind(sys.argv[0].split("/")[-1])
except ImportError:
	import logging

def cat(args=None):
	"""Writes out the catalogue at the given location."""
	args = sys.argv[1:] if args is None else args
	paths = args
	cat = Catalogue(paths, logging=logging)
	cat.write(sys.stdout)

def dedup(args=None):
	args = sys.argv[1:] if args is None else args
	cat = Dedup(args[0], logging=logging)

Example #8
# -----------------------------------------------------------------------------
# Creation date     : 14-Jul-2013
# Last modification : 17-Nov-2016
# -----------------------------------------------------------------------------

from __future__ import print_function
import re, os, sys, argparse, json, copy, io, time
from   io        import BytesIO
from  .grammar   import getGrammar
from  .processor import PCSSProcessor
from  .writer    import CSSWriter
from  .cache     import Graph

try:
	import reporter
	logging = reporter.bind("pcss")
except ImportError:
	import logging

GRAPH = Graph()
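# The module-level Graph acts as a cache: when it is available, `parse` returns
# the node it holds for a path (as CSS or AST); otherwise the file is parsed
# from scratch with the grammar.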

def parse(path, convert=True):
	if GRAPH:
		node = GRAPH.get(path)
		return node.css if convert else node.ast
	else:
		res = getGrammar().parsePath(path)
		return processResult(res, path=path) if convert else res

def parseString(text, path=None, convert=True):
	res = getGrammar().parseString(text)
Example #9
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with Broken Promises.  If not, see <http://www.gnu.org/licenses/>.

from brokenpromises.operations import CollectArticles
from bson.json_util import dumps
import optparse
import brokenpromises.channels
import sys
import reporter

reporter.REPORTER.register(reporter.StderrReporter())

debug, trace, info, warning, error, fatal = reporter.bind(
    "script_collect_articles")

oparser = optparse.OptionParser(
    usage=
    "\n./%prog [options] year \n./%prog [options] year month\n./%prog [options] year month day"
)
# oparser.add_option("-C", "--nocache", action="store_true", dest="nocache",
# 	help = "Prevents from using the cache", default=False)
oparser.add_option("-f",
                   "--channelslistfile",
                   action="store",
                   dest="channels_file",
                   help="Use this that as channels list to use",
                   default=None)
oparser.add_option("-c",
                   "--channels",
Example #10
#!/usr/bin/env python3
import re, sys

# try:
import reporter
reporter.template(reporter.TEMPLATE_COMPACT)
logging = reporter.bind("memcheck")
# except Exception as e:
# 	print("PROBLEM", e)
# 	import logging
#
__doc__ = """
`memcheck` is a tool that parses the output of memory-related operations
as defined in `oo.h` and ensures the following properties:

- Any freed pointer was previously allocated
- Any created/resized pointer is deallocated
- No pointer is deallocated more than once

To use memcheck, run your program compiled with `<oo.h>` using the
`__NEW`, `__FREE`, … primitives, save the output to a log
file and run `memcheck` on it.

```bash
./a.out > a.log
./bin/memcheck.py a.log
```

"""

ADDR = r"(\(nil\)|0x[\w\d]+)"
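# A minimal sketch of the checking logic described in the docstring above.
# Illustrative only: the (op, addr) event format and the function name are
# assumptions, not the actual `oo.h` log format parsed by this tool.
def check_events(events):
	"""`events` is an iterable of (op, addr) pairs, op being "new" or "free"."""
	live   = set()
	errors = []
	for op, addr in events:
		if op == "new":
			# Created/resized pointers become live and must eventually be freed
			live.add(addr)
		elif op == "free":
			if addr in live:
				live.discard(addr)
			else:
				# Either never allocated or already freed once
				errors.append("invalid free: " + addr)
	# Anything still live at the end of the log was never deallocated
	errors.extend("never freed: " + addr for addr in live)
	return errors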