# TODO: We should introduce a high-level tracker/resolver (maybe as
# catalogue) that does caching. It should basically maintain
# a mapping of the graph:
#
# - (type,name)     → path
# - path + provides → [ (type,name) ]
# - path + requires → [ (type,name) ]
#
# - find(name|(type,name))
# - depends|requires(path|name|item)
# - provides(path|item)

# `reporter` is optional: when it cannot be imported, the standard `logging`
# module is bound to the same `logging` name instead.
try:
    import reporter
    logging = reporter.bind("deparse", template=reporter.TEMPLATE_COMMAND)
except ImportError:
    import logging

# Python 3 has no `unicode` builtin, so the name is aliased to `str`.
# NOTE(review): `sys` must already be imported at this point — its import is
# not visible in this part of the file; confirm.
if sys.version_info.major >= 3:
    unicode = str

__doc__ = """
*deparse* extracts/lists and resolves dependencies from a variety of files.
Tracker are listed as couples `(<type>, <name>)` where type is a string
like `<language>:<type>`, for instance `js:file`, `js:module`, etc.

The `deparse` module features both an API and a command-line interface.
"""
# -----------------------------------------------------------------------------
# Project           : PAML
# -----------------------------------------------------------------------------
# Author            : Sebastien Pierre <*****@*****.**>
# License           : Lesser GNU Public License
# -----------------------------------------------------------------------------
# Creation date     : 10-May-2007
# Last mod.         : 13-Sep-2016
# -----------------------------------------------------------------------------

import os, sys, re, string, json, time, glob, tempfile, argparse

IS_PYTHON3 = sys.version_info[0] > 2

# `reporter` is optional: fall back to the standard `logging` module only when
# the import itself fails, so that other errors are not silently swallowed.
try:
    import reporter
    logging = reporter.bind("paml")
except ImportError:
    import logging

__version__ = "0.8.2"
PAMELA_VERSION = __version__

# TODO: Add an option to start a sugar compilation server and directly query
# it, maybe using ZMQ.


def ensure_unicode(t, encoding="utf8"):
    """Return `t` as a text (unicode) string.

    A value that is already a text string is returned unchanged. Any other
    value is decoded with `encoding`: through `str(t, encoding)` on Python 3
    and through `t.decode(encoding)` on Python 2.
    """
    if IS_PYTHON3:
        return t if isinstance(t, str) else str(t, encoding)
    else:
        # `unicode` only exists on Python 2, which is the only interpreter
        # that reaches this branch.
        return t if isinstance(t, unicode) else t.decode(encoding)
# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Broken Promises. If not, see <http://www.gnu.org/licenses/>. from brokenpromises import Article, settings from brokenpromises.channels import Channel, channel import brokenpromises.utils as utils import datetime import reporter import requests import time from bs4 import BeautifulSoup debug, trace, info, warning, error, fatal = reporter.bind(__name__) # TODO # [ ] handle pagination @channel("The New-York Times") class NewYorkTimes(Channel): """ based on NYT ARTICLE SEARCH API VERSION 2 doc: http://developer.nytimes.com/docs/read/article_search_api_v2#building-search restrictions for `articlesearch`: 10 Calls per second 10,000 Calls per day """
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Broken Promises. If not, see <http://www.gnu.org/licenses/>. from brokenpromises.operations import CollectArticles from bson.json_util import dumps import optparse import brokenpromises.channels import sys import reporter reporter.REPORTER.register(reporter.StderrReporter()) debug, trace, info, warning, error, fatal = reporter.bind("script_collect_articles") oparser = optparse.OptionParser(usage ="\n./%prog [options] year \n./%prog [options] year month\n./%prog [options] year month day") # oparser.add_option("-C", "--nocache", action="store_true", dest="nocache", # help = "Prevents from using the cache", default=False) oparser.add_option("-f", "--channelslistfile", action="store", dest="channels_file", help = "Use this that as channels list to use", default=None) oparser.add_option("-c", "--channels", action="store", dest="channels_list", help = "channels list comma separated", default=None) oparser.add_option("-s", "--storage", action="store_true", dest="storage", help = "Save the result with the default storage", default=False) oparser.add_option("-d", "--drop", action="store_true", dest="mongodb_drop", help = "drop the previous articles from database before", default=False) oparser.add_option("--force", action="store_true", dest="force_collect", help = "Force the scrap. If --storage is enable, the scrap could be escape b/c of a previous similar scrap", default=False) oparser.add_option("-o", "--output", action="store", dest="output_file",
""" try: import http.server as SimpleHTTPServer import socketserver as SocketServer import urllib.parse as urlparse BaseHTTPServer = SimpleHTTPServer except ImportError: import SimpleHTTPServer, SocketServer, BaseHTTPServer, urlparse from .core import ensureUnicode import sys, socket, errno, time, traceback, io, threading, re try: import reporter logging = reporter.bind("retro") except ImportError: import logging reporter = None from . import core, web # Jython has no signal module try: import signal HAS_SIGNAL = True except: HAS_SIGNAL = False # ------------------------------------------------------------------------------ #
# # You should have received a copy of the GNU General Public License # along with Broken Promises. If not, see <http://www.gnu.org/licenses/>. from brokenpromises import settings, Report from brokenpromises.storage import Storage import brokenpromises.channels import brokenpromises.utils import nltk import os import dateparser import datetime import calendar import reporter debug, trace, info, warning, error, fatal = reporter.bind(__name__) class Collector(object): def __init__(self): self.report = None @classmethod def retrieve_referenced_dates(cls, text, filters_on_text=None): references = [] # filters # remove all tags if filters_on_text: text = filters_on_text(text) # search and add dates to `refrences` for date_obj, date_row, date_position in dateparser.find_dates(text):
# Project : NAME # ----------------------------------------------------------------------------- # Author : FFunction # License : BSD License # ----------------------------------------------------------------------------- # Creation date : YYYY-MM-DD # Last modification : YYYY-MM-DD # ----------------------------------------------------------------------------- import sys from .catalogue import Catalogue from .dedup import Dedup try: import reporter logging = reporter.bind(sys.argv[0].split("/")[-1]) except ImportError: import logging def cat(args=None): """Writes out the catalogue at the given location.""" args = sys.argv[1:] if args is None else args paths = args cat = Catalogue(paths, logging=logging) cat.write(sys.stdout) def dedup(args=None): args = sys.argv[1:] if args is None else args cat = Dedup(args[0], logging=logging)
# ----------------------------------------------------------------------------- # Creation date : 14-Jul-2013 # Last modification : 17-Nov-2016 # ----------------------------------------------------------------------------- from __future__ import print_function import re, os, sys, argparse, json, copy, io, time from io import BytesIO from .grammar import getGrammar from .processor import PCSSProcessor from .writer import CSSWriter from .cache import Graph try: import reporter logging = reporter.bind("pcss") except ImportError: import logging GRAPH = Graph() def parse(path, convert=True): if GRAPH: node = GRAPH.get(path) return node.css if convert else node.ast else: res = getGrammar().parsePath(path) return processResult(res, path=path) if convert else res def parseString(text, path=None, convert=True): res = getGrammar().parseString(text)
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Broken Promises. If not, see <http://www.gnu.org/licenses/>. from brokenpromises.operations import CollectArticles from bson.json_util import dumps import optparse import brokenpromises.channels import sys import reporter reporter.REPORTER.register(reporter.StderrReporter()) debug, trace, info, warning, error, fatal = reporter.bind( "script_collect_articles") oparser = optparse.OptionParser( usage= "\n./%prog [options] year \n./%prog [options] year month\n./%prog [options] year month day" ) # oparser.add_option("-C", "--nocache", action="store_true", dest="nocache", # help = "Prevents from using the cache", default=False) oparser.add_option("-f", "--channelslistfile", action="store", dest="channels_file", help="Use this that as channels list to use", default=None) oparser.add_option("-c", "--channels",
#!/usr/bin/env python3
import re, sys

# try:
import reporter
reporter.template(reporter.TEMPLATE_COMPACT)
logging = reporter.bind("memcheck")
# except Exception as e:
# 	print("PROBLEM", e)
# 	import logging
#

__doc__ = """
`memcheck` is a tool that parses the output of memory-related operations
as defined in `oo.h` and ensures the following properties:

- Any freed pointer was previously allocated
- Any created/resized pointer is deallocated
- No pointer is deallocated more than once

To use memcheck, run your program compiled with `<oo.h>` using the
`__NEW`, `__FREE`, ‥ primitives, save the output to a log file and
run `memcheck` on it.

```bash
./a.out > a.log
./bin/memcheck.py a.log
```
"""

# Regex fragment with one capture group matching either the literal `(nil)`
# or a `0x`-prefixed run of word characters. Written as a raw string so the
# backslashes reach the regex engine untouched instead of relying on invalid
# string escapes (`\(`, `\w`, `\d`), which newer Python versions warn about.
ADDR = r"(\(nil\)|0x[\w\d]+)"