def test_normal(self):
    """Download http://example.com/ and check the text of its <title> element."""
    url = 'http://example.com/'
    doc = download(url)
    title = pq(doc).find('title').text()
    print(title)
    # assertTrue(expr, msg) only checks that `expr` is truthy and uses the
    # second argument as the failure message, so the previous
    # assertTrue('Example Domain', title) could never fail. Compare the
    # expected and actual titles instead.
    self.assertEqual('Example Domain', title)
# Script body: load (or interactively create) the Twitter OAuth credentials,
# make sure the working directories exist, then run the three pipeline steps
# getMedias -> generateResults -> download for `userId`.
quiet = args.quiet

# Twitter OAuth
oauthFile = '.oauth.json'
if not os.path.exists(oauthFile):
    # First run: ask for the consumer credentials and persist them as JSON.
    auth = {
        'consumer_token': input('Token: '),
        'consumer_secret': input('Secret: '),
    }
    with open(oauthFile, 'w') as oauthHandle:
        json.dump(auth, oauthHandle, indent=4, default=str)
else:
    # Read the stored credentials; the context manager closes the handle,
    # which the previous bare open(...).read() never did.
    with open(oauthFile) as oauthHandle:
        auth = json.load(oauthHandle)

# Create output and tmp directory if necessary
tmpDir = 'tmp'
if not os.path.exists(tmpDir):
    os.makedirs(tmpDir)
if not os.path.exists(outputDir):
    os.makedirs(outputDir)

# Intermediate files handed from one pipeline step to the next.
urlsFile = os.path.join(tmpDir, userId + '_urls.json')
downloadsFile = os.path.join(tmpDir, userId + '_download.json')

if quiet:
    # Deliberately left open: everything printed from here on is discarded
    # for the remainder of the run.
    sys.stdout = open(os.devnull, 'w')

getMedias(auth, userId, includeRetweets, imageSize, urlsFile)
generateResults(urlsFile, downloadsFile, filenameFormat)
download(downloadsFile, outputDir, False, False)
def test_404(self):
    """download() must hand back (None, 404) for the 'paper.pd' URL."""
    # NOTE(review): the path ends in '.pd' -- presumably a deliberately
    # truncated 'paper.pdf' so that the server answers with 404.
    missing_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pd'
    outcome = download(missing_url)
    document, status = outcome
    self.assertEqual(document, None)
    self.assertEqual(status, 404)
def test_nonhtml(self):
    """download() must hand back (None, 'application/pdf') for the PDF URL."""
    pdf_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pdf'
    outcome = download(pdf_url)
    document, reason = outcome
    self.assertEqual(document, None)
    self.assertEqual(reason, 'application/pdf')
from src.mapper import generate_results
from src.downloader import download

if __name__ == '__main__':
    # Command-line options; USER_IDS is the collection iterated below.
    ARGS = parse_args(sys.argv[1:])
    USER_IDS = parse_file_arg(ARGS.userid)

    # Twitter OAuth credentials, obtained via '.oauth.json'
    AUTH = get_oauth('.oauth.json')

    # With the "quiet" flag set, everything printed from here on is sent to
    # the null device.
    if ARGS.quiet:
        sys.stdout = open(os.devnull, 'w')

    # Handle the users one after another
    for user_id in USER_IDS:
        # A per-user sub-directory is used only when ARGS.o_userid is set;
        # otherwise files land directly in ARGS.output.
        if ARGS.o_userid:
            user_subdir = user_id + os.sep
        else:
            user_subdir = ''
        outputDir = os.path.join(ARGS.output, user_subdir)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        # Fetch the media list, map it to results, then download them
        medias = get_medias(
            AUTH,
            user_id,
            ARGS.retweets,
            ARGS.image_size,
            ARGS.since,
            ARGS.since_id,
            ARGS.until,
            ARGS.until_id,
            ARGS.likes,
        )
        results = generate_results(medias, ARGS.format)
        download(results, outputDir, False, True)
def test_normal(self):
    """Download http://example.com/ and check the text of its <title> element."""
    url = 'http://example.com/'
    doc = download(url)
    title = pq(doc).find('title').text()
    print(title)
    # assertTrue(expr, msg) only checks that `expr` is truthy and uses the
    # second argument as the failure message, so the previous
    # assertTrue('Example Domain', title) could never fail. Compare the
    # expected and actual titles instead.
    self.assertEqual('Example Domain', title)
def test_nonhtml(self):
    """download() must hand back (None, 'application/pdf') for the PDF URL."""
    pdf_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pdf'
    outcome = download(pdf_url)
    document, reason = outcome
    self.assertEqual(document, None)
    self.assertEqual(reason, 'application/pdf')
def test_404(self):
    """download() must hand back (None, 404) for the 'paper.pd' URL."""
    # NOTE(review): the path ends in '.pd' -- presumably a deliberately
    # truncated 'paper.pdf' so that the server answers with 404.
    missing_url = 'http://www.cs.helsinki.fi/u/hxiao/rl-seminar/paper.pd'
    outcome = download(missing_url)
    document, status = outcome
    self.assertEqual(document, None)
    self.assertEqual(status, 404)
try: argparser.parse() # creates new csv file if it doesn't already exist if not os.path.isfile(csv_file): writer.create_new(csv_file) if not os.path.exists(xml_dir): os.makedirs(xml_dir) if not os.path.exists(save_xml_dir): os.makedirs(save_xml_dir) print('---------%s---------' % strftime("%Y-%m-%d %H:%M:%S", gmtime())) start_time = time() downloader.download() scraper.main() cleaner.clean() print('Execution took %s seconds' % (round(time() - start_time, 3))) print('-------------------------------------') except Exception as e: error_msg = 'Something went horribly wrong. Please check the program.\nError: %s' % e print(error_msg) traceback.print_exc() # if USE_NOTIFY is True it sends a message to the given endpoint if USE_NOTIFIER: from pushnotifier import PushNotifier as pn from threading import Thread def send_notification():