Source code for okscraper.cli.runner

# encoding: utf-8

import importlib
import logging
import traceback

class Runner(object):
    """Run a scraper class located by module name (e.g. from the command line).

    The scraper is looked up in the ``<module_name>.scrapers`` module; for
    example ``module_name='lobbyists'`` resolves to ``lobbyists.scrapers``.
    The class named ``scraper_class_name`` (``'MainScraper'`` when omitted) is
    instantiated without arguments and its ``scrape`` method is called with
    the extra positional and keyword arguments given to the constructor.

    Subclasses may override :meth:`post_init` and :meth:`post_run`, which are
    no-op hooks in this class.
    """

    def __init__(self, module_name, scraper_class_name=None, *args, **kwargs):
        """Store the scraper location and the arguments to forward to it.

        :param module_name: package name; ``.scrapers`` is appended on import.
        :param scraper_class_name: name of the scraper class inside that
            module, defaults to ``'MainScraper'`` when ``None``.
        :param args: positional arguments forwarded to ``scrape``.
        :param kwargs: keyword arguments forwarded to ``scrape``. The special
            key ``extra_scraper_args`` is removed and its items are appended
            to the positional arguments instead.
        """
        if scraper_class_name is None:
            scraper_class_name = 'MainScraper'
        self._module_name = module_name
        self._scraper_class_name = scraper_class_name

        # ``extra_scraper_args`` may be present but None (e.g. an unset
        # command-line option); treat that the same as "not supplied"
        # instead of failing on tuple(None).
        extra_args = kwargs.pop('extra_scraper_args', None)
        if extra_args is not None:
            args += tuple(extra_args)

        self._args = args
        self._kwargs = kwargs
        self.post_init()

    def post_init(self):
        """Hook called at the end of ``__init__``; does nothing here."""
        pass

    def post_run(self):
        """Hook called after ``run`` finishes or fails; does nothing here."""
        pass

    def run(self):
        """Import the scrapers module, instantiate the scraper and scrape.

        :returns: whatever the scraper's ``scrape`` method returns.
        :raises: any exception raised while importing the module, looking up
            the class, constructing the scraper or scraping. ``post_run`` is
            called in every case before the exception propagates.
        """
        try:
            module = importlib.import_module('%s.scrapers' % self._module_name)
            scraper_class = getattr(module, self._scraper_class_name)
            scraper = scraper_class()
            # Unpacking an empty tuple / dict passes no arguments, so a single
            # call covers every combination of args and kwargs.
            result = scraper.scrape(*self._args, **self._kwargs)
        finally:
            self.post_run()
        return result
class LogRunner(Runner):
    """Runner that attaches a log handler and logs scraper failures.

    Two extra keyword arguments are consumed by the constructor and are never
    forwarded to the scraper:

    * ``log_handler`` - a ``logging.Handler`` added to the root logger (the
      root logger's level is set to ``DEBUG`` when a handler is given).
    * ``log_verbosity`` - ``1``, ``2`` or ``3`` (or their string forms),
      mapped to ``WARNING``, ``INFO`` and ``DEBUG``; any other value maps to
      ``ERROR``. Only applied to ``log_handler`` when one is given.
    """

    def _getLogLevelFromVerbosity(self, verbosity):
        """Translate a verbosity value into a ``logging`` level constant."""
        verbosities = {
            '1': logging.WARNING,
            '2': logging.INFO,
            '3': logging.DEBUG,
        }
        # str() lets callers pass either the int or its string form.
        return verbosities.get(str(verbosity), logging.ERROR)

    def __init__(self, *args, **kwargs):
        """Consume the logging options, then defer to the parent constructor.

        All remaining positional and keyword arguments are passed through
        unchanged to the parent class.
        """
        # Always strip both logging options so neither can leak into the
        # keyword arguments that are later forwarded to the scraper, even
        # when only one of them was supplied.
        has_verbosity = 'log_verbosity' in kwargs
        verbosity = kwargs.pop('log_verbosity', None)
        handler = kwargs.pop('log_handler', None)

        if handler is not None:
            logger = logging.getLogger()
            logger.setLevel(logging.DEBUG)
            if has_verbosity:
                handler.setLevel(self._getLogLevelFromVerbosity(verbosity))
            logger.addHandler(handler)

        super(LogRunner, self).__init__(*args, **kwargs)

    def run(self):
        """Run the scraper, logging any error instead of raising it.

        :returns: the parent ``run`` result, or ``None`` when an exception
            was raised and logged.
        """
        try:
            return super(LogRunner, self).run()
        except Exception:
            # Only ordinary errors are logged and suppressed;
            # KeyboardInterrupt and SystemExit propagate to the caller.
            exc = traceback.format_exc()
            logger_name = (
                self.__class__.__module__ + '(' + self.__class__.__name__ + ')'
            )
            # The formatted traceback is used as the log message itself;
            # ``exception`` additionally attaches the active exc_info.
            logging.getLogger(logger_name).exception(exc)
            return None
class _DbLogHandler(logging.Handler):
    """Logging handler that hands every record to a runner object.

    The runner is supplied through the required ``log_runner`` keyword
    argument and must provide an ``on_dblog_emit(record)`` method.
    """

    def __init__(self, *args, **kwargs):
        """Take ``log_runner`` out of the kwargs; the rest go to ``Handler``."""
        runner = kwargs.pop('log_runner')
        self.log_runner = runner
        super(_DbLogHandler, self).__init__(*args, **kwargs)

    def emit(self, record):
        """Forward ``record`` to the runner's ``on_dblog_emit`` method."""
        forward = self.log_runner.on_dblog_emit
        forward(record)
class DbLogRunner(LogRunner):
    """LogRunner that routes log records to an overridable hook.

    On construction a ``_DbLogHandler`` with level ``INFO`` is added to the
    root logger (whose level is set to ``DEBUG``). Each record the handler
    emits is passed to :meth:`on_dblog_emit`, which subclasses must implement.

    NOTE(review): the handler is added to the root logger on every
    instantiation and this class never removes it - confirm that runners are
    created once per process.
    """

    def __init__(self, *args, **kwargs):
        """Install the db log handler, then defer to the parent constructor."""
        logger = logging.getLogger()
        logger.setLevel(logging.DEBUG)
        handler = _DbLogHandler(log_runner=self)
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)
        super(DbLogRunner, self).__init__(*args, **kwargs)

    def on_dblog_emit(self, record):
        """Handle one log record; must be overridden by subclasses.

        :param record: the ``logging.LogRecord`` passed to the handler.
        :raises NotImplementedError: always, in this base implementation.
        """
        # NotImplementedError is still an Exception subclass, so existing
        # ``except Exception`` callers keep working.
        raise NotImplementedError(
            'on_dblog_emit must be implemented by extending classes'
        )