Source code for okscraper.base

from unittest import TestCase
import os
import inspect
import logging

class BaseScraper(object):
    """
    Abstract Scraper class - should be extended by concrete scraper objects

    You must declare the following::

        def __init__(self, *args, **kwargs):
            self.source = (an object derived from a class based on okscraper.sources.BaseSource)
            self.storage = (an object derived from a class based on okscraper.storages.BaseStorage)

        def _scrape(self):
            # here you do the actual scraping based on source and storing to storage
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments.

        The base class sets no attributes; ``scrape`` reads ``self.storage``,
        so extending classes have to assign it themselves.
        """

    def _scrape(self, *args, **kwargs):
        """Do the actual scraping - must be overridden by extending classes.

        Receives the arguments that were passed to ``scrape``.

        :raises NotImplementedError: always, in this base implementation
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception keep working while the intent becomes explicit.
        raise NotImplementedError('_scrape method must be implemented by extending classes')

    def _getLogger(self):
        """Return a logger named ``<module>(<ClassName>)`` for the concrete class."""
        cls = self.__class__
        return logging.getLogger('{0}({1})'.format(cls.__module__, cls.__name__))

    def scrape(self, *args, **kwargs):
        """Run the scraper and return the stored result.

        Calls ``_scrape`` with the given arguments, then ``self.storage.commit()``
        and finally returns whatever ``self.storage.get()`` returns.
        """
        self._scrape(*args, **kwargs)
        self.storage.commit()
        return self.storage.get()
class ParsingFromFileTestCase(TestCase):
    """
    base class for testing scrapers with input from a file

    minimal implementation sample::

        class MyScraperTestCase(ParsingFromFileTestCase):

            def _getScraperClass(self):
                return MyScraper

            def _getFilename(self):
                # this is a file containing test data
                return 'my_data_<<id>>.xml'

            def testParsing(self):
                self.assertScrape(
                    args=(220,),
                    expectedData={'id': 220, 'name': 'Hello World'}
                )
    """

    def _getScraperClass(self):
        """Return the scraper class under test - override this or ``_getScraper``.

        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError('you must implement the _getScraperClass or _getScraper methods')

    def _getScraper(self):
        """Instantiate the class from ``_getScraperClass`` with no arguments."""
        scraperClass = self._getScraperClass()
        return scraperClass()

    def _getFilename(self):
        """Return the test data file name.

        By default this is the ``filename`` given to ``assertScrape``
        (``None`` when it was not given).
        """
        return self._filename

    def _getDataDir(self):
        """Return the absolute path of the ``testdata`` directory that sits
        next to the file defining the concrete test case class."""
        _file_ = inspect.getfile(self.__class__)
        return os.path.join(os.path.abspath(os.path.dirname(_file_)), 'testdata')

    def _getSource(self):
        """Return a ``FileSource`` for the data file inside the data directory."""
        # imported at call time rather than at module import time
        from okscraper.sources import FileSource
        return FileSource(os.path.join(self._getDataDir(), self._getFilename()))

    def _getStorageClass(self):
        """Return the storage class to use, as reported by ``getBaseStorage()``
        of the storage currently attached to the scraper."""
        return self.scraper.storage.getBaseStorage()

    def _getStorage(self):
        """Instantiate the class from ``_getStorageClass`` with no arguments."""
        storageClass = self._getStorageClass()
        return storageClass()

    def _assertParseSuccessful(self, expected_data):
        """Delegate the comparison to the storage's own ``assertEquals``,
        passing this test case and the expected data."""
        self.scraper.storage.assertEquals(self, expected_data)

    def _initScraper(self):
        """Create the scraper and replace its source and storage with the
        test ones."""
        self.scraper = self._getScraper()
        self.scraper.source = self._getSource()
        # _getStorage reads the scraper's current storage before this
        # assignment replaces it
        self.scraper.storage = self._getStorage()

    def _init(self):
        """Reset the per-assertion state."""
        self._filename = None
        self.scraper = None

    def assertScrape(self, expectedData, args=(), kwargs=None, filename=None):
        """Run the scraper and check the stored data.

        :param expectedData: passed to the storage's ``assertEquals``
        :param args: positional arguments for the scraper's ``scrape``
        :param kwargs: keyword arguments for the scraper's ``scrape``
            (``None`` means no keyword arguments)
        :param filename: when not ``None``, stored as ``self._filename`` which
            the default ``_getFilename`` returns
        """
        # a fresh dict per call instead of a shared mutable default
        if kwargs is None:
            kwargs = {}
        self._init()
        if filename is not None:
            self._filename = filename
        self._initScraper()
        self.scraper.scrape(*args, **kwargs)
        self._assertParseSuccessful(expectedData)