# Source code for okscraper.sources

import re, requests

class BaseSource(object):
    """Abstract base class for data sources.

    Extending classes must implement a ``fetch`` method which returns the
    input data.
    """

    def fetch(self, *args, **kwargs):
        """Return the input data; must be overridden by extending classes.

        Declared here so that a subclass which forgets to implement it fails
        with an explicit ``NotImplementedError`` instead of an
        ``AttributeError``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class BaseStringParamsSource(BaseSource):
    """Base for sources addressed by a template string with placeholders.

    The template may contain ``<<name>>`` placeholders which are filled in
    from the positional arguments given to ``fetch``. Extending classes
    implement ``_fetch``, which receives the resolved string.
    """

    def __init__(self, source_string):
        """Store the template string used to build the source string."""
        self._source_string = source_string

    def get_source_string(self, *args, **kwargs):
        """Return the template with placeholders replaced by ``args``.

        Placeholder names are collected in order of appearance and paired
        one-to-one with the positional arguments; pairing stops when either
        runs out, leaving any remaining placeholders untouched. Keyword
        arguments are accepted but not used.
        """
        resolved = self._source_string
        names = re.findall(r"<<(\w*)>>", resolved)
        # A name that appears more than once is listed once per appearance,
        # so it still pairs with (and uses up) one positional argument per
        # appearance, although the first replacement already substituted
        # every occurrence of that name.
        for name, value in zip(names, args):
            resolved = resolved.replace('<<{}>>'.format(name), str(value))
        return resolved

    def _fetch(self, *args, **kwargs):
        """Fetch the data for a resolved source string (abstract).

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def fetch(self, *args, **kwargs):
        """Resolve the source string from the arguments and fetch its data."""
        source = self.get_source_string(*args, **kwargs)
        return self._fetch(source)
class UrlSource(BaseStringParamsSource):
    """Source which reads its data from a url."""

    def _fetch(self, url):
        """Issue a GET request for ``url`` and return the response text."""
        # NOTE(review): no timeout is passed and the HTTP status is not
        # checked, so error responses are returned as regular text.
        response = requests.get(url)
        return response.text
class FileSource(BaseStringParamsSource):
    """Source which reads its data from a file."""

    def _fetch(self, filepath):
        """Open ``filepath`` in the default (text) mode and return its content."""
        with open(filepath) as source_file:
            content = source_file.read()
        return content
class ScraperSource(BaseSource):
    """Source which takes its data from an okscraper."""

    def __init__(self, scraper):
        """Keep the scraper object whose ``scrape`` method supplies the data."""
        self._scraper = scraper

    def fetch(self, *args, **kwargs):
        """Return whatever the wrapped scraper's ``scrape`` returns.

        All positional and keyword arguments are forwarded unchanged.
        """
        scraped = self._scraper.scrape(*args, **kwargs)
        return scraped