Files
arcli/crawler.py

18 lines
466 B
Python

from driver import Driven
from bs4 import BeautifulSoup
class CrawlerBase(Driven):
    '''
    Base class for web crawlers built on top of ``Driven``.

    Concrete crawlers override ``crawl``; ``make_soup`` is a shared helper
    that parses the driver's current page source into ``self.soup``.
    '''

    def __init__(self, new_driver=False, **kwargs):
        '''
        Initialise the ``Driven`` parent, then start with an empty ``data`` list.

        ``new_driver`` and any extra keyword arguments are forwarded unchanged
        to ``Driven.__init__``; their meaning is defined there.
        '''
        # Explicit two-argument super() works on both Python 2 and Python 3.
        parent = super(CrawlerBase, self)
        parent.__init__(new_driver=new_driver, **kwargs)
        # NOTE(review): presumably filled in by a subclass's crawl() -- confirm.
        self.data = []

    def crawl(self):
        '''
        Placeholder that every concrete crawler has to override.

        Raises NotImplementedError when called without being overridden.
        '''
        message = 'Inheriting class must implement crawl!'
        raise NotImplementedError(message)

    def make_soup(self):
        '''
        Parse the driver's current page source with the ``lxml`` parser.

        The resulting BeautifulSoup object is stored on ``self.soup``;
        nothing is returned.
        '''
        # self.driver is not assigned in this class; presumably Driven
        # provides it -- confirm against driver.py.
        markup = self.driver.page_source
        self.soup = BeautifulSoup(markup, 'lxml')