18 lines
466 B
Python
18 lines
466 B
Python
from driver import Driven
|
|
from bs4 import BeautifulSoup
|
|
|
|
class CrawlerBase(Driven):
    """Webcrawler base class.

    Builds on ``Driven`` by adding an initially empty ``data`` list, a
    ``crawl`` hook that inheriting classes must supply, and a helper that
    parses the driver's current page source into ``self.soup``.
    """

    def __init__(self, new_driver=False, **kwargs):
        """Initialise the ``Driven`` parent, then start with empty data.

        ``new_driver`` and any additional keyword arguments are passed
        through unchanged to the parent initialiser.
        """
        super(CrawlerBase, self).__init__(new_driver=new_driver, **kwargs)
        self.data = list()

    def crawl(self):
        """Placeholder hook; always raises ``NotImplementedError`` here."""
        raise NotImplementedError('Inheriting class must implement crawl!')

    def make_soup(self):
        """Parse ``self.driver.page_source`` with lxml into ``self.soup``.

        ``self.driver`` is not assigned in this class -- presumably it is
        provided by ``Driven``; confirm against that class.
        """
        page_markup = self.driver.page_source
        self.soup = BeautifulSoup(page_markup, 'lxml')