olivia_finder.myrequests.proxy_builders.proxy_builder
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import List, Optional
import requests

from ...utilities.config import Configuration
from ...utilities.logger import MyLogger


class ProxyBuilder(ABC):
    '''
    Abstract class to represent a proxy builder

    .. warning::
        This class can't be instantiated directly, use a subclass instead


    - :class:`olivia_finder.myrequests.proxy_builders.ssl_proxies.SSLProxiesBuilder`
    - :class:`olivia_finder.myrequests.proxy_builders.list_builder.ListProxyBuilder`

    '''

    def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
        '''
        Set up the logger and store the request settings

        Parameters
        ----------
        url : str
            URL of the proxy list website to get the proxies, must not be None
        request_timeout : int
            Timeout for the proxy list requests, passed as ``timeout`` to ``requests.get``

        Raises
        ------
        TypeError
            If ProxyBuilder itself is instantiated instead of a subclass
        ValueError
            If url is None
        '''

        # Logger looked up by the name 'logger_myrequests'
        self.logger = MyLogger.get_logger('logger_myrequests')

        # Only subclasses may be instantiated
        if type(self) is ProxyBuilder:
            raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")

        # The default of None is rejected: a proxy list URL is mandatory
        if url is None:
            raise ValueError("url parameter can't be None")

        self.url = url
        self.request_timeout = request_timeout

    def get_proxies(self) -> List[str]:
        '''
        Fetch the proxy list page at ``self.url`` and return the proxies parsed from it

        Request failures and non-200 responses are logged as errors and yield an
        empty list instead of raising.

        Returns
        -------
        List[str]
            The proxies returned by ``_parse_request``, or an empty list if the
            request raised or the status code was not 200

        Raises
        ------
        Exception
            Only what ``_parse_request`` itself raises; errors from the HTTP
            request are caught and logged

        Examples
        --------
        >>> from proxy_builders.ssl_proxies import SSLProxies
        >>> ssl_proxies = SSLProxies()
        >>> proxies = ssl_proxies.get_proxies()
        '''

        builder_name = self.__class__.__name__

        # Best-effort fetch: any failure while requesting is logged and reported as "no proxies"
        try:
            response = requests.get(self.url, timeout=self.request_timeout)
        except Exception as e:
            self.logger.error(
                f"Error getting proxies from {builder_name}: {e} (URL: {self.url})"
            )
            return []

        # Anything other than 200 is treated as a failed fetch
        if response.status_code != 200:
            self.logger.error(
                f"Error getting proxies from {builder_name}. "
                f"Request returned status code: {response.status_code}"
            )
            return []

        proxies = self._parse_request(response)
        self.logger.debug(f"Found {len(proxies)} proxies from {builder_name}")
        return proxies

    @abstractmethod
    def _parse_request(self, response: requests.Response) -> List[str]:
        '''
        Abstract method to parse the response from the request

        Parameters
        ----------
        response : requests.Response
            The response from the request

        Raises
        ------
        NotImplementedError
            If the method is not implemented in the subclass

        Returns
        -------
        List[str]
            A list of proxies
        '''

        raise NotImplementedError
class
ProxyBuilder(abc.ABC):
class ProxyBuilder(ABC):
    '''
    Abstract class to represent a proxy builder

    .. warning::
        This class can't be instantiated directly, use a subclass instead


    - :class:`olivia_finder.myrequests.proxy_builders.ssl_proxies.SSLProxiesBuilder`
    - :class:`olivia_finder.myrequests.proxy_builders.list_builder.ListProxyBuilder`

    '''

    def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
        '''
        Set up the logger and store the request settings

        Parameters
        ----------
        url : str
            URL of the proxy list website to get the proxies, must not be None
        request_timeout : int
            Timeout for the proxy list requests, passed as ``timeout`` to ``requests.get``

        Raises
        ------
        TypeError
            If ProxyBuilder itself is instantiated instead of a subclass
        ValueError
            If url is None
        '''

        # Logger looked up by the name 'logger_myrequests'
        self.logger = MyLogger.get_logger('logger_myrequests')

        # Only subclasses may be instantiated
        if type(self) is ProxyBuilder:
            raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")

        # The default of None is rejected: a proxy list URL is mandatory
        if url is None:
            raise ValueError("url parameter can't be None")

        self.url = url
        self.request_timeout = request_timeout

    def get_proxies(self) -> List[str]:
        '''
        Fetch the proxy list page at ``self.url`` and return the proxies parsed from it

        Request failures and non-200 responses are logged as errors and yield an
        empty list instead of raising.

        Returns
        -------
        List[str]
            The proxies returned by ``_parse_request``, or an empty list if the
            request raised or the status code was not 200

        Raises
        ------
        Exception
            Only what ``_parse_request`` itself raises; errors from the HTTP
            request are caught and logged

        Examples
        --------
        >>> from proxy_builders.ssl_proxies import SSLProxies
        >>> ssl_proxies = SSLProxies()
        >>> proxies = ssl_proxies.get_proxies()
        '''

        builder_name = self.__class__.__name__

        # Best-effort fetch: any failure while requesting is logged and reported as "no proxies"
        try:
            response = requests.get(self.url, timeout=self.request_timeout)
        except Exception as e:
            self.logger.error(
                f"Error getting proxies from {builder_name}: {e} (URL: {self.url})"
            )
            return []

        # Anything other than 200 is treated as a failed fetch
        if response.status_code != 200:
            self.logger.error(
                f"Error getting proxies from {builder_name}. "
                f"Request returned status code: {response.status_code}"
            )
            return []

        proxies = self._parse_request(response)
        self.logger.debug(f"Found {len(proxies)} proxies from {builder_name}")
        return proxies

    @abstractmethod
    def _parse_request(self, response: requests.Response) -> List[str]:
        '''
        Abstract method to parse the response from the request

        Parameters
        ----------
        response : requests.Response
            The response from the request

        Raises
        ------
        NotImplementedError
            If the method is not implemented in the subclass

        Returns
        -------
        List[str]
            A list of proxies
        '''

        raise NotImplementedError
Abstract class to represent a proxy builder
This class can't be instantiated directly, use a subclass instead
ProxyBuilder(url: Optional[str] = None, request_timeout: Optional[int] = None)
def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
    '''
    Set up the logger and store the request settings

    Parameters
    ----------
    url : str
        URL of the proxy list website to get the proxies, must not be None
    request_timeout : int
        Timeout for the proxy list requests

    Raises
    ------
    TypeError
        If ProxyBuilder itself is instantiated instead of a subclass
    ValueError
        If url is None
    '''

    # Logger looked up by the name 'logger_myrequests'
    self.logger = MyLogger.get_logger('logger_myrequests')

    # Only subclasses may be instantiated
    if type(self) is ProxyBuilder:
        raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")

    # The default of None is rejected: a proxy list URL is mandatory
    if url is None:
        raise ValueError("url parameter can't be None")

    self.url = url
    self.request_timeout = request_timeout
Constructor
Parameters
- url (str): URL of the proxy list website to get the proxies
- request_timeout (int): Timeout for the proxy list requests
def
get_proxies(self) -> List[str]:
def get_proxies(self) -> List[str]:
    '''
    Fetch the proxy list page at ``self.url`` and return the proxies parsed from it

    Request failures and non-200 responses are logged as errors and yield an
    empty list instead of raising.

    Returns
    -------
    List[str]
        The proxies returned by ``_parse_request``, or an empty list if the
        request raised or the status code was not 200

    Raises
    ------
    Exception
        Only what ``_parse_request`` itself raises; errors from the HTTP
        request are caught and logged

    Examples
    --------
    >>> from proxy_builders.ssl_proxies import SSLProxies
    >>> ssl_proxies = SSLProxies()
    >>> proxies = ssl_proxies.get_proxies()
    '''

    builder_name = self.__class__.__name__

    # Best-effort fetch: any failure while requesting is logged and reported as "no proxies"
    try:
        response = requests.get(self.url, timeout=self.request_timeout)
    except Exception as e:
        self.logger.error(
            f"Error getting proxies from {builder_name}: {e} (URL: {self.url})"
        )
        return []

    # Anything other than 200 is treated as a failed fetch
    if response.status_code != 200:
        self.logger.error(
            f"Error getting proxies from {builder_name}. "
            f"Request returned status code: {response.status_code}"
        )
        return []

    proxies = self._parse_request(response)
    self.logger.debug(f"Found {len(proxies)} proxies from {builder_name}")
    return proxies
Get the proxies from the website, parse the response and return a list of proxies
Returns
- List[str]: A list of proxies
Raises
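- Exception: Only if the subclass parser (`_parse_request`) raises. Errors while requesting the proxy list, and responses with a status code other than 200, are logged and an empty list is returned instead.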
Examples
>>> from proxy_builders.ssl_proxies import SSLProxies
>>> ssl_proxies = SSLProxies()
>>> proxies = ssl_proxies.get_proxies()