olivia_finder.myrequests.proxy_builders.proxy_builder

  1from __future__ import annotations
  2from abc import ABC, abstractmethod
  3from typing import List, Optional
  4import requests
  5
  6from ...utilities.config import Configuration
  7from ...utilities.logger import MyLogger
  8
  9class ProxyBuilder(ABC):
 10    '''
 11    Abstract class to represent a proxy builder
 12
 13    .. warning::
 14        This class can't be instantiated directly, use a subclass instead
 15
 16            
 17    - :class:`olivia_finder.myrequests.proxy_builders.ssl_proxies.SSLProxiesBuilder`
 18    - :class:`olivia_finder.myrequests.proxy_builders.list_builder.ListProxyBuilder`
 19
 20    '''
 21    
 22    
 23
 24    def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
 25        '''
 26        Constructor
 27        
 28        Parameters
 29        ----------
 30        url : str
 31            URL of the proxy list website to get the proxies
 32        request_timeout : int
 33            Timeout for the proxy list requests
 34        '''
 35        
 36        # Get logger name from config file
 37        self.logger = MyLogger.get_logger('logger_myrequests')
 38
 39        # Check if this clas is not instantiated directly
 40        if self.__class__ == ProxyBuilder:
 41            raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")
 42
 43        # URL of the proxy list website to get the proxies
 44        if url is None:
 45            raise ValueError("url parameter can't be None")
 46        self.url = url
 47
 48        # Timeout for the proxy list requests
 49        self.request_timeout = request_timeout
 50
 51    def get_proxies(self) -> List[str]:
 52        '''
 53        Get the proxies from the website, parse the response and return a list of proxies
 54
 55        Returns
 56        -------
 57        List[str]
 58            A list of proxies
 59
 60        Raises
 61        ------
 62        Exception
 63            If there is an error getting the proxies
 64
 65        Examples
 66        --------
 67        >>> from proxy_builders.ssl_proxies import SSLProxies
 68        >>> ssl_proxies = SSLProxies()
 69        >>> proxies = ssl_proxies.get_proxies()
 70        '''
 71
 72        # Do the request, if there is an error return an empty list
 73        try:
 74            response = requests.get(self.url, timeout=self.request_timeout)
 75        except Exception as e:
 76            message = f"Error getting proxies from {self.__class__.__name__}: {e}"
 77            message += f"URL: {self.url}"
 78            self.logger.error(message)
 79            return []
 80        
 81        # Reesponse is OK, parse the response and return the proxies
 82        proxies = []
 83        if response.status_code == 200:
 84            proxies = self._parse_request(response)
 85            self.logger.debug(f"Found {len(proxies)} proxies from {self.__class__.__name__}")
 86        else:
 87            message = f"Error getting proxies from {self.__class__.__name__}"
 88            message += f"Request returned status code: {response.status_code}"
 89            self.logger.error(message)
 90
 91        return proxies
 92    
 93    @abstractmethod
 94    def _parse_request(self, response: requests.Response) -> List[str]:
 95        '''
 96        Abstract method to parse the response from the request
 97
 98        Parameters
 99        ----------
100        response : requests.Response
101            The response from the request
102
103        Raises
104        ------
105        NotImplementedError
106            If the method is not implemented in the subclass
107
108        Returns
109        -------
110        List[str]
111            A list of proxies
112        '''
113
114        raise NotImplementedError
115
116
117        
class ProxyBuilder(abc.ABC):
 11class ProxyBuilder(ABC):
 12    '''
 13    Abstract class to represent a proxy builder
 14
 15    .. warning::
 16        This class can't be instantiated directly, use a subclass instead
 17
 18            
 19    - :class:`olivia_finder.myrequests.proxy_builders.ssl_proxies.SSLProxiesBuilder`
 20    - :class:`olivia_finder.myrequests.proxy_builders.list_builder.ListProxyBuilder`
 21
 22    '''
 23    
 24    
 25
 26    def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
 27        '''
 28        Constructor
 29        
 30        Parameters
 31        ----------
 32        url : str
 33            URL of the proxy list website to get the proxies
 34        request_timeout : int
 35            Timeout for the proxy list requests
 36        '''
 37        
 38        # Get logger name from config file
 39        self.logger = MyLogger.get_logger('logger_myrequests')
 40
 41        # Check if this clas is not instantiated directly
 42        if self.__class__ == ProxyBuilder:
 43            raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")
 44
 45        # URL of the proxy list website to get the proxies
 46        if url is None:
 47            raise ValueError("url parameter can't be None")
 48        self.url = url
 49
 50        # Timeout for the proxy list requests
 51        self.request_timeout = request_timeout
 52
 53    def get_proxies(self) -> List[str]:
 54        '''
 55        Get the proxies from the website, parse the response and return a list of proxies
 56
 57        Returns
 58        -------
 59        List[str]
 60            A list of proxies
 61
 62        Raises
 63        ------
 64        Exception
 65            If there is an error getting the proxies
 66
 67        Examples
 68        --------
 69        >>> from proxy_builders.ssl_proxies import SSLProxies
 70        >>> ssl_proxies = SSLProxies()
 71        >>> proxies = ssl_proxies.get_proxies()
 72        '''
 73
 74        # Do the request, if there is an error return an empty list
 75        try:
 76            response = requests.get(self.url, timeout=self.request_timeout)
 77        except Exception as e:
 78            message = f"Error getting proxies from {self.__class__.__name__}: {e}"
 79            message += f"URL: {self.url}"
 80            self.logger.error(message)
 81            return []
 82        
 83        # Reesponse is OK, parse the response and return the proxies
 84        proxies = []
 85        if response.status_code == 200:
 86            proxies = self._parse_request(response)
 87            self.logger.debug(f"Found {len(proxies)} proxies from {self.__class__.__name__}")
 88        else:
 89            message = f"Error getting proxies from {self.__class__.__name__}"
 90            message += f"Request returned status code: {response.status_code}"
 91            self.logger.error(message)
 92
 93        return proxies
 94    
 95    @abstractmethod
 96    def _parse_request(self, response: requests.Response) -> List[str]:
 97        '''
 98        Abstract method to parse the response from the request
 99
100        Parameters
101        ----------
102        response : requests.Response
103            The response from the request
104
105        Raises
106        ------
107        NotImplementedError
108            If the method is not implemented in the subclass
109
110        Returns
111        -------
112        List[str]
113            A list of proxies
114        '''
115
116        raise NotImplementedError

Abstract class to represent a proxy builder

This class can't be instantiated directly, use a subclass instead

ProxyBuilder(url: Optional[str] = None, request_timeout: Optional[int] = None)
26    def __init__(self, url: Optional[str] = None, request_timeout: Optional[int] = None) -> None:
27        '''
28        Constructor
29        
30        Parameters
31        ----------
32        url : str
33            URL of the proxy list website to get the proxies
34        request_timeout : int
35            Timeout for the proxy list requests
36        '''
37        
38        # Get logger name from config file
39        self.logger = MyLogger.get_logger('logger_myrequests')
40
41        # Check if this clas is not instantiated directly
42        if self.__class__ == ProxyBuilder:
43            raise TypeError("ProxyBuilder can't be instantiated directly, use a subclass instead")
44
45        # URL of the proxy list website to get the proxies
46        if url is None:
47            raise ValueError("url parameter can't be None")
48        self.url = url
49
50        # Timeout for the proxy list requests
51        self.request_timeout = request_timeout

Constructor

Parameters
  • url (str): URL of the proxy list website to get the proxies
  • request_timeout (int): Timeout for the proxy list requests
def get_proxies(self) -> List[str]:
53    def get_proxies(self) -> List[str]:
54        '''
55        Get the proxies from the website, parse the response and return a list of proxies
56
57        Returns
58        -------
59        List[str]
60            A list of proxies
61
62        Raises
63        ------
64        Exception
65            If there is an error getting the proxies
66
67        Examples
68        --------
69        >>> from proxy_builders.ssl_proxies import SSLProxies
70        >>> ssl_proxies = SSLProxies()
71        >>> proxies = ssl_proxies.get_proxies()
72        '''
73
74        # Do the request, if there is an error return an empty list
75        try:
76            response = requests.get(self.url, timeout=self.request_timeout)
77        except Exception as e:
78            message = f"Error getting proxies from {self.__class__.__name__}: {e}"
79            message += f"URL: {self.url}"
80            self.logger.error(message)
81            return []
82        
83        # Reesponse is OK, parse the response and return the proxies
84        proxies = []
85        if response.status_code == 200:
86            proxies = self._parse_request(response)
87            self.logger.debug(f"Found {len(proxies)} proxies from {self.__class__.__name__}")
88        else:
89            message = f"Error getting proxies from {self.__class__.__name__}"
90            message += f"Request returned status code: {response.status_code}"
91            self.logger.error(message)
92
93        return proxies

Get the proxies from the website, parse the response and return a list of proxies

Returns
  • List[str]: A list of proxies
Raises
  • Exception: Only if the subclass's _parse_request raises; request failures and non-200 responses are logged and an empty list is returned instead
Examples
>>> from olivia_finder.myrequests.proxy_builders.ssl_proxies import SSLProxiesBuilder
>>> ssl_proxies = SSLProxiesBuilder()
>>> proxies = ssl_proxies.get_proxies()