olivia_finder.myrequests.proxy_handler

  1from threading import Lock
  2from typing import List, Optional, Union
  3from .proxy_builders.proxy_builder import ProxyBuilder
  4from .proxy_builders.ssl_proxies import SSLProxiesBuilder
  5from .proxy_builders.list_builder import ListProxyBuilder 
  6from ..utilities.logger import MyLogger
  7from ..utilities.singleton_decorator import singleton
  8from ..utilities.config import Configuration
  9
 10@singleton
 11class ProxyHandler():
 12    '''
 13    ProxyHandler class, handles the proxies, gets them from the builders and rotates them
 14
 15    Parameters
 16    ----------
 17    builders : List[ProxyBuilder]
 18        List of proxy builders to get the proxies, if None, get the default builders
 19    proxy_max_uses : int
 20        Maximum number of uses for a proxy before it is removed from the list
 21
 22        
 23    Attributes
 24    ----------
 25    proxy_list : List[str]
 26        List of proxies, each proxy is a string in the format ip:port, has to be accessed with a lock
 27    proxy_uses : dict
 28        A dictionary with the number of uses for each proxy, uses the proxy as key, has to be accessed with a lock
 29    proxy_builders : List[ProxyBuilder]
 30        List of proxy builders to get the proxies, has to be accessed with a lock
 31    lock : threading.Lock
 32        Lock to prevent concurrent access to the proxy list
 33
 34    Raises
 35    ------
 36    ValueError
 37        If there is no valid proxy builder   
 38
 39    '''
 40
 41    PROXY_MAX_USES = 50
 42
 43    def __init__(
 44        self,
 45        builders: Optional[List[ProxyBuilder]] = None,
 46        proxy_max_uses: int = PROXY_MAX_USES
 47    ):
 48        '''Constructor'''
 49
 50        # Lock to prevent concurrent access to the proxy list
 51        self.lock = Lock()
 52
 53        # Get logger name from config file
 54        self.logger = MyLogger.get_logger("logger_myrequests")
 55        
 56
 57        # Set attributes
 58        self.proxy_max_uses = proxy_max_uses             # override default value
 59        self.proxy_list: List[str] = []                  # proxy_list is a list str with the proxies
 60        self.proxy_uses: dict = {}                       # A dictionary with the number of uses for each proxy, uses the proxy as key
 61        self.proxy_builders: List[ProxyBuilder] = []     # List of proxy builders
 62
 63        # Set proxy builders, if none, get default builders
 64        if builders is None:                                
 65            self.proxy_builders = self._get_available_builders()
 66        else:
 67            # Check if builders are valid
 68            for builder in builders:
 69                # if any of the builders is not valid, ignore and continue with the next one
 70                if not isinstance(builder, ProxyBuilder):
 71                    self.logger.debug(f"Builder {builder} is not a valid ProxyBuilderABC, ignoring")
 72                else:
 73                    self.logger.debug(f"Builder {builder} is valid, adding to list")
 74                    self.proxy_builders.append(builder)
 75            
 76            # Check if any builder is valid
 77            if len(self.proxy_builders) == 0:
 78                self.logger.debug("No valid proxy builders were provided")
 79                raise ValueError("No valid proxy builders were provided")
 80
 81            # Set builders
 82            self.proxy_builders = builders
 83
 84        # Get proxies from builder
 85        self.proxy_list = self._request_proxies()
 86        self.logger.info(f"Proxy Handler initialized with {len(self.proxy_list)} proxies")
 87
 88    def get_next_proxy(self) -> Union[str, None]:
 89        '''
 90        Get the next proxy from the proxies list and rotate it to the end of the list
 91
 92        Returns
 93        -------
 94        str
 95            The next proxy
 96            
 97        Examples
 98        --------
 99        >>> from proxy_handler import ProxyHandler
100        >>> proxy_handler = ProxyHandler()
101        >>> proxy = proxy_handler.get_next_proxy()
102            'http://1.1.1.1:8080'
103        '''
104
105        # Check if proxies are empty and get new ones
106        if len(self.proxy_list) == 0:
107            self.logger.info("No proxies available, trying to get new ones")
108            self.proxy_list = self._request_proxies()
109
110        # Check if proxies are still empty
111        if len(self.proxy_list) == 0:
112            self.logger.warning("No proxies available after trying to get new ones")
113            return None
114
115        # proxy rotation
116        proxy = self.proxy_list.pop(0)
117        self.proxy_list.append(proxy)
118        self.logger.debug(f"Next proxy: {proxy}")
119
120        # Handle proxy usage lifetime
121        self._handle_lifetime(proxy)
122
123        # return proxy as fstring
124        return f"http://{proxy}"
125    
126    def _handle_lifetime(self, proxy: str):
127        '''
128        Handle proxy usage lifetime, if proxy has been used more than the limit it is removed from the list
129        Removes the proxy from the dict and the list
130
131        Parameters
132        ----------
133        proxy : str
134            Proxy as str f'http://{ip}:{port}'    
135        '''
136        # add proxy to dict if it is not there or increase its usage           
137        if proxy not in self.proxy_uses:
138            self.proxy_uses[proxy] = 1
139        else:
140            self.proxy_uses[proxy] += 1
141
142        # remove proxy if it has been used more than the limit
143        if self.proxy_uses[proxy] > self.proxy_max_uses:
144            del self.proxy_uses[proxy]
145            self.proxy_list.remove(proxy)
146            self.logger.debug(f"Proxy {proxy} removed from list")
147
148    def _request_proxies(self) -> List[str]:
149        '''
150        Get proxies from builders defined in the constructor and return a list of unique proxies
151        
152        Returns
153        -------
154        List[str]
155            List of proxies as str f'http://{ip}:{port}'
156        '''
157        # Load the proxie list from the builders
158        proxies = []
159        for builder in self.proxy_builders:
160            proxies += builder.get_proxies()
161
162        # remove duplicates
163        proxies = list(set(proxies))
164        self.logger.debug(f"Proxies len: {len(proxies)}")
165
166        return proxies
167    
168    def _get_available_builders(self) -> List[ProxyBuilder]:
169        '''
170        Get available proxy builders as a list of ProxyBuilder objects
171
172        Returns
173        -------
174        Union[List[ProxyBuilder], None]
175            List of ProxyBuilder objects or None if there are no builders
176
177        '''
178
179        # Get available builders
180        return [
181            SSLProxiesBuilder(),
182            ListProxyBuilder(url="https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt"),
183            ListProxyBuilder(url="https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt")
184        ]
185
186        
187