1from threading import Lock
2from typing import List, Optional, Union
3from .proxy_builders.proxy_builder import ProxyBuilder
4from .proxy_builders.ssl_proxies import SSLProxiesBuilder
5from .proxy_builders.list_builder import ListProxyBuilder
6from ..utilities.logger import MyLogger
7from ..utilities.singleton_decorator import singleton
8from ..utilities.config import Configuration
9
@singleton
class ProxyHandler():
    '''
    ProxyHandler class, handles the proxies, gets them from the builders and rotates them

    Parameters
    ----------
    builders : Optional[List[ProxyBuilder]]
        List of proxy builders to get the proxies, if None, get the default builders.
        Entries that are not ProxyBuilder instances are ignored.
    proxy_max_uses : int
        Usage threshold for a proxy. A proxy is removed from the list as soon as its
        usage counter exceeds this value, i.e. it is handed out proxy_max_uses + 1
        times before it disappears from the rotation.

    Attributes
    ----------
    proxy_list : List[str]
        List of proxies in rotation order (stored without the 'http://' prefix,
        which is added by get_next_proxy), has to be accessed with a lock
    proxy_uses : dict
        A dictionary with the number of uses for each proxy, uses the proxy as key,
        has to be accessed with a lock
    proxy_builders : List[ProxyBuilder]
        List of validated proxy builders to get the proxies, has to be accessed with a lock
    lock : threading.Lock
        Lock meant to prevent concurrent access to the proxy list.
        NOTE: the methods of this class do not acquire it themselves.

    Raises
    ------
    ValueError
        If builders are provided but none of them is a valid ProxyBuilder

    '''

    PROXY_MAX_USES = 50

    def __init__(
        self,
        builders: Optional[List[ProxyBuilder]] = None,
        proxy_max_uses: int = PROXY_MAX_USES
    ):
        '''
        Set up the handler state, select the builders and load the initial proxies

        Parameters
        ----------
        builders : Optional[List[ProxyBuilder]]
            Builders to use, None selects the default builders
        proxy_max_uses : int
            Usage threshold after which a proxy is dropped from the rotation

        Raises
        ------
        ValueError
            If builders are provided but none of them is a valid ProxyBuilder
        '''

        # Lock to prevent concurrent access to the proxy list
        self.lock = Lock()

        # Logger shared by the requests utilities (name is hard-coded here)
        self.logger = MyLogger.get_logger("logger_myrequests")

        # Set attributes
        self.proxy_max_uses = proxy_max_uses  # override default value
        self.proxy_list: List[str] = []  # proxies in rotation order
        self.proxy_uses: dict = {}  # usage counter per proxy, keyed by the proxy
        self.proxy_builders: List[ProxyBuilder] = []  # validated proxy builders

        # Set proxy builders, if none, get default builders
        if builders is None:
            self.proxy_builders = self._get_available_builders()
        else:
            # Keep only the valid builders; invalid ones are skipped, not fatal
            for builder in builders:
                if not isinstance(builder, ProxyBuilder):
                    self.logger.debug(f"Builder {builder} is not a valid ProxyBuilderABC, ignoring")
                    continue
                self.logger.debug(f"Builder {builder} is valid, adding to list")
                self.proxy_builders.append(builder)

            # At least one valid builder is required
            if not self.proxy_builders:
                self.logger.debug("No valid proxy builders were provided")
                raise ValueError("No valid proxy builders were provided")

            # NOTE: the filtered list built above is kept as-is. Re-assigning the
            # raw `builders` argument here would bring the rejected entries back
            # and make _request_proxies fail on them.

        # Get proxies from builder
        self.proxy_list = self._request_proxies()
        self.logger.info(f"Proxy Handler initialized with {len(self.proxy_list)} proxies")

    def get_next_proxy(self) -> Optional[str]:
        '''
        Get the next proxy from the proxies list and rotate it to the end of the list

        If the list is empty, new proxies are requested from the builders first.

        Returns
        -------
        Optional[str]
            The next proxy prefixed with 'http://', or None if no proxies are
            available even after requesting new ones

        Examples
        --------
        >>> from proxy_handler import ProxyHandler
        >>> proxy_handler = ProxyHandler()
        >>> proxy_handler.get_next_proxy()
        'http://1.1.1.1:8080'
        '''

        # Check if proxies are empty and get new ones
        if not self.proxy_list:
            self.logger.info("No proxies available, trying to get new ones")
            self.proxy_list = self._request_proxies()

        # Check if proxies are still empty
        if not self.proxy_list:
            self.logger.warning("No proxies available after trying to get new ones")
            return None

        # proxy rotation: take the head and move it to the tail
        proxy = self.proxy_list.pop(0)
        self.proxy_list.append(proxy)
        self.logger.debug(f"Next proxy: {proxy}")

        # Handle proxy usage lifetime (may remove the proxy from the rotation)
        self._handle_lifetime(proxy)

        # The list stores bare entries, the scheme is added on the way out
        return f"http://{proxy}"

    def _handle_lifetime(self, proxy: str):
        '''
        Handle proxy usage lifetime, if proxy has been used more than the limit it is removed
        Removes the proxy from the dict and the list

        Parameters
        ----------
        proxy : str
            Proxy exactly as stored in proxy_list (without the 'http://' prefix)
        '''
        # Count this use, starting from zero for a proxy not seen before
        self.proxy_uses[proxy] = self.proxy_uses.get(proxy, 0) + 1

        # remove proxy if it has been used more than the limit
        if self.proxy_uses[proxy] > self.proxy_max_uses:
            del self.proxy_uses[proxy]
            self.proxy_list.remove(proxy)
            self.logger.debug(f"Proxy {proxy} removed from list")

    def _request_proxies(self) -> List[str]:
        '''
        Get proxies from builders defined in the constructor and return a list of unique proxies

        Returns
        -------
        List[str]
            De-duplicated list of the proxies returned by the builders, in no
            particular order (duplicates are removed through a set)
        '''
        # Load the proxy list from the builders
        proxies = []
        for builder in self.proxy_builders:
            proxies.extend(builder.get_proxies())

        # remove duplicates
        proxies = list(set(proxies))
        self.logger.debug(f"Proxies len: {len(proxies)}")

        return proxies

    def _get_available_builders(self) -> List[ProxyBuilder]:
        '''
        Get the default proxy builders as a list of ProxyBuilder objects

        Returns
        -------
        List[ProxyBuilder]
            One SSLProxiesBuilder and two ListProxyBuilder instances pointing at
            public proxy lists
        '''

        # Default builders
        return [
            SSLProxiesBuilder(),
            ListProxyBuilder(url="https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt"),
            ListProxyBuilder(url="https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt")
        ]
185
186
187