#!/usr/bin/env python3
"A Privoxy Helper Program"

__author__ = 'phoenix'
__version__ = 'v1.4'

import argparse
import collections
import configparser
import fnmatch
import logging
import os
import ssl
import threading
import time
from http.server import HTTPServer
from socketserver import ThreadingMixIn
from urllib.parse import urlparse

import urllib3
from urllib3.contrib.socks import SOCKSProxyManager

from proxy import Counter, ProxyRequestHandler, get_cert

_name = 'privoxy-tls'

# Module-level so the classes below can log even when this file is imported
# rather than run as a script (when run as a script the name is '__main__',
# exactly as before).
logger = logging.getLogger(__name__)


def _matches_any(name, patterns):
    "Return True if name matches at least one fnmatch-style pattern"
    return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)


def _read_body(handler):
    """
    Read the request body announced by the Content-Length header.
    Returns (raw Content-Length header value or None, body bytes or None).
    The body is None when the header is missing, empty or not positive.
    """
    data_length = handler.headers.get("Content-Length")
    if data_length and int(data_length) > 0:
        return data_length, handler.rfile.read(int(data_length))
    return data_length, None


def _log_exchange(handler, prefix, url, data_length, response):
    "Log one request/response pair in the '<reqNum> <prefix> ...' format"
    size = response.headers.get('Content-Length', '-')
    if handler.command in ("GET", "HEAD"):
        logger.info("%03d " % handler.reqNum + '%s "%s %s" %s %s' %
                    (prefix, handler.command, url, response.status, size))
    else:
        logger.info("%03d " % handler.reqNum + '%s "%s %s %s" %s %s' %
                    (prefix, handler.command, url, data_length,
                     response.status, size))


def _relay_response(handler, response):
    "Send the status line, headers and (if any) body of response to the client"
    handler.send_response_only(response.status, response.reason)
    # HTTPResponse.msg is easier to handle
    # than urllib3._collections.HTTPHeaderDict
    response.headers = response._original_response.msg
    handler.purge_write_headers(response.headers)
    if (handler.command == 'HEAD'
            or response.status in (100, 101, 204, 304)
            or response.headers.get("Content-Length") == '0'):
        # Nothing to stream for these responses
        return
    handler.stream_to_client(response)
    if ("Content-Length" not in response.headers
            and 'Transfer-Encoding' not in response.headers):
        # The client has no other way to find the end of the body
        handler.close_connection = 1


class LoadConfig:
    """
    Read the [General] section of the configuration file and expose
    its values as attributes (ports are converted to int).
    """

    def __init__(self, configfile):
        self.config = configparser.ConfigParser(
            allow_no_value=True, inline_comment_prefixes=('#',))
        self.config.read(configfile)
        general = self.config['General']
        self.PROXADDR = general.get('ProxAddr')
        self.FRONTPORT = int(general.get('FrontPort'))
        self.REARPORT = int(general.get('RearPort'))
        self.GeneralPROXY = general.get('DefaultProxy')
        self.LOGLEVEL = general.get('LogLevel')
        self.CA = general.get('CACert')
        self.CERTDIR = general.get('CertDir')


class ConnectionPools:
    """
    self.pools is a list of {'proxy': 'http://127.0.0.1:8080',
                             'pool': [verifying manager, non-verifying manager],
                             'patterns': ['ab.com', 'bc.net', ...]}
    self.getpool() is a method that returns pool based on host matching
    """
    # Settings of the certificate-verifying manager of each pair.
    # NOTE: the CA bundle path is hard-coded.
    sslparams = dict(
        cert_reqs="REQUIRED",
        ca_certs='/etc/ssl/certs/ca-bundle.crt')
    # Settings of the non-verifying manager of each pair. Stated explicitly
    # instead of relying on the library default for cert_reqs.
    noverifyparams = dict(cert_reqs="CERT_NONE")
    timeout = urllib3.util.timeout.Timeout(connect=90.0, read=90.0)

    def __init__(self, config):
        self.file = config
        self.file_timestamp = os.path.getmtime(config)
        self.loadConfig()

    def loadConfig(self):
        """
        Parse self.file and (re)build the pools and the pattern lists.
        Everything is built in local variables and published at the end,
        so request threads never observe a half-built self.pools and a
        failing reload leaves the previous settings untouched.
        """
        # the parser has to be created each time for reloading
        conf = configparser.ConfigParser(
            allow_no_value=True, delimiters=('=',),
            inline_comment_prefixes=('#',))
        conf.read(self.file)

        pool_list = []
        for section in conf.sections():
            if not section.startswith('Proxy'):
                continue
            # section name: "Proxy <proxy url>"
            proxy = section.split()[1]
            pool_list.append(dict(proxy=proxy,
                                  pool=self.setProxyPool(proxy),
                                  patterns=list(conf[section].keys())))

        default_proxy = conf['General'].get('DefaultProxy')
        if default_proxy:
            default_pool = self.setProxyPool(default_proxy)
        else:
            default_pool = [
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout, **self.sslparams),
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout,
                                    **self.noverifyparams)]
        # Catch-all entry, must stay last
        pool_list.append(dict(proxy=default_proxy, pool=default_pool,
                              patterns=['*']))

        # handle missing sections
        sections = collections.defaultdict(dict)
        for name in conf.sections():
            sections[name] = conf[name]
        noverifylist = list(sections['TLS NoVerify'].keys())
        sslpasslist = list(sections['TLS Passthru'].keys())
        blacklist = list(sections['Blacklist'].keys())
        bypasslist = list(sections['Bypass URL'].keys())

        # Publish the new state
        self.conf = conf
        self.noverifylist = noverifylist
        self.sslpasslist = sslpasslist
        self.blacklist = blacklist
        self.bypasslist = bypasslist
        self.pools = pool_list

    def reloadConfig(self):
        """
        Poll the modification time of self.file once per second and
        reload the configuration when it changes. Never returns.
        """
        while True:
            try:
                mtime = os.path.getmtime(self.file)
            except OSError:
                # The file is unreadable right now (e.g. being replaced);
                # keep polling instead of killing the reload thread
                mtime = self.file_timestamp
            if mtime > self.file_timestamp:
                self.file_timestamp = mtime
                try:
                    self.loadConfig()
                except (configparser.Error, KeyError, IndexError) as e:
                    logger.error("Config reload failed, "
                                 "keeping previous settings: %r", e)
                else:
                    logger.info("*" * 20 + " CONFIG RELOADED " + "*" * 20)
            time.sleep(1)

    def getpool(self, host, httpmode=False):
        """
        Return (proxy, pool manager, noverify) of the first entry of
        self.pools having a pattern that matches host. The non-verifying
        manager is chosen when httpmode is set or host matches the
        [TLS NoVerify] list.
        """
        noverify = bool(httpmode or _matches_any(host, self.noverifylist))
        logger.debug(f'host: {host}, noverify: {noverify}')
        for pool in self.pools:
            if _matches_any(host, pool['patterns']):
                # pool['pool'] is [verifying, non-verifying]
                return pool['proxy'], pool['pool'][noverify], noverify

    def setProxyPool(self, proxy):
        """
        Return [verifying manager, non-verifying manager] for proxy.
        Exits (after waiting for Enter) when the scheme is not one of
        http/https/socks4/socks5.
        """
        scheme = proxy.split(':')[0]
        if scheme in ('http', 'https'):
            ProxyManager = urllib3.ProxyManager
        elif scheme in ('socks4', 'socks5'):
            ProxyManager = SOCKSProxyManager
        else:
            print("Wrong Proxy Format: " + proxy)
            print("Proxy should start with http/https/socks4/socks5 .")
            input()
            raise SystemExit
        # maxsize is the max. number of connections to the same server
        return [
            ProxyManager(proxy, num_pools=10, maxsize=8,
                         timeout=self.timeout, **self.sslparams),
            ProxyManager(proxy, num_pools=10, maxsize=8,
                         timeout=self.timeout, **self.noverifyparams)]


class FrontServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""
    pass


class RearServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""
    pass


class FrontRequestHandler(ProxyRequestHandler):
    """
    Sit between the client and Privoxy
    Convert https request to http
    """
    server_version = "%s front/%s" % (_name, __version__)

    def do_CONNECT(self):
        "Decrypt https request and dispatch to http handler"
        # request line: CONNECT www.example.com:443 HTTP/1.1
        self.host, self.port = self.path.split(":")
        self.proxy, self.pool, self.noverify = pools.getpool(self.host)
        if _matches_any(self.host, pools.blacklist):
            # BLACK LIST
            self.deny_request()
            logger.info("%03d " % self.reqNum +
                        'Denied by blacklist: %s' % self.host)
        elif _matches_any(self.host, pools.sslpasslist):
            # SSL Pass-Thru
            if self.proxy and self.proxy.startswith('https'):
                self.forward_to_https_proxy()
            elif self.proxy and self.proxy.startswith('socks5'):
                self.forward_to_socks5_proxy()
            else:
                self.tunnel_traffic()
            # Upstream server or proxy of the tunnel is
            # closed explicitly, so we close the local connection too
            self.close_connection = 1
        else:
            # SSL MITM
            self.wfile.write(("HTTP/1.1 200 Connection established\r\n" +
                              "Proxy-agent: %s\r\n" % self.version_string() +
                              "\r\n").encode('ascii'))
            # a.b.example.com -> .b.example.com ; example.com stays as is
            if self.host.count('.') >= 2:
                commonname = '.' + self.host.partition('.')[-1]
            else:
                commonname = self.host
            dummycert = get_cert(commonname, config)
            # set a flag for do_METHOD
            self.ssltunnel = True

            # ssl.wrap_socket() was removed in Python 3.12, use a context.
            # dummycert is used as both certificate and key file.
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            context.load_cert_chain(certfile=dummycert, keyfile=dummycert)
            ssl_sock = context.wrap_socket(self.connection, server_side=True)
            # Ref: Lib/socketserver.py#StreamRequestHandler.setup()
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.rbufsize)
            self.wfile = self.connection.makefile('wb', self.wbufsize)
            # dispatch to do_METHOD()
            self.handle_one_request()

    def do_METHOD(self):
        "Forward request to Privoxy"
        counter.increment_and_set(self, 'reqNum')

        if self.ssltunnel:
            # https request
            host = (self.host if self.port == '443'
                    else "%s:%s" % (self.host, self.port))
            url = "https://%s%s" % (host, self.path)
            self.bypass = _matches_any(url, pools.bypasslist)
            if not self.bypass:
                url = "http://%s%s" % (host, self.path)
                # Tag the request so Privoxy can recognize it.
                # Drop any client-supplied tag first, so ours is the
                # only one the rear server sees.
                del self.headers["Tagged"]
                self.headers["Tagged"] = (self.version_string() +
                                          ":%d" % self.reqNum)
        else:
            # http request
            parsed = urlparse(self.path)
            self.host = parsed.hostname
            if not self.host:
                # Not an absolute URL: there is no host to match against
                self.sendout_error(
                    self.path, 400,
                    explain="The request URL has no host name; "
                            "an absolute URL is expected.")
                return
            if _matches_any(self.host, pools.blacklist):
                # BLACK LIST
                self.deny_request()
                logger.info("%03d " % self.reqNum +
                            'Denied by blacklist: %s' % self.host)
                return
            host = parsed.netloc
            self.proxy, self.pool, self.noverify = pools.getpool(
                self.host, httpmode=True)
            self.bypass = _matches_any('http://' + host + parsed.path,
                                       pools.bypasslist)
            url = self.path
        self.url = url
        # Bypassed requests skip Privoxy and go out through self.pool
        pool = self.pool if self.bypass else proxpool
        data_length, self.postdata = _read_body(self)
        if self.command == "POST" and "Content-Length" not in self.headers:
            buffer = self.rfile.read()
            if buffer:
                logger.warning(
                    "%03d " % self.reqNum +
                    'POST w/o "Content-Length" header (Bytes: %d |'
                    ' Transfer-Encoding: %s | HTTPS: %s',
                    len(buffer), "Transfer-Encoding" in self.headers,
                    self.ssltunnel)
        # Remove hop-by-hop headers
        self.purge_headers(self.headers)
        r = None

        # pool.urlopen() wants a headers object it can copy and update,
        # so self.headers is not passed directly
        headers = urllib3._collections.HTTPHeaderDict(self.headers)

        try:
            # Sometimes 302 redirect would fail with "BadStatusLine"
            # exception, and IE11 doesn't restart the request.
            # retries=1 instead of retries=False fixes it.
            # ! Retry may cause the requests with the same reqNum appear
            # in the log window
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if not self.ssltunnel:
                if self.bypass:
                    prefix = '[BP]' if self.proxy else '[BD]'
                else:
                    prefix = '[D]'
                _log_exchange(self, prefix, url, data_length, r)
            _relay_response(self, r)

        # Intend to catch regular http and bypass
        # http/https requests exceptions
        # Regular https request exceptions should be handled by rear server
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message="Timeout", explain=e)
            logger.warning("%03d " % self.reqNum + '[F] %s on "%s %s"',
                           e, self.command, url)
        except (urllib3.exceptions.HTTPError,) as e:
            self.sendout_error(url, 502, message="HTTPError", explain=e)
            logger.warning("%03d " % self.reqNum + '[F] %s on "%s %s"',
                           e, self.command, url)

        finally:
            if r is not None:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD


class RearRequestHandler(ProxyRequestHandler):
    """
    Supposed to be the parent proxy for Privoxy for tagged requests
    Convert http request to https
    """
    server_version = "%s rear/%s" % (_name, __version__)

    def do_METHOD(self):
        "Convert http request to https"
        # The tag is "<front version string>:<reqNum>"
        tag = self.headers.get("Tagged")
        reqnum = None
        if tag and tag.startswith(_name):
            try:
                reqnum = int(tag.rpartition(":")[2])
            except ValueError:
                # Malformed tag, handled like a missing one
                reqnum = None
        if reqnum is None:
            self.sendout_error(
                self.path, 400,
                explain="The proxy setting of the client"
                " is misconfigured.\n\n" +
                "Please set the HTTPS proxy port to %s " % config.FRONTPORT +
                "and check the Docs for other settings.")
            logger.error("[Misconfigured HTTPS proxy port] " + self.path)
            return
        self.reqNum = reqnum
        # Remove the tag
        del self.headers["Tagged"]

        # request line: GET http://somehost.com/path?attr=value HTTP/1.1
        url = "https" + self.path[4:]
        self.host = urlparse(self.path).hostname
        proxy, pool, _ = pools.getpool(self.host)
        prefix = '[P]' if proxy else '[D]'
        data_length, self.postdata = _read_body(self)
        self.purge_headers(self.headers)
        r = None

        # pool.urlopen() wants a headers object it can copy and update,
        # so self.headers is not passed directly
        headers = urllib3._collections.HTTPHeaderDict(self.headers)

        try:
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if proxy:
                logger.debug('Using Proxy - %s' % proxy)
            _log_exchange(self, prefix, url, data_length, r)
            _relay_response(self, r)

        except urllib3.exceptions.SSLError as e:
            self.sendout_error(url, 417, message="SSL Certificate Failed",
                               explain=e)
            logger.error("%03d " % self.reqNum +
                         "[SSL Certificate Error] " + url)
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message="Timeout", explain=e)
            logger.warning("%03d " % self.reqNum + '[R]%s "%s %s" %s',
                           prefix, self.command, url, e)
        except (urllib3.exceptions.HTTPError,) as e:
            self.sendout_error(url, 502, message="HTTPError", explain=e)
            logger.warning("%03d " % self.reqNum + '[R]%s "%s %s" %s',
                           prefix, self.command, url, e)

        finally:
            if r is not None:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD


def main():
    """
    Set up logging, start the front server, the rear server and the
    config reloader in daemon threads, print a banner and then idle
    until interrupted. Relies on the module globals config and pools.
    """
    urllib3.disable_warnings()
    # Unknown or missing LogLevel falls back to INFO
    logger.setLevel(getattr(logging, config.LOGLEVEL, logging.INFO))
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s %(message)s',
                                  datefmt='[%H:%M]')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    frontserver = FrontServer(('', config.FRONTPORT), FrontRequestHandler)
    rearserver = RearServer(('', config.REARPORT), RearRequestHandler)
    frontserver.config = config
    for worker in (frontserver.serve_forever, rearserver.serve_forever,
                   pools.reloadConfig):
        thread = threading.Thread(target=worker)
        thread.daemon = True
        thread.start()

    print("=" * 40)
    print('%s %s (urllib3/%s)' % (_name, __version__, urllib3.__version__))
    print('Front   : localhost:%s' % config.FRONTPORT)
    print('Privoxy :', config.PROXADDR)
    print('Rear    : localhost:%s' % config.REARPORT)
    print('Parent  : %s' % config.GeneralPROXY)
    print("=" * 40)
    # The workers are daemon threads; keep the main thread alive
    while True:
        time.sleep(1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('Privoxy TLS proxy wrapper.')
    parser.add_argument('-c', '--config', type=argparse.FileType('r'),
                        default='config.ini',
                        help='Privoxy TLS configuration file.')
    args = parser.parse_args()

    # globals
    CONFIG = args.config.name
    # FileType already opened the file; only its name is needed
    args.config.close()
    config = LoadConfig(CONFIG)
    proxpool = urllib3.ProxyManager(
        config.PROXADDR, num_pools=10, maxsize=8,
        timeout=urllib3.util.timeout.Timeout(
            connect=90.0, read=310.0))
    pools = ConnectionPools(CONFIG)
    counter = Counter()

    try:
        main()
    except KeyboardInterrupt:
        print("Quitting...")