#!/usr/bin/env python3
"A Privoxy Helper Program"

__author__ = 'phoenix'
__version__ = 'v1.4'

from socketserver import ThreadingMixIn
from http.server import HTTPServer
from urllib.parse import urlparse
from proxy import Counter, ProxyRequestHandler, get_cert
from urllib3.contrib.socks import SOCKSProxyManager
import os
import time
import configparser
import fnmatch
import logging
import threading
import ssl
import urllib3
import argparse
import collections

_name = 'privoxy-tls'


class LoadConfig:
    """Parse the [General] section of the INI configuration file.

    Exposes the settings as plain attributes (proxy_name, front_name,
    rear_name, front_port, rear_port, proxy, loglevel, ca, certdir).
    Front/rear addresses default to 'localhost'; everything else is
    None when absent (ports would then fail the int() conversion).
    """

    def __init__(self, configfile):
        self.config = configparser.ConfigParser(
            allow_no_value=True, delimiters=('=',),
            inline_comment_prefixes=('#',))
        self.config.read(configfile)
        self.proxy_name = self.config['General'].get('ProxAddr')
        self.front_name = self.config['General'].get('FrontAddr', 'localhost')
        self.rear_name = self.config['General'].get('RearAddr', 'localhost')
        self.front_port = int(self.config['General'].get('FrontPort'))
        self.rear_port = int(self.config['General'].get('RearPort'))
        self.proxy = self.config['General'].get('DefaultProxy')
        self.loglevel = self.config['General'].get('LogLevel')
        self.ca = self.config['General'].get('CACert')
        self.certdir = self.config['General'].get('CertDir')


class ConnectionPools:
    """
    self.pools is a list of {'proxy': 'http://127.0.0.1:8080',
                             'pool': urllib3.ProxyManager() object,
                             'patterns': ['ab.com', 'bc.net', ...]}
    self.getpool() is a method that returns pool based on host matching
    """
    # TLS verification parameters for the "verified" pool of each pair.
    sslparams = dict(
        cert_reqs="REQUIRED", ca_certs='/etc/ssl/certs/ca-bundle.crt')
    timeout = urllib3.util.timeout.Timeout(connect=90.0, read=90.0)

    def __init__(self, config):
        # `config` is the path of the INI file; its mtime is tracked so
        # reloadConfig() can detect edits.
        self.file = config
        self.file_timestamp = os.path.getmtime(config)
        self.loadConfig()

    def loadConfig(self):
        """(Re)build all pools and match lists from the config file."""
        # self.conf has to be inited each time for reloading
        self.conf = configparser.ConfigParser(
            allow_no_value=True, delimiters=('=',),
            inline_comment_prefixes=('#',))
        self.conf.read(self.file)
        self.pools = []
        # Sections look like "[Proxy http://127.0.0.1:8080]"; the second
        # word is the proxy URL, the section keys are host patterns.
        proxy_sections = [section for section in self.conf.sections()
                          if section.startswith('Proxy')]
        for section in proxy_sections:
            proxy = section.split()[1]
            self.pools.append(dict(proxy=proxy,
                                   pool=self.setProxyPool(proxy),
                                   patterns=list(self.conf[section].keys())))
        default_proxy = self.conf['General'].get('DefaultProxy')
        if default_proxy:
            default_pool = self.setProxyPool(default_proxy)
        else:
            # Direct connections: [verified, unverified] pool pair, same
            # layout as setProxyPool() so getpool() can index by noverify.
            default_pool = [
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout, **self.sslparams),
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout)]
        # Catch-all entry must stay last; getpool() returns the first match.
        self.pools.append(dict(proxy=default_proxy, pool=default_pool,
                               patterns='*'))
        # handle missing sections
        sections = collections.defaultdict(dict)
        for name in self.conf.sections():
            sections[name] = self.conf[name]
        self.noverifylist = list(sections['TLS NoVerify'].keys())
        self.sslpasslist = list(sections['TLS Passthru'].keys())
        self.blacklist = list(sections['Blacklist'].keys())
        self.bypasslist = list(sections['Bypass URL'].keys())

    def reloadConfig(self):
        """Poll the config file once per second; reload when it changes.

        Runs forever — intended to be the target of a daemon thread.
        """
        while True:
            mtime = os.path.getmtime(self.file)
            if mtime > self.file_timestamp:
                self.file_timestamp = mtime
                self.loadConfig()
                logger.info("*" * 20 + " CONFIG RELOADED " + "*" * 20)
            time.sleep(1)

    def getpool(self, host, httpmode=False):
        """Return (proxy_url, pool, noverify) for the first pattern match.

        `noverify` (bool, also used as 0/1 index into the pool pair) is
        True for plain-http mode or hosts listed under [TLS NoVerify].
        Returns None only if no pattern matches, which cannot happen as
        long as the catch-all '*' entry is present.
        """
        things = (fnmatch.fnmatch(host, pattern)
                  for pattern in self.noverifylist)
        noverify = True if httpmode or any(things) else False
        logger.debug(f'host: {host}, noverify: {noverify}')
        for pool in self.pools:
            things = (fnmatch.fnmatch(host, pattern)
                      for pattern in pool['patterns'])
            if any(things):
                return pool['proxy'], pool['pool'][noverify], noverify

    def setProxyPool(self, proxy):
        """Build the [verified, unverified] manager pair for one proxy URL."""
        scheme = proxy.split(':')[0]
        if scheme in ('http', 'https'):
            ProxyManager = urllib3.ProxyManager
        elif scheme in ('socks4', 'socks5'):
            ProxyManager = SOCKSProxyManager
        else:
            print("Wrong Proxy Format: " + proxy)
            print("Proxy should start with http/https/socks4/socks5 .")
            input()
            raise SystemExit
        # maxsize is the max. number of connections to the same server
        return [
            ProxyManager(proxy, num_pools=10, maxsize=8,
                         timeout=self.timeout, **self.sslparams),
            ProxyManager(proxy, num_pools=10, maxsize=8,
                         timeout=self.timeout)]


class FrontServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""
    pass


class RearServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""
    pass


class FrontRequestHandler(ProxyRequestHandler):
    """
    Sit between the client and Privoxy
    Convert https request to http
    """
    server_version = f'{_name} front/{__version__}'

    def do_CONNECT(self):
        "Decrypt https request and dispatch to http handler"
        # request line: CONNECT www.example.com:443 HTTP/1.1
        self.host, self.port = self.path.split(":")
        self.proxy, self.pool, self.noverify = pools.getpool(self.host)
        things = (fnmatch.fnmatch(self.host, pattern)
                  for pattern in pools.blacklist)
        if any(things):
            # blacklist
            self.deny_request()
            logger.info('{:03d} denied by blacklist: {}'.format(
                self.reqNum, self.host))
        elif any((fnmatch.fnmatch(self.host, pattern)
                  for pattern in pools.sslpasslist)):
            # TLS passthru: relay bytes without decrypting
            if self.proxy and self.proxy.startswith('https'):
                self.forward_to_https_proxy()
            elif self.proxy and self.proxy.startswith('socks5'):
                self.forward_to_socks5_proxy()
            else:
                self.tunnel_traffic()
            # Upstream server or proxy of the tunnel is
            # closed explicitly, so we close the local connection too
            self.close_connection = 1
        else:
            # TLS MITM
            self.wfile.write(('HTTP/1.1 200 Connection established\r\n'
                              f'Proxy-agent: {self.version_string()}\r\n'
                              '\r\n').encode('ascii'))
            # Use a wildcard-style common name (".example.com") so one
            # dummy cert covers all subdomains of a registered domain.
            if self.host.count('.') >= 2:
                commonname = '.' + self.host.partition('.')[-1]
            else:
                commonname = self.host
            dummycert = get_cert(commonname, config)
            # set a flag for do_METHOD
            self.ssltunnel = True
            # ssl.wrap_socket() was removed in Python 3.12; use an
            # SSLContext with the same cert/key file instead.
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            context.load_cert_chain(certfile=dummycert, keyfile=dummycert)
            ssl_sock = context.wrap_socket(self.connection, server_side=True)
            # Ref: Lib/socketserver.py#StreamRequestHandler.setup()
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.rbufsize)
            self.wfile = self.connection.makefile('wb', self.wbufsize)
            # dispatch to do_METHOD()
            self.handle_one_request()

    def do_METHOD(self):
        "Forward request to Privoxy"
        counter.increment_and_set(self, 'reqNum')
        if self.ssltunnel:
            # https request (decrypted by do_CONNECT)
            host = (self.host if self.port == '443'
                    else ':'.join((self.host, self.port)))
            url = 'https://' + ''.join((host, self.path))
            self.bypass = any((fnmatch.fnmatch(url, pattern)
                               for pattern in pools.bypasslist))
            if not self.bypass:
                # Downgrade to http so Privoxy can filter it; the rear
                # server restores https for the upstream connection.
                url = 'http://' + ''.join((host, self.path))
                # Tag the request so Privoxy can recognize it
                self.headers["Tagged"] = (self.version_string()
                                          + f':{self.reqNum:d}')
        else:
            # http request
            self.host = urlparse(self.path).hostname
            if any((fnmatch.fnmatch(self.host, pattern)
                    for pattern in pools.blacklist)):
                # blacklist
                self.deny_request()
                logger.info('{:03d} denied by blacklist: {}'.format(
                    self.reqNum, self.host))
                return
            host = urlparse(self.path).netloc
            self.proxy, self.pool, self.noverify = pools.getpool(
                self.host, httpmode=True)
            self.bypass = any((fnmatch.fnmatch(
                'http://' + host + urlparse(self.path).path, pattern)
                for pattern in pools.bypasslist))
            url = self.path
        self.url = url
        # Bypassed requests go straight to their pool; everything else
        # goes through Privoxy (proxpool).
        pool = self.pool if self.bypass else proxpool
        data_length = self.headers.get("Content-Length")
        self.postdata = (self.rfile.read(int(data_length))
                         if data_length and int(data_length) > 0 else None)
        if self.command == "POST" and "Content-Length" not in self.headers:
            buffer = self.rfile.read()
            if buffer:
                logger.warning(
                    '{:03d} '
                    'POST w/o "Content-Length" header (Bytes: {} | '
                    'Transfer-Encoding: {} | '
                    'HTTPS: {}'.format(
                        self.reqNum, len(buffer),
                        "Transfer-Encoding" in self.headers,
                        self.ssltunnel))
        # Remove hop-by-hop headers
        self.purge_headers(self.headers)
        r = None
        # urllib3's connectionpool expects a headers object that supports
        # copy()/update(), so wrap self.headers rather than passing it in
        # directly (changes must not be reflected in our copy either).
        headers = urllib3._collections.HTTPHeaderDict(self.headers)
        try:
            # Sometimes 302 redirect would fail with "BadStatusLine"
            # exception, and IE11 doesn't restart the request.
            # retries=1 instead of retries=False fixes it.
            # ! Retry may cause the requests with the same reqNum appear
            # in the log window
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if not self.ssltunnel:
                # Tunneled requests are logged by the rear server instead.
                if self.bypass:
                    prefix = '[BP]' if self.proxy else '[BD]'
                else:
                    prefix = '[D]'
                if self.command in ("GET", "HEAD"):
                    logger.info('{:03d} {} "{} {}" {} {}'.format(
                        self.reqNum, prefix, self.command, url,
                        r.status, r.getheader('Content-Length', '-')))
                else:
                    logger.info('{:03d} {} "{} {} {}" {} {}'.format(
                        self.reqNum, prefix, self.command, url, data_length,
                        r.status, r.getheader('Content-Length', '-')))
            self.send_response_only(r.status, r.reason)
            # HTTPResponse.msg is easier to handle
            # than urllib3._collections.HTTPHeaderDict
            r.headers = r._original_response.msg
            self.purge_write_headers(r.headers)
            # Responses that cannot carry a body (RFC 7230) are not streamed.
            if (self.command == 'HEAD'
                    or r.status in (100, 101, 204, 304)
                    or r.getheader("Content-Length") == '0'):
                written = None
            else:
                written = self.stream_to_client(r)
            # Without a length or chunked encoding the client detects EOF
            # by connection close.
            if ("Content-Length" not in r.headers
                    and 'Transfer-Encoding' not in r.headers):
                self.close_connection = 1
        # Intend to catch regular http and bypass
        # http/https requests exceptions
        # Regular https request exceptions should be handled by rear server
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message="Timeout", explain=e)
            logger.warning(f'{self.reqNum:03d} [F] {e} on '
                           f'"{self.command} {url}"')
        except (urllib3.exceptions.HTTPError,) as e:
            self.sendout_error(url, 502, message="HTTP Error", explain=e)
            logger.warning(f'{self.reqNum:03d} [F] {e} on '
                           f'"{self.command} {url}"')
        finally:
            if r:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD


class RearRequestHandler(ProxyRequestHandler):
    """
    Supposed to be the parent proxy for Privoxy for tagged requests
    Convert http request to https
    """
    server_version = f'{_name} rear/{__version__}'

    def do_METHOD(self):
        "Convert http request to https"
        if (self.headers.get("Tagged")
                and self.headers["Tagged"].startswith(_name)):
            self.reqNum = int(self.headers["Tagged"].split(":")[1])
            # Remove the tag
            del self.headers["Tagged"]
        else:
            # Untagged request: the client is talking to the rear port
            # directly instead of the front port.
            # BUG FIX: was config.FRONTPORT (AttributeError) — LoadConfig
            # defines front_port.
            self.sendout_error(
                self.path, 400,
                explain='The proxy setting of the client'
                        ' is misconfigured.\n\n'
                        'Please set the HTTPS proxy port to '
                        f'{config.front_port} '
                        'and check the Docs for other settings.')
            logger.error("[Misconfigured HTTPS proxy port] " + self.path)
            return
        # request line: GET http://somehost.com/path?attr=value HTTP/1.1
        # Restore the https scheme the front server stripped off.
        url = "https" + self.path[4:]
        self.host = urlparse(self.path).hostname
        proxy, pool, noverify = pools.getpool(self.host)
        prefix = '[P]' if proxy else '[D]'
        data_length = self.headers.get("Content-Length")
        self.postdata = (self.rfile.read(int(data_length))
                         if data_length else None)
        self.purge_headers(self.headers)
        r = None
        # Below code in connectionpool.py expect the headers
        # to has a copy() and update() method
        # That's why we can't use self.headers directly when
        # call pool.urlopen()
        headers = urllib3._collections.HTTPHeaderDict(self.headers)
        try:
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if proxy:
                logger.debug('Using Proxy - ' + proxy)
            if self.command in ("GET", "HEAD"):
                logger.info('{:03d} {} "{} {}" {} {}'.format(
                    self.reqNum, prefix, self.command, url,
                    r.status, r.getheader('Content-Length', '-')))
            else:
                logger.info('{:03d} {} "{} {} {}" {} {}'.format(
                    self.reqNum, prefix, self.command, url, data_length,
                    r.status, r.getheader('Content-Length', '-')))
            self.send_response_only(r.status, r.reason)
            # HTTPResponse.msg is easier to handle than
            # urllib3._collections.HTTPHeaderDict
            r.headers = r._original_response.msg
            self.purge_write_headers(r.headers)
            if (self.command == 'HEAD'
                    or r.status in (100, 101, 204, 304)
                    or r.getheader("Content-Length") == '0'):
                written = None
            else:
                written = self.stream_to_client(r)
            if ("Content-Length" not in r.headers
                    and 'Transfer-Encoding' not in r.headers):
                self.close_connection = 1
        except urllib3.exceptions.SSLError as e:
            self.sendout_error(url, 417, message='TLS Certificate Failed',
                               explain=e)
            logger.error(f'{self.reqNum:03d} [TLS Certificate Error] {url}')
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message='Timeout', explain=e)
            logger.warning(f'{self.reqNum:03d} [R]{prefix} '
                           f'"{self.command} {url}" {e}')
        except (urllib3.exceptions.HTTPError,) as e:
            self.sendout_error(url, 502, message="HTTP Error", explain=e)
            logger.warning(f'{self.reqNum:03d} [R]{prefix} '
                           f'"{self.command} {url}" {e}')
        finally:
            if r:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD


def main():
    """Start the front/rear servers and config-reload watcher threads."""
    urllib3.disable_warnings()
    # `config.loglevel or ''` guards against a missing LogLevel setting
    # (getattr would raise TypeError on a None name).
    logger.setLevel(getattr(logging, config.loglevel or '', logging.INFO))
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s %(message)s',
                                  datefmt='[%H:%M]')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    frontserver = FrontServer((config.front_name, config.front_port),
                              FrontRequestHandler)
    rearserver = RearServer((config.rear_name, config.rear_port),
                            RearRequestHandler)
    for worker in (frontserver.serve_forever, rearserver.serve_forever,
                   pools.reloadConfig):
        thread = threading.Thread(target=worker)
        thread.daemon = True
        thread.start()
    print('=' * 40)
    print(f'{_name} {__version__} (urllib3/{urllib3.__version__})')
    print(f'Front : {config.front_name}:{config.front_port}')
    print(f'Privoxy : {config.proxy_name}')
    print(f'Rear : {config.rear_name}:{config.rear_port}')
    print(f'Proxy : {config.proxy}')
    print('=' * 40)
    # Keep the main thread alive; workers are daemon threads.
    while True:
        time.sleep(1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('Privoxy TLS proxy wrapper.')
    parser.add_argument('-c', '--config', type=argparse.FileType('r'),
                        default='config.ini',
                        help='Privoxy TLS configuration file.')
    args = parser.parse_args()
    # globals shared by the request handlers
    CONFIG = args.config.name
    logger = logging.getLogger(__name__)
    config = LoadConfig(CONFIG)
    # Pool pointing at Privoxy itself; long read timeout for filtering.
    proxpool = urllib3.ProxyManager(
        config.proxy_name, num_pools=10, maxsize=8,
        timeout=urllib3.util.timeout.Timeout(
            connect=90.0, read=310.0))
    pools = ConnectionPools(CONFIG)
    counter = Counter()
    try:
        main()
    except KeyboardInterrupt:
        print("Quitting...")