privoxy-tls/main.py

478 lines
19 KiB
Python
Raw Normal View History

2015-01-07 04:57:51 +01:00
#!/usr/bin/env python3
2019-05-07 12:06:21 +02:00
"A Privoxy Helper Program"
2015-01-07 04:57:51 +01:00
__author__ = 'phoenix'
2016-01-16 15:15:48 +01:00
__version__ = 'v1.4'
2015-01-07 04:57:51 +01:00
2019-05-07 12:06:21 +02:00
from socketserver import ThreadingMixIn
from http.server import HTTPServer
from urllib.parse import urlparse
from proxy import Counter, ProxyRequestHandler, get_cert
from urllib3.contrib.socks import SOCKSProxyManager
2015-01-07 04:57:51 +01:00
import os
import time
import configparser
import fnmatch
import logging
import threading
import ssl
import urllib3
2019-05-07 12:06:21 +02:00
import argparse
import collections
2015-01-07 04:57:51 +01:00
2019-05-07 12:06:21 +02:00
# Program name: prefixes the server_version string of both request handlers,
# and the rear handler only accepts requests whose "Tagged" header starts
# with it.
_name = 'privoxy-tls'
2015-01-07 04:57:51 +01:00
class LoadConfig:
    """Expose the [General] section of the configuration file as attributes.

    Config key -> attribute:
        ProxAddr     -> proxy_name  (None when absent)
        FrontAddr    -> front_name  ('localhost' when absent)
        RearAddr     -> rear_name   ('localhost' when absent)
        FrontPort    -> front_port  (int, required)
        RearPort     -> rear_port   (int, required)
        DefaultProxy -> proxy       (None when absent)
        LogLevel     -> loglevel    (None when absent)
        CACert       -> ca          (None when absent)
        CertDir      -> certdir     (None when absent)

    The parser itself stays available as ``self.config``.

    Raises:
        KeyError: the file has no [General] section.  ConfigParser.read()
            silently skips files it cannot open, so an unreadable file
            ends up here as well.
        configparser.NoOptionError: FrontPort or RearPort is missing.
        ValueError: a port value is not an integer.
    """

    def __init__(self, configfile):
        self.config = configparser.ConfigParser(
            allow_no_value=True, delimiters=('=',),
            inline_comment_prefixes=('#',))
        self.config.read(configfile)

        general = self.config['General']
        self.proxy_name = general.get('ProxAddr')
        self.front_name = general.get('FrontAddr', 'localhost')
        self.rear_name = general.get('RearAddr', 'localhost')
        # ConfigParser.getint() (unlike int(section.get(...))) reports a
        # missing option by name instead of failing with int(None).
        self.front_port = self.config.getint('General', 'FrontPort')
        self.rear_port = self.config.getint('General', 'RearPort')
        self.proxy = general.get('DefaultProxy')
        self.loglevel = general.get('LogLevel')
        self.ca = general.get('CACert')
        self.certdir = general.get('CertDir')
2019-05-07 12:06:21 +02:00
2015-01-07 04:57:51 +01:00
class ConnectionPools:
    """
    Upstream connection pools and host pattern lists read from the config.

    self.pools is a list of {'proxy': 'http://127.0.0.1:8080',
                             'pool': [verifying manager,
                                      non-verifying manager],
                             'patterns': ['ab.com', 'bc.net', ...]}
    with one entry per "[Proxy <url>]" section, followed by a catch-all
    entry for DefaultProxy (or for direct connections when it is unset).
    self.getpool() is a method that returns pool based on host matching

    The keys of the optional sections [TLS NoVerify], [TLS Passthru],
    [Blacklist] and [Bypass URL] are exposed as self.noverifylist,
    self.sslpasslist, self.blacklist and self.bypasslist.
    """
    # TLS arguments of the verifying manager (index 0 of every 'pool' pair)
    sslparams = dict(
        cert_reqs="REQUIRED",
        ca_certs='/etc/ssl/certs/ca-bundle.crt')
    # TLS arguments of the non-verifying manager (index 1).  Must be
    # explicit: urllib3 >= 1.25 verifies certificates when nothing is given.
    nosslparams = dict(cert_reqs="CERT_NONE")
    timeout = urllib3.util.timeout.Timeout(connect=90.0, read=90.0)

    def __init__(self, config):
        """Remember the config file path and its mtime, then load it."""
        self.file = config
        self.file_timestamp = os.path.getmtime(config)
        self.loadConfig()

    def loadConfig(self):
        """(Re)read the config file and rebuild pools and pattern lists.

        Everything is built in local variables and assigned at the very
        end, so request threads calling getpool() during a reload never see
        a half-filled list, and a failed reload leaves the old state intact.
        """
        # A fresh parser is needed each time so that entries removed from
        # the file do not survive a reload
        conf = configparser.ConfigParser(
            allow_no_value=True, delimiters=('=',),
            inline_comment_prefixes=('#',))
        conf.read(self.file)

        pools = []
        for section in conf.sections():
            if not section.startswith('Proxy'):
                continue
            # section name: "Proxy <url>"
            proxy = section.split()[1]
            pools.append(dict(proxy=proxy,
                              pool=self.setProxyPool(proxy),
                              patterns=list(conf[section].keys())))

        default_proxy = conf['General'].get('DefaultProxy')
        if default_proxy:
            default_pool = self.setProxyPool(default_proxy)
        else:
            default_pool = [
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout, **self.sslparams),
                urllib3.PoolManager(num_pools=10, maxsize=8,
                                    timeout=self.timeout,
                                    **self.nosslparams)]
        # The catch-all goes last so the more specific entries win
        pools.append(dict(proxy=default_proxy,
                          pool=default_pool, patterns=['*']))

        def section_keys(name):
            # handle missing sections
            return list(conf[name].keys()) if conf.has_section(name) else []

        noverifylist = section_keys('TLS NoVerify')
        sslpasslist = section_keys('TLS Passthru')
        blacklist = section_keys('Blacklist')
        bypasslist = section_keys('Bypass URL')

        # Publish: each attribute is replaced by a complete object
        self.conf = conf
        self.noverifylist = noverifylist
        self.sslpasslist = sslpasslist
        self.blacklist = blacklist
        self.bypasslist = bypasslist
        self.pools = pools

    def reloadConfig(self):
        """Poll the config file once per second and reload it when its
        modification time increases.  Never returns; meant to be the target
        of a background thread."""
        while True:
            try:
                mtime = os.path.getmtime(self.file)
            except OSError:
                # e.g. the file is briefly missing while being rewritten;
                # just try again on the next tick
                mtime = self.file_timestamp
            if mtime > self.file_timestamp:
                self.file_timestamp = mtime
                try:
                    self.loadConfig()
                except Exception:
                    # Thread boundary: a broken config must not kill the
                    # watcher, otherwise later fixes would never be picked up
                    logger.exception(
                        "CONFIG RELOAD FAILED, keeping previous settings")
                else:
                    logger.info("*" * 20 + " CONFIG RELOADED " + "*" * 20)
            time.sleep(1)

    def getpool(self, host, httpmode=False):
        """Return (proxy, manager, noverify) for the first entry of
        self.pools with a pattern that matches host.

        noverify is True when httpmode is set or host matches a
        [TLS NoVerify] pattern; it selects the second (non-verifying)
        manager of the pair.  Returns None if no entry matches.
        """
        noverify = bool(httpmode or any(
            fnmatch.fnmatch(host, pattern) for pattern in self.noverifylist))
        logger.debug(f'host: {host}, noverify: {noverify}')
        for pool in self.pools:
            if any(fnmatch.fnmatch(host, pattern)
                   for pattern in pool['patterns']):
                # bool indexes the pair: False -> 0, True -> 1
                return pool['proxy'], pool['pool'][noverify], noverify

    def setProxyPool(self, proxy):
        """Return [verifying manager, non-verifying manager] for proxy.

        http/https URLs get urllib3.ProxyManager, socks4/socks5 URLs get
        SOCKSProxyManager.  Any other scheme prints an error, waits for
        Enter and raises SystemExit.
        """
        scheme = proxy.split(':')[0]
        if scheme in ('http', 'https'):
            ProxyManager = urllib3.ProxyManager
        elif scheme in ('socks4', 'socks5'):
            ProxyManager = SOCKSProxyManager
        else:
            print("Wrong Proxy Format: " + proxy)
            print("Proxy should start with http/https/socks4/socks5 .")
            # NOTE(review): presumably keeps a console window open until
            # the message has been read - confirm
            input()
            raise SystemExit
        # maxsize is the max. number of connections to the same server
        return [
            ProxyManager(proxy, num_pools=10, maxsize=8, timeout=self.timeout,
                         **self.sslparams),
            ProxyManager(proxy, num_pools=10, maxsize=8, timeout=self.timeout,
                         **self.nosslparams)]
2016-01-16 15:15:48 +01:00
2015-01-07 04:57:51 +01:00
class FrontServer(ThreadingMixIn, HTTPServer):
    """HTTP server for the front (client-facing) address.

    ThreadingMixIn makes it handle every request in a separate thread.
    """
2019-05-07 12:06:21 +02:00
2015-01-07 04:57:51 +01:00
class RearServer(ThreadingMixIn, HTTPServer):
    """HTTP server for the rear address.

    ThreadingMixIn makes it handle every request in a separate thread.
    """
2019-05-07 12:06:21 +02:00
2015-01-07 04:57:51 +01:00
class FrontRequestHandler(ProxyRequestHandler):
    """
    Sit between the client and Privoxy
    Convert https request to http

    Relies on the module globals pools, proxpool, config, counter and
    logger, and on helpers inherited from ProxyRequestHandler.
    """
    server_version = f'{_name} front/{__version__}'

    @staticmethod
    def _matches_any(value, patterns):
        "Return True if value matches at least one of the fnmatch patterns"
        return any(fnmatch.fnmatch(value, pattern) for pattern in patterns)

    def do_CONNECT(self):
        "Decrypt https request and dispatch to http handler"

        # request line: CONNECT www.example.com:443 HTTP/1.1
        self.host, self.port = self.path.split(":")
        self.proxy, self.pool, self.noverify = pools.getpool(self.host)
        if self._matches_any(self.host, pools.blacklist):
            # blacklist
            self.deny_request()
            # NOTE(review): reqNum is only assigned in do_METHOD; presumably
            # the base class provides a default - confirm
            logger.info('{:03d} denied by blacklist: {}'.format(
                self.reqNum, self.host))
        elif self._matches_any(self.host, pools.sslpasslist):
            # TLS passthru
            if self.proxy and self.proxy.startswith('https'):
                self.forward_to_https_proxy()
            elif self.proxy and self.proxy.startswith('socks5'):
                self.forward_to_socks5_proxy()
            else:
                self.tunnel_traffic()
            # Upstream server or proxy of the tunnel is
            # closed explicitly, so we close the local connection too
            self.close_connection = 1
        else:
            # TLS MITM
            self.wfile.write(('HTTP/1.1 200 Connection established\r\n'
                              f'Proxy-agent: {self.version_string()}\r\n'
                              '\r\n').encode('ascii'))
            # Hosts with at least two dots share one certificate per parent
            # domain: "a.b.example.com" -> ".b.example.com"
            if self.host.count('.') >= 2:
                commonname = '.' + self.host.partition('.')[-1]
            else:
                commonname = self.host
            dummycert = get_cert(commonname, config)
            # set a flag for do_METHOD
            self.ssltunnel = True
            # ssl.wrap_socket() was removed in Python 3.12; an SSLContext
            # is the supported way to wrap a server-side socket.  The same
            # file serves as certificate and private key.
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            context.load_cert_chain(certfile=dummycert, keyfile=dummycert)
            ssl_sock = context.wrap_socket(self.connection, server_side=True)
            # Ref: Lib/socketserver.py#StreamRequestHandler.setup()
            self.connection = ssl_sock
            self.rfile = self.connection.makefile('rb', self.rbufsize)
            self.wfile = self.connection.makefile('wb', self.wbufsize)
            # dispatch to do_METHOD()
            self.handle_one_request()

    def do_METHOD(self):
        "Forward request to Privoxy"

        counter.increment_and_set(self, 'reqNum')
        if self.ssltunnel:
            # https request
            host = (self.host if self.port == '443'
                    else ':'.join((self.host, self.port)))
            url = 'https://' + ''.join((host, self.path))
            self.bypass = self._matches_any(url, pools.bypasslist)
            if not self.bypass:
                url = 'http://' + ''.join((host, self.path))
                # Tag the request so the rear server can recognize it
                self.headers["Tagged"] = (self.version_string()
                                          + f':{self.reqNum:d}')
        else:
            # http request
            self.host = urlparse(self.path).hostname
            if not self.host:
                # The request URL is not absolute (e.g. "GET / HTTP/1.1"),
                # so there is no host to match patterns against
                self.sendout_error(
                    self.path, 400,
                    explain='This server is a proxy and only accepts '
                            'absolute request URLs.')
                logger.warning(f'{self.reqNum:03d} [F] no host in '
                               f'"{self.command} {self.path}"')
                return
            if self._matches_any(self.host, pools.blacklist):
                # blacklist
                self.deny_request()
                logger.info('{:03d} denied by blacklist: {}'.format(
                    self.reqNum, self.host))
                return
            host = urlparse(self.path).netloc
            self.proxy, self.pool, self.noverify = pools.getpool(
                self.host, httpmode=True)
            self.bypass = self._matches_any(
                'http://' + host + urlparse(self.path).path,
                pools.bypasslist)
            url = self.path
        self.url = url
        # Bypassed URLs go straight upstream, everything else to Privoxy
        pool = self.pool if self.bypass else proxpool
        data_length = self.headers.get("Content-Length")
        self.postdata = (self.rfile.read(int(data_length))
                         if data_length and int(data_length) > 0 else None)
        if self.command == "POST" and "Content-Length" not in self.headers:
            buffer = self.rfile.read()
            if buffer:
                logger.warning(
                    '{:03d} '
                    'POST w/o "Content-Length" header (Bytes: {} | '
                    'Transfer-Encoding: {} | '
                    'HTTPS: {})'.format(
                        self.reqNum, len(buffer),
                        "Transfer-Encoding" in self.headers,
                        self.ssltunnel))
        # Remove hop-by-hop headers
        self.purge_headers(self.headers)
        r = None
        # Code in urllib3's connectionpool.py expects the headers to have
        # copy() and update() methods, hence the conversion
        headers = urllib3._collections.HTTPHeaderDict(self.headers)
        try:
            # Sometimes 302 redirect would fail with "BadStatusLine"
            # exception, and IE11 doesn't restart the request.
            # retries=1 instead of retries=False fixes it.
            # ! Retry may cause the requests with the same reqNum appear
            # in the log window
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if not self.ssltunnel:
                if self.bypass:
                    prefix = '[BP]' if self.proxy else '[BD]'
                else:
                    prefix = '[D]'
                if self.command in ("GET", "HEAD"):
                    logger.info('{:03d} {} "{} {}" {} {}'.format(
                        self.reqNum, prefix, self.command, url,
                        r.status, r.getheader('Content-Length', '-')))
                else:
                    logger.info('{:03d} {} "{} {} {}" {} {}'.format(
                        self.reqNum, prefix, self.command, url,
                        data_length, r.status,
                        r.getheader('Content-Length', '-')))
            self.send_response_only(r.status, r.reason)
            # HTTPResponse.msg is easier to handle
            # than urllib3._collections.HTTPHeaderDict
            r.headers = r._original_response.msg
            self.purge_write_headers(r.headers)

            # These responses carry no body, so nothing is streamed
            has_no_body = (
                self.command == 'HEAD' or r.status in (100, 101, 204, 304)
                or r.getheader("Content-Length") == '0')
            if not has_no_body:
                self.stream_to_client(r)
                # Without a length or chunking the client can only detect
                # the end of the body by the connection being closed
                if ("Content-Length" not in r.headers
                        and 'Transfer-Encoding' not in r.headers):
                    self.close_connection = 1
        # Intend to catch regular http and bypass
        # http/https requests exceptions
        # Regular https request exceptions should be handled by rear server
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message="Timeout", explain=e)
            logger.warning(f'{self.reqNum:03d} [F] {e} on '
                           f'"{self.command} {url}"')
        except urllib3.exceptions.HTTPError as e:
            self.sendout_error(url, 502, message="HTTP Error", explain=e)
            logger.warning(f'{self.reqNum:03d} [F] {e} on '
                           f'"{self.command} {url}"')
        finally:
            if r is not None:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD
2019-05-07 12:06:21 +02:00
2015-01-07 04:57:51 +01:00
class RearRequestHandler(ProxyRequestHandler):
    """
    Supposed to be the parent proxy for Privoxy for tagged requests
    Convert http request to https

    Relies on the module globals pools, config and logger, and on helpers
    inherited from ProxyRequestHandler.
    """
    server_version = f'{_name} rear/{__version__}'

    def do_METHOD(self):
        "Convert http request to https"

        tag = self.headers.get("Tagged")
        if tag and tag.startswith(_name):
            # The request number is the part after the first ':'
            self.reqNum = int(tag.split(":")[1])
            # Remove the tag
            del self.headers["Tagged"]
        else:
            # An untagged request did not pass through the front server
            self.sendout_error(
                self.path, 400,
                explain='The proxy setting of the client'
                ' is misconfigured.\n\n'
                f'Please set the HTTPS proxy port to {config.front_port} '
                'and check the Docs for other settings.')
            logger.error("[Misconfigured HTTPS proxy port] " + self.path)
            return
        # request line: GET http://somehost.com/path?attr=value HTTP/1.1
        # swap the leading "http" for "https"
        url = "https" + self.path[4:]
        self.host = urlparse(self.path).hostname
        proxy, pool, _ = pools.getpool(self.host)
        prefix = '[P]' if proxy else '[D]'
        data_length = self.headers.get("Content-Length")
        self.postdata = (self.rfile.read(int(data_length))
                         if data_length else None)
        self.purge_headers(self.headers)
        r = None

        # Code in urllib3's connectionpool.py expects the headers to have
        # copy() and update() methods.  That's why we can't use
        # self.headers directly when calling pool.urlopen()
        headers = urllib3._collections.HTTPHeaderDict(self.headers)
        try:
            r = pool.urlopen(
                self.command, url, body=self.postdata, headers=headers,
                retries=1, redirect=False, preload_content=False,
                decode_content=False)
            if proxy:
                logger.debug('Using Proxy - ' + proxy)
            if self.command in ("GET", "HEAD"):
                logger.info('{:03d} {} "{} {}" {} {}'.format(
                    self.reqNum, prefix,
                    self.command, url, r.status,
                    r.getheader('Content-Length', '-')))
            else:
                logger.info('{:03d} {} "{} {} {}" {} {}'.format(
                    self.reqNum, prefix,
                    self.command, url, data_length, r.status,
                    r.getheader('Content-Length', '-')))
            self.send_response_only(r.status, r.reason)
            # HTTPResponse.msg is easier to handle than
            # urllib3._collections.HTTPHeaderDict
            r.headers = r._original_response.msg
            self.purge_write_headers(r.headers)
            # These responses carry no body, so nothing is streamed
            has_no_body = (
                self.command == 'HEAD' or r.status in (100, 101, 204, 304)
                or r.getheader("Content-Length") == '0')
            if not has_no_body:
                self.stream_to_client(r)
                # Without a length or chunking the client can only detect
                # the end of the body by the connection being closed
                if ("Content-Length" not in r.headers
                        and 'Transfer-Encoding' not in r.headers):
                    self.close_connection = 1
        # SSLError and TimeoutError are listed before HTTPError so they
        # get their own status codes
        except urllib3.exceptions.SSLError as e:
            self.sendout_error(url, 417, message='TLS Certificate Failed',
                               explain=e)
            logger.error(f'{self.reqNum:03d} [TLS Certificate Error] {url}')
        except urllib3.exceptions.TimeoutError as e:
            self.sendout_error(url, 504, message='Timeout', explain=e)
            logger.warning(f'{self.reqNum:03d} [R]{prefix} '
                           f'"{self.command} {url}" {e}')
        except urllib3.exceptions.HTTPError as e:
            self.sendout_error(url, 502, message="HTTP Error", explain=e)
            logger.warning(f'{self.reqNum:03d} [R]{prefix} '
                           f'"{self.command} {url}" {e}')
        finally:
            if r is not None:
                # Release the connection back into the pool
                r.release_conn()

    do_GET = do_POST = do_HEAD = do_PUT = do_DELETE = do_OPTIONS = do_METHOD
2019-05-07 12:06:21 +02:00
def main():
    """Set up logging, start both servers and the config watcher in daemon
    threads, print a summary banner and then idle forever.

    Uses the module globals config, logger and pools.  Does not return;
    the caller is expected to handle KeyboardInterrupt.
    """
    urllib3.disable_warnings()
    # LogLevel may be missing (None) or lower-case.  Only accept names that
    # resolve to a numeric level: getattr(logging, 'debug') would hand the
    # function logging.debug to setLevel().
    level = getattr(logging, str(config.loglevel or '').upper(), None)
    if not isinstance(level, int):
        level = logging.INFO
    logger.setLevel(level)
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s %(message)s', datefmt='[%H:%M]')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    frontserver = FrontServer((config.front_name, config.front_port),
                              FrontRequestHandler)
    rearserver = RearServer((config.rear_name, config.rear_port),
                            RearRequestHandler)
    for worker in (frontserver.serve_forever,
                   rearserver.serve_forever,
                   pools.reloadConfig):
        # Daemon threads, so they do not keep the process alive on exit
        thread = threading.Thread(target=worker)
        thread.daemon = True
        thread.start()

    print('=' * 40)
    print(f'{_name} {__version__} (urllib3/{urllib3.__version__})')
    print(f'Front : {config.front_name}:{config.front_port}')
    print(f'Privoxy : {config.proxy_name}')
    print(f'Rear : {config.rear_name}:{config.rear_port}')
    print(f'Proxy : {config.proxy}')
    print('=' * 40)
    # Keep the main thread alive; all the work happens in the daemon threads
    while True:
        time.sleep(1)
2019-05-07 12:06:21 +02:00
if __name__ == '__main__':
    # The first positional parameter of ArgumentParser is `prog`, so the
    # text has to be passed as description= to keep the usage line intact
    parser = argparse.ArgumentParser(
        description='Privoxy TLS proxy wrapper.')
    # FileType makes argparse itself report a missing/unreadable file
    parser.add_argument('-c', '--config', type=argparse.FileType('r'),
                        default='config.ini',
                        help='Privoxy TLS configuration file.')
    args = parser.parse_args()
    # globals
    CONFIG = args.config.name
    # Only the path is needed from here on; don't keep the handle open
    args.config.close()
    logger = logging.getLogger(__name__)
    config = LoadConfig(CONFIG)
    # Pool for everything that is forwarded to Privoxy
    proxpool = urllib3.ProxyManager(
        config.proxy_name, num_pools=10, maxsize=8,
        timeout=urllib3.util.timeout.Timeout(
            connect=90.0, read=310.0))
    pools = ConnectionPools(CONFIG)
    counter = Counter()
    try:
        main()
    except KeyboardInterrupt:
        print("Quitting...")