From 14979c96cea08aff95bab7d18205472192bf797a Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Sun, 30 Apr 2017 21:00:33 +0200 Subject: [PATCH 1/9] Limit metadata size A malicious or malfunctioning peer can try to send a huge metadata size, which causes huge memory usage and can exhaust memory. --- magneticod/magneticod/bittorrent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/magneticod/magneticod/bittorrent.py b/magneticod/magneticod/bittorrent.py index 9e4ac54..2e37492 100644 --- a/magneticod/magneticod/bittorrent.py +++ b/magneticod/magneticod/bittorrent.py @@ -22,6 +22,7 @@ import os from . import bencode +MAX_METADATA_SIZE = 5*1024*1024 InfoHash = bytes PeerAddress = typing.Tuple[str, int] @@ -209,7 +210,8 @@ class DisposablePeer: # Just to make sure that the remote peer supports ut_metadata extension: ut_metadata = msg_dict[b"m"][b"ut_metadata"] metadata_size = msg_dict[b"metadata_size"] - assert metadata_size > 0 + assert metadata_size > 0, "Invalid (empty) metada size" + assert metadata_size < MAX_METADATA_SIZE, "Malicious or malfunctioning peer tried send a huge metadata size" except (AssertionError, KeyError): self.when_error() return From e82649445352972a1dfd6bc64274a8755f440649 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Sun, 30 Apr 2017 23:10:09 +0200 Subject: [PATCH 2/9] Extract constants (fixes #37), allows you to specify a size limit and database path as an argument --- magneticod/magneticod/__main__.py | 36 ++++++++++++++++++++-------- magneticod/magneticod/bittorrent.py | 10 ++++---- magneticod/magneticod/constants.py | 11 +++++++++ magneticod/magneticod/dht.py | 16 +++++-------- magneticod/magneticod/persistence.py | 6 ++--- magneticod/setup.py | 3 ++- 6 files changed, 53 insertions(+), 29 deletions(-) create mode 100644 magneticod/magneticod/constants.py diff --git a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py index 32bd4ba..08a237f 100644 --- a/magneticod/magneticod/__main__.py +++ 
b/magneticod/magneticod/__main__.py @@ -27,18 +27,15 @@ import time import typing import appdirs +import humanfriendly +from .constants import TICK_INTERVAL, MAX_ACTIVE_PEERS_PER_INFO_HASH, DEFAULT_MAX_METADATA_SIZE from . import __version__ from . import bittorrent from . import dht from . import persistence -TICK_INTERVAL = 1 # in seconds (soft constraint) -# maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time: -MAX_ACTIVE_PEERS_PER_INFO_HASH = 5 - - # Global variables are bad bla bla bla, BUT these variables are used so many times that I think it is justified; else # the signatures of many functions are literally cluttered. # @@ -64,7 +61,7 @@ def main(): # noinspection PyBroadException try: - path = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3") + path =arguments.database_file database = persistence.Database(path) except: logging.exception("could NOT connect to the database!") @@ -72,7 +69,7 @@ def main(): complete_info_hashes = database.get_complete_info_hashes() - node = dht.SybilNode(arguments.node_addr) + node = dht.SybilNode(arguments.node_addr, max_metadata_size=arguments.metadata_size_limit) node.when_peer_found = on_peer_found selector.register(node, selectors.EVENT_READ) @@ -92,14 +89,14 @@ def main(): return 0 -def on_peer_found(info_hash: dht.InfoHash, peer_address) -> None: +def on_peer_found(info_hash: dht.InfoHash, peer_address, max_metadata_size: int=DEFAULT_MAX_METADATA_SIZE) -> None: global selector, peers, complete_info_hashes if len(peers[info_hash]) > MAX_ACTIVE_PEERS_PER_INFO_HASH or info_hash in complete_info_hashes: return try: - peer = bittorrent.DisposablePeer(info_hash, peer_address) + peer = bittorrent.DisposablePeer(info_hash, peer_address, max_metadata_size) except ConnectionError: return @@ -171,6 +168,13 @@ def loop() -> None: selector.modify(fileobj, selectors.EVENT_READ) +def parse_size(value: str) -> int: + try: + return 
humanfriendly.parse_size(value) + except humanfriendly.InvalidSize as e: + raise argparse.ArgumentTypeError("Invalid argument. {}".format(e)) + + def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: parser = argparse.ArgumentParser( description="Autonomous BitTorrent DHT crawler and metadata fetcher.", @@ -194,13 +198,25 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: allow_abbrev=False, formatter_class=argparse.RawDescriptionHelpFormatter ) + parser.add_argument( "--node-addr", action="store", type=str, required=False, help="the address of the (DHT) node magneticod will use" ) - args = parser.parse_args(sys.argv[1:]) + parser.add_argument( + "--metadata-size-limit", type=parse_size, default=DEFAULT_MAX_METADATA_SIZE, + help="Limit metadata size to protect memory overflow" + ) + default_database_dir = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3") + parser.add_argument( + "--database-file", type=str, default=default_database_dir, + help="Path to database file (default: {})".format(default_database_dir) + ) + + args = parser.parse_args(sys.argv[1:]) + print(args.metadata_size_limit) args.node_addr = parse_ip_port(args.node_addr) if args.node_addr else ("0.0.0.0", 0) if args.node_addr is None: logging.critical("Invalid node address supplied!") diff --git a/magneticod/magneticod/bittorrent.py b/magneticod/magneticod/bittorrent.py index 2e37492..e573bd3 100644 --- a/magneticod/magneticod/bittorrent.py +++ b/magneticod/magneticod/bittorrent.py @@ -21,15 +21,14 @@ import typing import os from . 
import bencode - -MAX_METADATA_SIZE = 5*1024*1024 +from .constants import DEFAULT_MAX_METADATA_SIZE InfoHash = bytes PeerAddress = typing.Tuple[str, int] class DisposablePeer: - def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress): + def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress, max_metadata_size: int= DEFAULT_MAX_METADATA_SIZE): self.__socket = socket.socket() self.__socket.setblocking(False) # To reduce the latency: @@ -43,6 +42,8 @@ class DisposablePeer: self.__info_hash = info_hash + self.__max_metadata_size = max_metadata_size + self.__incoming_buffer = bytearray() self.__outgoing_buffer = bytearray() @@ -211,7 +212,8 @@ class DisposablePeer: ut_metadata = msg_dict[b"m"][b"ut_metadata"] metadata_size = msg_dict[b"metadata_size"] assert metadata_size > 0, "Invalid (empty) metada size" - assert metadata_size < MAX_METADATA_SIZE, "Malicious or malfunctioning peer tried send a huge metadata size" + assert metadata_size < self.__max_metadata_size, "Malicious or malfunctioning peer tried send above " \ + "{} limit metadata size".format(self.__max_metadata_size) except (AssertionError, KeyError): self.when_error() return diff --git a/magneticod/magneticod/constants.py b/magneticod/magneticod/constants.py new file mode 100644 index 0000000..3b76e4c --- /dev/null +++ b/magneticod/magneticod/constants.py @@ -0,0 +1,11 @@ +# coding=utf-8 +DEFAULT_MAX_METADATA_SIZE = 10 * 1024 * 1024 +BOOTSTRAPPING_NODES = [ + ("router.bittorrent.com", 6881), + ("dht.transmissionbt.com", 6881) +] +PENDING_INFO_HASHES = 10 + +TICK_INTERVAL = 1 # in seconds (soft constraint) + # maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time: +MAX_ACTIVE_PEERS_PER_INFO_HASH = 5 diff --git a/magneticod/magneticod/dht.py b/magneticod/magneticod/dht.py index dfbfb36..767c01b 100644 --- a/magneticod/magneticod/dht.py +++ b/magneticod/magneticod/dht.py @@ -20,6 +20,7 @@ import socket import typing import os +from 
.constants import BOOTSTRAPPING_NODES, DEFAULT_MAX_METADATA_SIZE from . import bencode NodeID = bytes @@ -28,14 +29,8 @@ PeerAddress = typing.Tuple[str, int] InfoHash = bytes -BOOTSTRAPPING_NODES = [ - ("router.bittorrent.com", 6881), - ("dht.transmissionbt.com", 6881) -] - - class SybilNode: - def __init__(self, address: typing.Tuple[str, int]): + def __init__(self, address: typing.Tuple[str, int], max_metadata_size: int=DEFAULT_MAX_METADATA_SIZE): self.__true_id = self.__random_bytes(20) self.__socket = socket.socket(type=socket.SOCK_DGRAM) @@ -48,7 +43,7 @@ class SybilNode: self.__routing_table = {} # type: typing.Dict[NodeID, NodeAddress] self.__token_secret = self.__random_bytes(4) - + self.__max_metadata_size = max_metadata_size # Maximum number of neighbours (this is a THRESHOLD where, once reached, the search for new neighbours will # stop; but until then, the total number of neighbours might exceed the threshold). self.__n_max_neighbours = 2000 @@ -56,7 +51,8 @@ class SybilNode: logging.info("SybilNode %s on %s initialized!", self.__true_id.hex().upper(), address) @staticmethod - def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress) -> None: + def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress, + max_metadata_size: int=DEFAULT_MAX_METADATA_SIZE) -> None: raise NotImplementedError() def on_tick(self) -> None: @@ -208,7 +204,7 @@ class SybilNode: else: peer_addr = (addr[0], port) - self.when_peer_found(info_hash, peer_addr) + self.when_peer_found(info_hash, peer_addr, self.max_metadata_size) def fileno(self) -> int: return self.__socket.fileno() diff --git a/magneticod/magneticod/persistence.py b/magneticod/magneticod/persistence.py index afd9077..231dd47 100644 --- a/magneticod/magneticod/persistence.py +++ b/magneticod/magneticod/persistence.py @@ -18,11 +18,9 @@ import time import typing import os -from . 
import bencode - - # threshold for pending info hashes before being committed to database: -PENDING_INFO_HASHES = 10 + +from .constants import PENDING_INFO_HASHES class Database: diff --git a/magneticod/setup.py b/magneticod/setup.py index 1219fe4..b8ba4d9 100644 --- a/magneticod/setup.py +++ b/magneticod/setup.py @@ -23,7 +23,8 @@ setup( install_requires=[ "appdirs >= 1.4.3", - "bencoder.pyx >= 1.1.3" + "bencoder.pyx >= 1.1.3", + "humanfriendly" ], classifiers=[ From ed081e93994622772627b196f5ffb7d4d45dc8f5 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Sun, 30 Apr 2017 23:38:28 +0200 Subject: [PATCH 3/9] Clean up parse_ip_port --- magneticod/magneticod/__main__.py | 48 ++++++++++++++----------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py index 08a237f..3f83005 100644 --- a/magneticod/magneticod/__main__.py +++ b/magneticod/magneticod/__main__.py @@ -168,6 +168,26 @@ def loop() -> None: selector.modify(fileobj, selectors.EVENT_READ) +def parse_ip_port(netloc) -> typing.Optional[typing.Tuple[str, int]]: + # In case no port supplied + try: + return str(ipaddress.ip_address(netloc)), 0 + except ValueError: + pass + + # In case port supplied + try: + parsed = urllib.parse.urlparse("//{}".format(netloc)) + ip = str(ipaddress.ip_address(parsed.hostname)) + port = parsed.port + if port is None: + raise argparse.ArgumentTypeError("Invalid node address supplied!") + except ValueError: + raise argparse.ArgumentTypeError("Invalid node address supplied!") + + return ip, port + + def parse_size(value: str) -> int: try: return humanfriendly.parse_size(value) @@ -200,7 +220,7 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: ) parser.add_argument( - "--node-addr", action="store", type=str, required=False, + "--node-addr", action="store", type=parse_ip_port, required=False, default="0.0.0.0:0", help="the address of the (DHT) node magneticod will use" ) @@ -216,34 
+236,8 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: ) args = parser.parse_args(sys.argv[1:]) - print(args.metadata_size_limit) - args.node_addr = parse_ip_port(args.node_addr) if args.node_addr else ("0.0.0.0", 0) - if args.node_addr is None: - logging.critical("Invalid node address supplied!") - return None - return args -def parse_ip_port(netloc) -> typing.Optional[typing.Tuple[str, int]]: - # In case no port supplied - try: - return str(ipaddress.ip_address(netloc)), 0 - except ValueError: - pass - - # In case port supplied - try: - parsed = urllib.parse.urlparse("//{}".format(netloc)) - ip = str(ipaddress.ip_address(parsed.hostname)) - port = parsed.port - if port is None: - # Invalid port - return None - except ValueError: - return None - - return ip, port - if __name__ == "__main__": sys.exit(main()) From fa2a135e0671b1187d05e8f013fd2d75cc7ebdaa Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Sun, 30 Apr 2017 23:47:08 +0200 Subject: [PATCH 4/9] Add --debug argument --- magneticod/magneticod/__main__.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py index 3f83005..e467f83 100644 --- a/magneticod/magneticod/__main__.py +++ b/magneticod/magneticod/__main__.py @@ -52,16 +52,14 @@ complete_info_hashes = set() def main(): global complete_info_hashes, database, node, peers, selector - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s") - logging.info("magneticod v%d.%d.%d started", *__version__) - arguments = parse_cmdline_arguments() - if arguments is None: - return 2 + + logging.basicConfig(level=arguments.loglevel, format="%(asctime)s %(levelname)-8s %(message)s") + logging.info("magneticod v%d.%d.%d started", *__version__) # noinspection PyBroadException try: - path =arguments.database_file + path = arguments.database_file database = persistence.Database(path) except: 
logging.exception("could NOT connect to the database!") @@ -234,9 +232,12 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: "--database-file", type=str, default=default_database_dir, help="Path to database file (default: {})".format(default_database_dir) ) - - args = parser.parse_args(sys.argv[1:]) - return args + parser.add_argument( + '-d', '--debug', + action="store_const", dest="loglevel", const=logging.DEBUG, default=logging.INFO, + help="Print debugging information in addition to normal processing.", + ) + return parser.parse_args(sys.argv[1:]) if __name__ == "__main__": From cde5728019024a6c8d36d8196c4b417be0b1d0e6 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Mon, 1 May 2017 00:28:34 +0200 Subject: [PATCH 5/9] Fix typo in SybilNode & __max_metadata_size --- magneticod/magneticod/dht.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magneticod/magneticod/dht.py b/magneticod/magneticod/dht.py index 767c01b..399ec18 100644 --- a/magneticod/magneticod/dht.py +++ b/magneticod/magneticod/dht.py @@ -204,7 +204,7 @@ class SybilNode: else: peer_addr = (addr[0], port) - self.when_peer_found(info_hash, peer_addr, self.max_metadata_size) + self.when_peer_found(info_hash, peer_addr, self.__max_metadata_size) def fileno(self) -> int: return self.__socket.fileno() From 9b12c25966a1f76fdced12095bb742ce38b6c62d Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Mon, 1 May 2017 00:40:54 +0200 Subject: [PATCH 6/9] Add missing comment to PENDING_INFO_HASHES --- magneticod/magneticod/constants.py | 2 +- magneticod/magneticod/persistence.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/magneticod/magneticod/constants.py b/magneticod/magneticod/constants.py index 3b76e4c..bc2c954 100644 --- a/magneticod/magneticod/constants.py +++ b/magneticod/magneticod/constants.py @@ -4,7 +4,7 @@ BOOTSTRAPPING_NODES = [ ("router.bittorrent.com", 6881), ("dht.transmissionbt.com", 6881) ] -PENDING_INFO_HASHES = 10 
+PENDING_INFO_HASHES = 10 # threshold for pending info hashes before being committed to database: TICK_INTERVAL = 1 # in seconds (soft constraint) # maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time: diff --git a/magneticod/magneticod/persistence.py b/magneticod/magneticod/persistence.py index 231dd47..f9d02c1 100644 --- a/magneticod/magneticod/persistence.py +++ b/magneticod/magneticod/persistence.py @@ -18,8 +18,6 @@ import time import typing import os -# threshold for pending info hashes before being committed to database: - from .constants import PENDING_INFO_HASHES From 0bb1a9841562456705325a91314ee9b3857bd1cf Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Mon, 1 May 2017 06:22:53 +0200 Subject: [PATCH 7/9] Introduce requested changes, log when malicious / empty metadata received --- magneticod/magneticod/__main__.py | 11 +++++++---- magneticod/magneticod/bittorrent.py | 15 +++++++++++---- magneticod/magneticod/dht.py | 8 +++----- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py index e467f83..d5604e7 100644 --- a/magneticod/magneticod/__main__.py +++ b/magneticod/magneticod/__main__.py @@ -67,8 +67,11 @@ def main(): complete_info_hashes = database.get_complete_info_hashes() - node = dht.SybilNode(arguments.node_addr, max_metadata_size=arguments.metadata_size_limit) - node.when_peer_found = on_peer_found + node = dht.SybilNode(arguments.node_addr) + + node.when_peer_found = lambda info_hash, peer_address: on_peer_found(info_hash=info_hash, + peer_address=peer_address, + max_metadata_size=arguments.max_metadata_size) selector.register(node, selectors.EVENT_READ) @@ -223,8 +226,8 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: ) parser.add_argument( - "--metadata-size-limit", type=parse_size, default=DEFAULT_MAX_METADATA_SIZE, - help="Limit metadata size to protect memory overflow" + 
"--max-metadata-size", type=parse_size, default=DEFAULT_MAX_METADATA_SIZE, + help="Limit metadata size to protect memory overflow. Provide in human friendly format eg. 1 M, 1 GB" ) default_database_dir = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3") diff --git a/magneticod/magneticod/bittorrent.py b/magneticod/magneticod/bittorrent.py index e573bd3..5a5b4be 100644 --- a/magneticod/magneticod/bittorrent.py +++ b/magneticod/magneticod/bittorrent.py @@ -40,6 +40,7 @@ class DisposablePeer: if res != errno.EINPROGRESS: raise ConnectionError() + self.__peer_addr = peer_addr self.__info_hash = info_hash self.__max_metadata_size = max_metadata_size @@ -211,10 +212,16 @@ class DisposablePeer: # Just to make sure that the remote peer supports ut_metadata extension: ut_metadata = msg_dict[b"m"][b"ut_metadata"] metadata_size = msg_dict[b"metadata_size"] - assert metadata_size > 0, "Invalid (empty) metada size" - assert metadata_size < self.__max_metadata_size, "Malicious or malfunctioning peer tried send above " \ - "{} limit metadata size".format(self.__max_metadata_size) - except (AssertionError, KeyError): + assert metadata_size > 0, "Invalid (empty) metadata size" + assert metadata_size < self.__max_metadata_size, "Malicious or malfunctioning peer {}:{} tried send above" \ + " {} max metadata size".format(self.__peer_addr[0], + self.__peer_addr[1], + self.__max_metadata_size) + except KeyError: + self.when_error() + return + except AssertionError as e: + logging.debug(str(e)) self.when_error() return diff --git a/magneticod/magneticod/dht.py b/magneticod/magneticod/dht.py index 399ec18..5e2f057 100644 --- a/magneticod/magneticod/dht.py +++ b/magneticod/magneticod/dht.py @@ -30,7 +30,7 @@ InfoHash = bytes class SybilNode: - def __init__(self, address: typing.Tuple[str, int], max_metadata_size: int=DEFAULT_MAX_METADATA_SIZE): + def __init__(self, address: typing.Tuple[str, int]): self.__true_id = self.__random_bytes(20) self.__socket = 
socket.socket(type=socket.SOCK_DGRAM) @@ -43,7 +43,6 @@ class SybilNode: self.__routing_table = {} # type: typing.Dict[NodeID, NodeAddress] self.__token_secret = self.__random_bytes(4) - self.__max_metadata_size = max_metadata_size # Maximum number of neighbours (this is a THRESHOLD where, once reached, the search for new neighbours will # stop; but until then, the total number of neighbours might exceed the threshold). self.__n_max_neighbours = 2000 @@ -51,8 +50,7 @@ class SybilNode: logging.info("SybilNode %s on %s initialized!", self.__true_id.hex().upper(), address) @staticmethod - def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress, - max_metadata_size: int=DEFAULT_MAX_METADATA_SIZE) -> None: + def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress) -> None: raise NotImplementedError() def on_tick(self) -> None: @@ -204,7 +202,7 @@ class SybilNode: else: peer_addr = (addr[0], port) - self.when_peer_found(info_hash, peer_addr, self.__max_metadata_size) + self.when_peer_found(info_hash, peer_addr) def fileno(self) -> int: return self.__socket.fileno() From 17bd5639090563d25f3486d921fea21c115f6cea Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Mon, 1 May 2017 06:37:51 +0200 Subject: [PATCH 8/9] Fix import error in persistence --- magneticod/magneticod/persistence.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/magneticod/magneticod/persistence.py b/magneticod/magneticod/persistence.py index f9d02c1..25f8755 100644 --- a/magneticod/magneticod/persistence.py +++ b/magneticod/magneticod/persistence.py @@ -18,6 +18,8 @@ import time import typing import os +from magneticod import bencode + from .constants import PENDING_INFO_HASHES From 18f6617214c772e0f6251004d56bfc89f414fd54 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Mon, 1 May 2017 06:47:21 +0200 Subject: [PATCH 9/9] Add humanfriendly.format_path in --database-file help --- magneticod/magneticod/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py index d5604e7..9e3d80b 100644 --- a/magneticod/magneticod/__main__.py +++ b/magneticod/magneticod/__main__.py @@ -233,7 +233,7 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]: default_database_dir = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3") parser.add_argument( "--database-file", type=str, default=default_database_dir, - help="Path to database file (default: {})".format(default_database_dir) + help="Path to database file (default: {})".format(humanfriendly.format_path(default_database_dir)) ) parser.add_argument( '-d', '--debug',