From e82649445352972a1dfd6bc64274a8755f440649 Mon Sep 17 00:00:00 2001
From: Adam Dobrawy
Date: Sun, 30 Apr 2017 23:10:09 +0200
Subject: [PATCH] Extract constants (fixes #37), allows you to specify a size limit and database path as an argument

---
Review notes (text between "---" and the diffstat is not part of the commit message):

* dht.py: the announce handler passed `self.max_metadata_size`, an attribute that is never
  assigned; `SybilNode.__init__` stores the limit as `self.__max_metadata_size` (name-mangled
  inside the class body), so the call would raise AttributeError. It now reads
  `self.__max_metadata_size`.
* __main__.py: removed the leftover `print(args.metadata_size_limit)` debug statement, fixed
  the spacing in `path = arguments.database_file`, renamed the local `default_database_dir`
  to `default_database_file` (it holds a file path), and gave `parse_size` a docstring and
  explicit exception chaining.
* persistence.py: `from . import bencode` is kept and the orphaned "threshold" comment moves
  to constants.py next to PENDING_INFO_HASHES.
  NOTE(review): the rest of persistence.py is not visible in this patch -- confirm whether
  bencode is still used there before dropping the import.
* NOTE(review): DEFAULT_MAX_METADATA_SIZE is 10 MiB, whereas the removed
  bittorrent.MAX_METADATA_SIZE was 5 MiB -- confirm that doubling the default is intended.
* NOTE(review): "humanfriendly" is added to install_requires without a minimum version,
  unlike the other two requirements.
* NOTE(review): the metadata size check still relies on `assert`, which is stripped when
  Python runs with -O; consider an explicit comparison in a follow-up.
* Indentation and blank context lines were reconstructed from a whitespace-collapsed copy of
  this patch, and the blob ids on the "index" lines are those of the original patch; if the
  context does not match, use `git apply --ignore-whitespace` or regenerate the patch.

 magneticod/magneticod/__main__.py    | 36 ++++++++++++++++++++--------
 magneticod/magneticod/bittorrent.py  | 10 ++++----
 magneticod/magneticod/constants.py   | 12 ++++++++++
 magneticod/magneticod/dht.py         | 16 +++++--------
 magneticod/magneticod/persistence.py |  5 +---
 magneticod/setup.py                  |  3 ++-
 6 files changed, 53 insertions(+), 29 deletions(-)
 create mode 100644 magneticod/magneticod/constants.py

diff --git a/magneticod/magneticod/__main__.py b/magneticod/magneticod/__main__.py
index 32bd4ba..08a237f 100644
--- a/magneticod/magneticod/__main__.py
+++ b/magneticod/magneticod/__main__.py
@@ -27,18 +27,15 @@ import time
 import typing
 
 import appdirs
+import humanfriendly
 
+from .constants import TICK_INTERVAL, MAX_ACTIVE_PEERS_PER_INFO_HASH, DEFAULT_MAX_METADATA_SIZE
 from . import __version__
 from . import bittorrent
 from . import dht
 from . import persistence
 
 
-TICK_INTERVAL = 1  # in seconds (soft constraint)
-# maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time:
-MAX_ACTIVE_PEERS_PER_INFO_HASH = 5
-
-
 # Global variables are bad bla bla bla, BUT these variables are used so many times that I think it is justified; else
 # the signatures of many functions are literally cluttered.
 #
@@ -64,7 +61,7 @@ def main():
 
     # noinspection PyBroadException
     try:
-        path = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3")
+        path = arguments.database_file
         database = persistence.Database(path)
     except:
         logging.exception("could NOT connect to the database!")
@@ -72,7 +69,7 @@ def main():
 
     complete_info_hashes = database.get_complete_info_hashes()
 
-    node = dht.SybilNode(arguments.node_addr)
+    node = dht.SybilNode(arguments.node_addr, max_metadata_size=arguments.metadata_size_limit)
     node.when_peer_found = on_peer_found
 
     selector.register(node, selectors.EVENT_READ)
@@ -92,14 +89,14 @@ def main():
     return 0
 
 
-def on_peer_found(info_hash: dht.InfoHash, peer_address) -> None:
+def on_peer_found(info_hash: dht.InfoHash, peer_address, max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE) -> None:
     global selector, peers, complete_info_hashes
 
     if len(peers[info_hash]) > MAX_ACTIVE_PEERS_PER_INFO_HASH or info_hash in complete_info_hashes:
         return
 
     try:
-        peer = bittorrent.DisposablePeer(info_hash, peer_address)
+        peer = bittorrent.DisposablePeer(info_hash, peer_address, max_metadata_size)
     except ConnectionError:
         return
 
@@ -171,6 +168,14 @@ def loop() -> None:
                 selector.modify(fileobj, selectors.EVENT_READ)
 
 
+def parse_size(value: str) -> int:
+    """Parse a human-readable size (as accepted by humanfriendly.parse_size) for use as an argparse type."""
+    try:
+        return humanfriendly.parse_size(value)
+    except humanfriendly.InvalidSize as e:
+        raise argparse.ArgumentTypeError("Invalid argument. {}".format(e)) from e
+
+
 def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]:
     parser = argparse.ArgumentParser(
         description="Autonomous BitTorrent DHT crawler and metadata fetcher.",
@@ -194,13 +199,24 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]:
         allow_abbrev=False,
         formatter_class=argparse.RawDescriptionHelpFormatter
     )
+
     parser.add_argument(
         "--node-addr", action="store", type=str, required=False,
         help="the address of the (DHT) node magneticod will use"
     )
 
-    args = parser.parse_args(sys.argv[1:])
+    parser.add_argument(
+        "--metadata-size-limit", type=parse_size, default=DEFAULT_MAX_METADATA_SIZE,
+        help="Limit metadata size to protect memory overflow"
+    )
 
+    default_database_file = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3")
+    parser.add_argument(
+        "--database-file", type=str, default=default_database_file,
+        help="Path to database file (default: {})".format(default_database_file)
+    )
+
+    args = parser.parse_args(sys.argv[1:])
     args.node_addr = parse_ip_port(args.node_addr) if args.node_addr else ("0.0.0.0", 0)
     if args.node_addr is None:
         logging.critical("Invalid node address supplied!")
diff --git a/magneticod/magneticod/bittorrent.py b/magneticod/magneticod/bittorrent.py
index 2e37492..e573bd3 100644
--- a/magneticod/magneticod/bittorrent.py
+++ b/magneticod/magneticod/bittorrent.py
@@ -21,15 +21,14 @@ import typing
 import os
 
 from . import bencode
-
-MAX_METADATA_SIZE = 5*1024*1024
+from .constants import DEFAULT_MAX_METADATA_SIZE
 
 InfoHash = bytes
 PeerAddress = typing.Tuple[str, int]
 
 
 class DisposablePeer:
-    def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress):
+    def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress, max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE):
         self.__socket = socket.socket()
         self.__socket.setblocking(False)
         # To reduce the latency:
@@ -43,6 +42,8 @@ class DisposablePeer:
 
         self.__info_hash = info_hash
 
+        self.__max_metadata_size = max_metadata_size
+
         self.__incoming_buffer = bytearray()
         self.__outgoing_buffer = bytearray()
 
@@ -211,7 +212,8 @@ class DisposablePeer:
             ut_metadata = msg_dict[b"m"][b"ut_metadata"]
             metadata_size = msg_dict[b"metadata_size"]
             assert metadata_size > 0, "Invalid (empty) metada size"
-            assert metadata_size < MAX_METADATA_SIZE, "Malicious or malfunctioning peer tried send a huge metadata size"
+            assert metadata_size < self.__max_metadata_size, "Malicious or malfunctioning peer tried send above " \
+                                                             "{} limit metadata size".format(self.__max_metadata_size)
         except (AssertionError, KeyError):
             self.when_error()
             return
diff --git a/magneticod/magneticod/constants.py b/magneticod/magneticod/constants.py
new file mode 100644
index 0000000..3b76e4c
--- /dev/null
+++ b/magneticod/magneticod/constants.py
@@ -0,0 +1,12 @@
+# coding=utf-8
+DEFAULT_MAX_METADATA_SIZE = 10 * 1024 * 1024  # in bytes
+BOOTSTRAPPING_NODES = [
+    ("router.bittorrent.com", 6881),
+    ("dht.transmissionbt.com", 6881)
+]
+# threshold for pending info hashes before being committed to database:
+PENDING_INFO_HASHES = 10
+
+TICK_INTERVAL = 1  # in seconds (soft constraint)
+# maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time:
+MAX_ACTIVE_PEERS_PER_INFO_HASH = 5
diff --git a/magneticod/magneticod/dht.py b/magneticod/magneticod/dht.py
index dfbfb36..767c01b 100644
--- a/magneticod/magneticod/dht.py
+++ b/magneticod/magneticod/dht.py
@@ -20,6 +20,7 @@ import socket
 import typing
 import os
 
+from .constants import BOOTSTRAPPING_NODES, DEFAULT_MAX_METADATA_SIZE
 from . import bencode
 
 NodeID = bytes
@@ -28,14 +29,8 @@ PeerAddress = typing.Tuple[str, int]
 InfoHash = bytes
 
 
-BOOTSTRAPPING_NODES = [
-    ("router.bittorrent.com", 6881),
-    ("dht.transmissionbt.com", 6881)
-]
-
-
 class SybilNode:
-    def __init__(self, address: typing.Tuple[str, int]):
+    def __init__(self, address: typing.Tuple[str, int], max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE):
         self.__true_id = self.__random_bytes(20)
 
         self.__socket = socket.socket(type=socket.SOCK_DGRAM)
@@ -48,7 +43,7 @@ class SybilNode:
         self.__routing_table = {}  # type: typing.Dict[NodeID, NodeAddress]
 
         self.__token_secret = self.__random_bytes(4)
-
+        self.__max_metadata_size = max_metadata_size
         # Maximum number of neighbours (this is a THRESHOLD where, once reached, the search for new neighbours will
         # stop; but until then, the total number of neighbours might exceed the threshold).
         self.__n_max_neighbours = 2000
@@ -56,7 +51,8 @@ class SybilNode:
         logging.info("SybilNode %s on %s initialized!", self.__true_id.hex().upper(), address)
 
     @staticmethod
-    def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress) -> None:
+    def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress,
+                        max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE) -> None:
         raise NotImplementedError()
 
     def on_tick(self) -> None:
@@ -208,7 +204,7 @@ class SybilNode:
         else:
             peer_addr = (addr[0], port)
 
-        self.when_peer_found(info_hash, peer_addr)
+        self.when_peer_found(info_hash, peer_addr, self.__max_metadata_size)
 
     def fileno(self) -> int:
         return self.__socket.fileno()
diff --git a/magneticod/magneticod/persistence.py b/magneticod/magneticod/persistence.py
index afd9077..231dd47 100644
--- a/magneticod/magneticod/persistence.py
+++ b/magneticod/magneticod/persistence.py
@@ -18,11 +18,8 @@ import time
 import typing
 import os
 
 from . import bencode
-
-
-# threshold for pending info hashes before being committed to database:
-PENDING_INFO_HASHES = 10
+from .constants import PENDING_INFO_HASHES
 
 
 class Database:
diff --git a/magneticod/setup.py b/magneticod/setup.py
index 1219fe4..b8ba4d9 100644
--- a/magneticod/setup.py
+++ b/magneticod/setup.py
@@ -23,7 +23,8 @@ setup(
 
     install_requires=[
         "appdirs >= 1.4.3",
-        "bencoder.pyx >= 1.1.3"
+        "bencoder.pyx >= 1.1.3",
+        "humanfriendly"
     ],
 
     classifiers=[