Extract constants (fixes #37); allow specifying a metadata size limit and a database path as command-line arguments
commit e826494453 (parent 14979c96ce)
magneticod/magneticod/__main__.py
@@ -27,18 +27,15 @@ import time
 import typing
 
 import appdirs
+import humanfriendly
 
+from .constants import TICK_INTERVAL, MAX_ACTIVE_PEERS_PER_INFO_HASH, DEFAULT_MAX_METADATA_SIZE
 from . import __version__
 from . import bittorrent
 from . import dht
 from . import persistence
 
 
-TICK_INTERVAL = 1  # in seconds (soft constraint)
-
-# maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time:
-MAX_ACTIVE_PEERS_PER_INFO_HASH = 5
-
 # Global variables are bad bla bla bla, BUT these variables are used so many times that I think it is justified; else
 # the signatures of many functions are literally cluttered.
 #
@@ -64,7 +61,7 @@ def main():
 
     # noinspection PyBroadException
     try:
-        path = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3")
+        path = arguments.database_file
         database = persistence.Database(path)
     except:
         logging.exception("could NOT connect to the database!")
@@ -72,7 +69,7 @@ def main():
 
     complete_info_hashes = database.get_complete_info_hashes()
 
-    node = dht.SybilNode(arguments.node_addr)
+    node = dht.SybilNode(arguments.node_addr, max_metadata_size=arguments.metadata_size_limit)
     node.when_peer_found = on_peer_found
 
     selector.register(node, selectors.EVENT_READ)
@@ -92,14 +89,14 @@ def main():
     return 0
 
 
-def on_peer_found(info_hash: dht.InfoHash, peer_address) -> None:
+def on_peer_found(info_hash: dht.InfoHash, peer_address, max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE) -> None:
     global selector, peers, complete_info_hashes
 
     if len(peers[info_hash]) > MAX_ACTIVE_PEERS_PER_INFO_HASH or info_hash in complete_info_hashes:
         return
 
     try:
-        peer = bittorrent.DisposablePeer(info_hash, peer_address)
+        peer = bittorrent.DisposablePeer(info_hash, peer_address, max_metadata_size)
     except ConnectionError:
         return
 
@@ -171,6 +168,13 @@ def loop() -> None:
             selector.modify(fileobj, selectors.EVENT_READ)
 
 
+def parse_size(value: str) -> int:
+    try:
+        return humanfriendly.parse_size(value)
+    except humanfriendly.InvalidSize as e:
+        raise argparse.ArgumentTypeError("Invalid argument. {}".format(e))
+
+
 def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]:
     parser = argparse.ArgumentParser(
         description="Autonomous BitTorrent DHT crawler and metadata fetcher.",
@@ -194,13 +198,24 @@ def parse_cmdline_arguments() -> typing.Optional[argparse.Namespace]:
         allow_abbrev=False,
         formatter_class=argparse.RawDescriptionHelpFormatter
     )
 
     parser.add_argument(
         "--node-addr", action="store", type=str, required=False,
         help="the address of the (DHT) node magneticod will use"
     )
 
-    args = parser.parse_args(sys.argv[1:])
+    parser.add_argument(
+        "--metadata-size-limit", type=parse_size, default=DEFAULT_MAX_METADATA_SIZE,
+        help="Limit metadata size to protect against memory overflow"
+    )
+
+    default_database_file = os.path.join(appdirs.user_data_dir("magneticod"), "database.sqlite3")
+    parser.add_argument(
+        "--database-file", type=str, default=default_database_file,
+        help="Path to the database file (default: {})".format(default_database_file)
+    )
+
+    args = parser.parse_args(sys.argv[1:])
     args.node_addr = parse_ip_port(args.node_addr) if args.node_addr else ("0.0.0.0", 0)
     if args.node_addr is None:
         logging.critical("Invalid node address supplied!")
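A quick sketch of what the new --metadata-size-limit flag accepts, per humanfriendly's documented unit handling (decimal "MB" vs. binary "MiB"); the sample values below are assumptions based on that documentation, not output from this repo:

    import argparse
    import humanfriendly

    def parse_size(value: str) -> int:
        try:
            return humanfriendly.parse_size(value)
        except humanfriendly.InvalidSize as e:
            raise argparse.ArgumentTypeError("Invalid argument. {}".format(e))

    print(parse_size("10 MB"))    # 10000000  ("MB" is decimal)
    print(parse_size("10 MiB"))   # 10485760  ("MiB" is binary)
    print(parse_size("5242880"))  # 5242880   (plain byte counts pass through)
    # parse_size("ten megs") raises argparse.ArgumentTypeError

So an invocation like magneticod --metadata-size-limit "5 MiB" works without counting bytes by hand.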
magneticod/magneticod/bittorrent.py
@@ -21,15 +21,14 @@ import typing
 import os
 
+from .constants import DEFAULT_MAX_METADATA_SIZE
 from . import bencode
 
-MAX_METADATA_SIZE = 5*1024*1024
-
 InfoHash = bytes
 PeerAddress = typing.Tuple[str, int]
 
 
 class DisposablePeer:
-    def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress):
+    def __init__(self, info_hash: InfoHash, peer_addr: PeerAddress, max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE):
         self.__socket = socket.socket()
         self.__socket.setblocking(False)
         # To reduce the latency:
@@ -43,6 +42,8 @@ class DisposablePeer:
 
         self.__info_hash = info_hash
 
+        self.__max_metadata_size = max_metadata_size
+
         self.__incoming_buffer = bytearray()
         self.__outgoing_buffer = bytearray()
 
@@ -211,7 +212,8 @@ class DisposablePeer:
             ut_metadata = msg_dict[b"m"][b"ut_metadata"]
             metadata_size = msg_dict[b"metadata_size"]
             assert metadata_size > 0, "Invalid (empty) metadata size"
-            assert metadata_size < MAX_METADATA_SIZE, "Malicious or malfunctioning peer tried to send a huge metadata size"
+            assert metadata_size < self.__max_metadata_size, "Malicious or malfunctioning peer tried to send " \
+                "metadata above the {} byte limit".format(self.__max_metadata_size)
         except (AssertionError, KeyError):
             self.when_error()
             return
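For context, the assertions above run against the peer's extended handshake (BEP 10), where metadata_size is announced before any ut_metadata (BEP 9) pieces are exchanged. A minimal sketch of the same check with a hand-built dict standing in for real bencoded input (the values are illustrative):

    # hypothetical decoded handshake from a peer announcing 50 MiB of metadata
    msg_dict = {b"m": {b"ut_metadata": 3}, b"metadata_size": 50 * 1024 * 1024}

    max_metadata_size = 10 * 1024 * 1024  # mirrors DEFAULT_MAX_METADATA_SIZE

    try:
        ut_metadata = msg_dict[b"m"][b"ut_metadata"]
        metadata_size = msg_dict[b"metadata_size"]
        assert metadata_size > 0
        assert metadata_size < max_metadata_size
    except (AssertionError, KeyError):
        # DisposablePeer reacts to either failure by calling self.when_error()
        print("peer rejected: metadata_size missing, zero, or above the limit")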
magneticod/magneticod/constants.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+# coding=utf-8
+DEFAULT_MAX_METADATA_SIZE = 10 * 1024 * 1024
+BOOTSTRAPPING_NODES = [
+    ("router.bittorrent.com", 6881),
+    ("dht.transmissionbt.com", 6881)
+]
+PENDING_INFO_HASHES = 10
+
+TICK_INTERVAL = 1  # in seconds (soft constraint)
+# maximum (inclusive) number of active (disposable) peers to fetch the metadata per info hash at the same time:
+MAX_ACTIVE_PEERS_PER_INFO_HASH = 5
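With the constants centralized, every module imports one source of truth instead of keeping its own copy. A trivial sanity check, assuming the package is importable as magneticod:

    from magneticod.constants import DEFAULT_MAX_METADATA_SIZE, TICK_INTERVAL

    assert DEFAULT_MAX_METADATA_SIZE == 10 * 1024 * 1024
    assert TICK_INTERVAL == 1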
magneticod/magneticod/dht.py
@@ -20,6 +20,7 @@ import socket
 import typing
 import os
 
+from .constants import BOOTSTRAPPING_NODES, DEFAULT_MAX_METADATA_SIZE
 from . import bencode
 
 NodeID = bytes
@@ -28,14 +29,8 @@ PeerAddress = typing.Tuple[str, int]
 InfoHash = bytes
 
 
-BOOTSTRAPPING_NODES = [
-    ("router.bittorrent.com", 6881),
-    ("dht.transmissionbt.com", 6881)
-]
-
-
 class SybilNode:
-    def __init__(self, address: typing.Tuple[str, int]):
+    def __init__(self, address: typing.Tuple[str, int], max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE):
         self.__true_id = self.__random_bytes(20)
 
         self.__socket = socket.socket(type=socket.SOCK_DGRAM)
@@ -48,7 +43,7 @@ class SybilNode:
         self.__routing_table = {}  # type: typing.Dict[NodeID, NodeAddress]
 
         self.__token_secret = self.__random_bytes(4)
-
+        self.__max_metadata_size = max_metadata_size
         # Maximum number of neighbours (this is a THRESHOLD where, once reached, the search for new neighbours will
         # stop; but until then, the total number of neighbours might exceed the threshold).
         self.__n_max_neighbours = 2000
@@ -56,7 +51,8 @@ class SybilNode:
         logging.info("SybilNode %s on %s initialized!", self.__true_id.hex().upper(), address)
 
     @staticmethod
-    def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress) -> None:
+    def when_peer_found(info_hash: InfoHash, peer_addr: PeerAddress,
+                        max_metadata_size: int = DEFAULT_MAX_METADATA_SIZE) -> None:
         raise NotImplementedError()
 
     def on_tick(self) -> None:
@@ -208,7 +204,7 @@ class SybilNode:
         else:
             peer_addr = (addr[0], port)
 
-        self.when_peer_found(info_hash, peer_addr)
+        self.when_peer_found(info_hash, peer_addr, self.__max_metadata_size)
 
     def fileno(self) -> int:
         return self.__socket.fileno()
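Worth tracing how the limit travels: main() parses --metadata-size-limit, passes it to SybilNode.__init__, the node forwards it on every when_peer_found call (note the double-underscore attribute: __init__ stores it name-mangled, so the unmangled self.max_metadata_size would raise AttributeError), and on_peer_found hands it to DisposablePeer. A stub sketch of that chain (StubNode and _peer_discovered are hypothetical stand-ins, not names from the diff):

    LIMIT = 5 * 1024 * 1024  # e.g. the parsed value of --metadata-size-limit "5 MiB"

    class StubNode:
        def __init__(self, max_metadata_size):
            self.__max_metadata_size = max_metadata_size
            self.when_peer_found = None  # main() installs the real callback

        def _peer_discovered(self, info_hash, peer_addr):
            # mirrors the real call site: the node forwards its own limit
            self.when_peer_found(info_hash, peer_addr, self.__max_metadata_size)

    def on_peer_found(info_hash, peer_addr, max_metadata_size):
        print("would fetch", info_hash.hex(), "capped at", max_metadata_size, "bytes")

    node = StubNode(LIMIT)
    node.when_peer_found = on_peer_found
    node._peer_discovered(b"\x00" * 20, ("203.0.113.7", 6881))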
magneticod/magneticod/persistence.py
@@ -18,11 +18,9 @@ import time
 import typing
 import os
 
+from .constants import PENDING_INFO_HASHES
 from . import bencode
 
 
-# threshold for pending info hashes before being committed to database:
-PENDING_INFO_HASHES = 10
-
 
 class Database:
magneticod/setup.py
@@ -23,7 +23,8 @@ setup(
 
     install_requires=[
         "appdirs >= 1.4.3",
-        "bencoder.pyx >= 1.1.3"
+        "bencoder.pyx >= 1.1.3",
+        "humanfriendly"
     ],
 
     classifiers=[
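The comma added after "bencoder.pyx >= 1.1.3" is load-bearing: without it, Python's implicit string-literal concatenation silently merges the two requirements into one bogus entry. A quick demonstration:

    with_comma = ["bencoder.pyx >= 1.1.3", "humanfriendly"]
    without_comma = ["bencoder.pyx >= 1.1.3" "humanfriendly"]  # note the missing comma

    print(with_comma)     # ['bencoder.pyx >= 1.1.3', 'humanfriendly']
    print(without_comma)  # ['bencoder.pyx >= 1.1.3humanfriendly']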