diff --git a/magneticod/magneticod/bencode.py b/magneticod/magneticod/bencode.py index e65e209..126af0c 100755 --- a/magneticod/magneticod/bencode.py +++ b/magneticod/magneticod/bencode.py @@ -13,26 +13,14 @@ # You should have received a copy of the GNU Affero General Public License along with this program. If not, see # <http://www.gnu.org/licenses/>. - """ -bencode - Wrapper around bencoder.pyx library. - -bencoder.pyx - Copyright (c) 2016, whtsky - All rights reserved. - https://github.com/whtsky/bencoder.pyx - Warning: Encoders do NOT check for circular objects! (and will NEVER check due to speed concerns). - -TODO: - Support bytearrays as well! (Currently, only bytes). """ + import typing -import bencoder - +from magneticod import bencoder  # imported as a module: the wrappers below reuse the names dumps/loads/loads2 and would otherwise call themselves Types = typing.Union[int, bytes, list, "KRPCDict"] KRPCDict = typing.Dict[bytes, Types] @@ -40,14 +28,14 @@ KRPCDict = typing.Dict[bytes, Types] def dumps(obj) -> bytes: try: - return bencoder.bencode(obj) + return bencoder.dumps(obj) except: raise BencodeEncodingError() def loads(bytes_object: bytes) -> Types: try: - return bencoder.decode_func[bytes_object[0]](bytes_object, 0)[0] + return bencoder.loads(bytes_object) except Exception as exc: raise BencodeDecodingError(exc) @@ -61,7 +49,7 @@ def loads2(bytes_object: bytes) -> typing.Tuple[Types, int]: print(">>>", dump[i:]) # OUTPUT: >>> b'OH YEAH' """ try: - return bencoder.decode_func[bytes_object[0]](bytes_object, 0) + return bencoder.loads2(bytes_object) except Exception as exc: raise BencodeDecodingError(exc) diff --git a/magneticod/magneticod/bencoder/LICENSE b/magneticod/magneticod/bencoder/LICENSE new file mode 100644 index 0000000..70a2b4e --- /dev/null +++ b/magneticod/magneticod/bencoder/LICENSE @@ -0,0 +1,32 @@ +Copyright (c) 2015 by Krzysztof Kosyl + +Some rights reserved.
+ +Redistribution and use in source and binary forms of the software as well +as documentation, with or without modification, are permitted provided +that the following conditions are met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* The names of the contributors may not be used to endorse or + promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. diff --git a/magneticod/magneticod/bencoder/README.rst b/magneticod/magneticod/bencoder/README.rst new file mode 100644 index 0000000..52f3fc3 --- /dev/null +++ b/magneticod/magneticod/bencoder/README.rst @@ -0,0 +1,10 @@ +This sub-module is a fork of Krzysztof Kosyl's `better-bencode <https://github.com/kosqx/better-bencode/>`_ for the +specific needs of **magnetico**. + +The original repository is forked at commit `46bdc09f1b3003b39aa4263e0a052883a5209c2a`.
+ +Key Differences from *better-bencode*: + +* Python 2 support is removed. +* Removed `dump` and `load` functions, as they are not used and most likely will not be maintained. It's better not to + have them than to have two different sets of functions with inconsistent, confusing behaviour. diff --git a/magneticod/magneticod/bencoder/__init__.py b/magneticod/magneticod/bencoder/__init__.py new file mode 100644 index 0000000..6a753fd --- /dev/null +++ b/magneticod/magneticod/bencoder/__init__.py @@ -0,0 +1,6 @@ +try: + from magneticod.bencoder._fast import dumps, loads, loads2 + from magneticod.bencoder._fast import BencodeValueError, BencodeTypeError +except ImportError: + from magneticod.bencoder._pure import dumps, loads, loads2 + from magneticod.bencoder._pure import BencodeValueError, BencodeTypeError diff --git a/magneticod/magneticod/bencoder/_fast.c b/magneticod/magneticod/bencoder/_fast.c new file mode 100644 index 0000000..d2aeb01 --- /dev/null +++ b/magneticod/magneticod/bencoder/_fast.c @@ -0,0 +1,567 @@ +#define PY_SSIZE_T_CLEAN /* "#" formats take Py_ssize_t lengths; mandatory since Python 3.10 */ +#include <Python.h> + + +#define PY_BUILD_VALUE_BYTES "y#" +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyString_AsStringAndSize PyBytes_AsStringAndSize +#define PyString_Size PyBytes_Size +#define PyInt_CheckExact(obj) 0 + + +struct benc_state { + unsigned int cast : 1; + + Py_ssize_t size; + Py_ssize_t offset; + char* buffer; + PyObject* file; + + PyObject** references_stack; + int references_size; + int references_top; +}; + + +PyObject* BencodeValueError; +PyObject* BencodeTypeError; + + +static void benc_state_init(struct benc_state* bs) { + bs->size = 256; + bs->offset = 0; + bs->buffer = malloc(bs->size); + bs->file = NULL; + + bs->references_size = 8; + bs->references_top = 0; + bs->references_stack = malloc(sizeof(PyObject*) * bs->references_size); +} + + +static void benc_state_free(struct benc_state* bs) { + if (bs->buffer != NULL) { + free(bs->buffer); + } + if (bs->references_stack != NULL) { + free(bs->references_stack); + } +} + + +static
void benc_state_flush(struct benc_state* bs) { + if (bs->offset > 0) { + PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); + bs->offset = 0; + } +} + + +static void benc_state_write_char(struct benc_state* bs, char c) { + if (bs->file == NULL) { + if ((bs->offset + 1) >= bs->size) { + bs->size *= 2; /* record the new capacity, otherwise later writes run past the allocation */ + bs->buffer = realloc(bs->buffer, bs->size); + } + bs->buffer[bs->offset++] = c; + } else { + if ((bs->offset + 1) >= bs->size) { + PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); + bs->offset = 0; + } + bs->buffer[bs->offset++] = c; + } +} + + +static void benc_state_write_buffer(struct benc_state* bs, char* buff, Py_ssize_t size) { + if (bs->file == NULL) { + Py_ssize_t new_size; + for (new_size = bs->size; new_size <= (bs->offset + size); new_size *= 2); + if (new_size > bs->size) { + bs->buffer = realloc(bs->buffer, new_size); + bs->size = new_size; + } + memcpy(bs->buffer + bs->offset, buff, size); + bs->offset += size; + } else { + if (bs->offset + size >= bs->size) { + PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, bs->buffer, bs->offset); + bs->offset = 0; + } + if (size >= bs->size) { + PyObject_CallMethod(bs->file, "write", PY_BUILD_VALUE_BYTES, buff, size); + } else { + memcpy(bs->buffer + bs->offset, buff, size); + bs->offset += size; + } + } +} + + +static void benc_state_write_format(struct benc_state* bs, const int limit, const void *format, ...) { + char buffer[limit + 1]; // maybe use malloc() instead? + + va_list ap; + va_start(ap, format); + int size = vsnprintf(buffer, limit, format, ap); + va_end(ap); + + benc_state_write_buffer(bs, buffer, (size < limit) ?
size : (limit - 1)); +} + + +static int benc_state_read_char(struct benc_state* bs) { + if (bs->file == NULL) { + if (bs->offset < bs->size) { + return (unsigned char) bs->buffer[bs->offset++]; /* keep byte 0xff distinct from the -1 end marker */ + } else { + return -1; + } + } else { + char *buffer; + int result; + Py_ssize_t length; + PyObject *data = PyObject_CallMethod(bs->file, "read", "i", 1); + if (-1 == PyString_AsStringAndSize(data, &buffer, &length)) { + return -1; + } + if (length == 1) { + result = (unsigned char) buffer[0]; + } else { + result = -1; + } + Py_DECREF(data); + return result; + } +} + + +static PyObject *benc_state_read_pystring(struct benc_state* bs, int size) { + if (bs->file == NULL) { + if (size >= 0 && size <= bs->size - bs->offset) { + PyObject *result = PyString_FromStringAndSize(bs->buffer + bs->offset, size); + bs->offset += size; + return result; + } else { + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + return NULL; + } + } else { + PyObject *result = PyObject_CallMethod(bs->file, "read", "i", size); + if (PyString_Size(result) == size) { + return result; + } else { + Py_DECREF(result); + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + return NULL; + } + } +} + +static void benc_state_references_push(struct benc_state* bs, PyObject *obj) { + if ((bs->references_top + 1) == bs->references_size) { + bs->references_size *= 2; + bs->references_stack = realloc( + bs->references_stack, + sizeof(PyObject*) * bs->references_size + ); + } + bs->references_stack[bs->references_top++] = obj; +} + +static void benc_state_references_pop(struct benc_state* bs) { + bs->references_top--; +} + +static int benc_state_references_contains(struct benc_state* bs, PyObject *obj) { + int i; + for (i = 0; i < bs->references_top; i++) { + if (bs->references_stack[i] == obj) { + return 1; + } + } + return 0; +} + + +static int do_dump(struct benc_state *bs, PyObject* obj); + +static int do_dump(struct benc_state *bs, PyObject* obj) { + int i = 0, n = 0; + + if (benc_state_references_contains(bs, obj)) { +
PyErr_Format( + BencodeValueError, + "circular reference detected" + ); + return 0; + } + + if (PyBytes_CheckExact(obj)) { + char *buff = PyBytes_AS_STRING(obj); + int size = PyBytes_GET_SIZE(obj); + + benc_state_write_format(bs, 12, "%d:", size); + benc_state_write_buffer(bs, buff, size); + } else if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) { + long x = PyLong_AsLong(obj); + benc_state_write_format(bs, 23, "i%lde", x); + } else if (bs->cast && PyBool_Check(obj)) { + long x = PyLong_AsLong(obj); + benc_state_write_format(bs, 4, "i%lde", x); + } else if (PyList_CheckExact(obj) || (bs->cast && PyList_Check(obj))) { + n = PyList_GET_SIZE(obj); + benc_state_references_push(bs, obj); + benc_state_write_char(bs, 'l'); + for (i = 0; i < n; i++) { + do_dump(bs, PyList_GET_ITEM(obj, i)); + } + benc_state_write_char(bs, 'e'); + benc_state_references_pop(bs); + } else if (bs->cast && PyTuple_Check(obj)) { + n = PyTuple_GET_SIZE(obj); + benc_state_references_push(bs, obj); + benc_state_write_char(bs, 'l'); + for (i = 0; i < n; i++) { + do_dump(bs, PyTuple_GET_ITEM(obj, i)); + } + benc_state_write_char(bs, 'e'); + benc_state_references_pop(bs); + } else if (PyDict_CheckExact(obj)) { + Py_ssize_t index = 0; + PyObject *keys, *key, *value; + keys = PyDict_Keys(obj); + PyList_Sort(keys); + + benc_state_references_push(bs, obj); + benc_state_write_char(bs, 'd'); + for (index = 0; index < PyList_Size(keys); index++) { + key = PyList_GetItem(keys, index); + value = PyDict_GetItem(obj, key); + do_dump(bs, key); + do_dump(bs, value); + } + benc_state_write_char(bs, 'e'); + benc_state_references_pop(bs); + + Py_DECREF(keys); + } else { + PyErr_Format( + BencodeTypeError, + "type %s is not Bencode serializable", + Py_TYPE(obj)->tp_name + ); + } + return 0; +} + + +static PyObject* dumps(PyObject* self, PyObject* args, PyObject* kwargs) { + static char *kwlist[] = {"obj", "cast", NULL}; + + PyObject* obj; + PyObject* result; + int cast = 0; + + struct benc_state bs; +
benc_state_init(&bs); + + if (!PyArg_ParseTupleAndKeywords( + args, kwargs, "O|i", kwlist, + &obj, &cast + )) + { + benc_state_free(&bs); + return NULL; + } + + bs.cast = !!cast; + + do_dump(&bs, obj); + + if (PyErr_Occurred()) { + benc_state_free(&bs); + return NULL; + } else { + result = Py_BuildValue(PY_BUILD_VALUE_BYTES, bs.buffer, bs.offset); + benc_state_free(&bs); + return result; + } +} + + +static PyObject *do_load(struct benc_state *bs) { + PyObject *retval = NULL; + + /* The input is untrusted: bound the nesting depth instead of overflowing the C stack. */ + if (Py_EnterRecursiveCall(" while decoding a Bencode object")) { + return NULL; + } + + int first = benc_state_read_char(bs); + + switch (first) { + case 'i': { + int sign = 1; + int read_cnt = 0; + long long value = 0; + int current = benc_state_read_char(bs); + if (current == '-') { + sign = -1; + current = benc_state_read_char(bs); + } + while (('0' <= current) && (current <= '9')) { + if (value > (LLONG_MAX - 9) / 10) { + break; /* next digit could overflow long long; reported below as an unexpected byte */ + } + value = value * 10 + (current - '0'); + current = benc_state_read_char(bs); + read_cnt++; + } + + if ('e' == current) { + if (read_cnt > 0) { + value *= sign; + retval = PyLong_FromLongLong(value); + } else { + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + retval = NULL; + } + } else if (-1 == current) { + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + retval = NULL; + } else { + PyErr_Format( + BencodeValueError, + "unexpected byte 0x%.2x", + current + ); + retval = NULL; + } + + } break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + int size = first - '0'; + int current = benc_state_read_char(bs); + while (('0' <= current) && (current <= '9')) { + if (size > (INT_MAX - 9) / 10) { + break; /* length cannot fit in int; reported below as an unexpected byte */ + } + size = size * 10 + (current - '0'); + current = benc_state_read_char(bs); + } + if (':' == current) { + retval = benc_state_read_pystring(bs, size); + } else if (-1 == current) { + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + retval = NULL; + } else { + PyErr_Format( + BencodeValueError, + "unexpected byte 0x%.2x", + current + ); + retval = NULL; + } + + } break; + case 'e': + Py_INCREF(PyExc_StopIteration); + retval =
PyExc_StopIteration; + break; + case 'l': { + PyObject *v = PyList_New(0); + PyObject *item; + + while (1) { + item = do_load(bs); + + if (item == PyExc_StopIteration) { + Py_DECREF(PyExc_StopIteration); + break; + } + + if (item == NULL) { + if (!PyErr_Occurred()) { + PyErr_SetString( + BencodeTypeError, + "unexpected error in list" + ); + } + Py_DECREF(v); + v = NULL; + break; + } + + PyList_Append(v, item); + Py_DECREF(item); + } + + retval = v; + } break; + case 'd': { + PyObject *v = PyDict_New(); + + while (1) { + PyObject *key, *val; + key = val = NULL; + key = do_load(bs); + + if (key == PyExc_StopIteration) { + Py_DECREF(PyExc_StopIteration); + break; + } + + if (key == NULL) { + if (!PyErr_Occurred()) { + PyErr_SetString(BencodeTypeError, "unexpected error in dict"); + } + break; + } + + val = do_load(bs); + if (val == PyExc_StopIteration) { + /* 'e' where a value was expected: never store the end-of-container sentinel */ + PyErr_Format(BencodeValueError, "unexpected byte 0x%.2x", 'e'); + } else if (val != NULL) { + PyDict_SetItem(v, key, val); + } else if (!PyErr_Occurred()) { + PyErr_SetString(BencodeTypeError, "unexpected error in dict"); + } + Py_DECREF(key); + Py_XDECREF(val); + if (PyErr_Occurred()) { + break; + } + } + if (PyErr_Occurred()) { + Py_DECREF(v); + v = NULL; + } + retval = v; + } break; + case -1: { + PyErr_Format( + BencodeValueError, + "unexpected end of data" + ); + retval = NULL; + } break; + default: + PyErr_Format( + BencodeValueError, + "unexpected byte 0x%.2x", + first + ); + retval = NULL; + break; + } + Py_LeaveRecursiveCall(); + return retval; +} + + +static PyObject* loads(PyObject* self, PyObject* args) { + struct benc_state bs; + memset(&bs, 0, sizeof(struct benc_state)); + + if (!PyArg_ParseTuple(args, PY_BUILD_VALUE_BYTES, &(bs.buffer), &(bs.size))) + return NULL; + + PyObject* obj = do_load(&bs); + + return obj; +} + + +static PyObject* loads2(PyObject* self, PyObject* args) { + /* + * Returns a new (object, end offset) tuple, or NULL with an exception set.
+ * Py_BuildValue's "O" takes its own reference, so ours is dropped either way. + */ + struct benc_state bs; + memset(&bs, 0, sizeof(struct benc_state)); + + if (!PyArg_ParseTuple(args, PY_BUILD_VALUE_BYTES, &(bs.buffer), &(bs.size))) + return NULL; + + PyObject* obj = do_load(&bs); + if (obj == NULL) { + return NULL; + } + PyObject* result = Py_BuildValue("(On)", obj, bs.offset); + Py_DECREF(obj); + + return result; +} + + +static PyObject *add_errors(PyObject *module) { + BencodeValueError = PyErr_NewException( + "bencoder._fast.BencodeValueError", PyExc_ValueError, NULL + ); + Py_INCREF(BencodeValueError); + PyModule_AddObject(module, "BencodeValueError", BencodeValueError); + + BencodeTypeError = PyErr_NewException( + "bencoder._fast.BencodeTypeError", PyExc_TypeError, NULL + ); + Py_INCREF(BencodeTypeError); + PyModule_AddObject(module, "BencodeTypeError", BencodeTypeError); + + return module; +} + + +static PyMethodDef bencoder_fastMethods[] = { + {"loads", loads, METH_VARARGS, "Deserialize ``s`` to a Python object."}, + {"loads2", loads2, METH_VARARGS, "Deserialize ``s`` to a Python object and return end index."}, + {"dumps", (PyCFunction) dumps, METH_VARARGS|METH_KEYWORDS, "Serialize ``obj`` to a Bencode formatted ``str``."}, + {NULL, NULL, 0, NULL} +}; + + +static struct PyModuleDef bencoder_fast_module = { + PyModuleDef_HEAD_INIT, + "bencoder._fast", + NULL, + -1, + bencoder_fastMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__fast(void) { + PyObject *module = PyModule_Create(&bencoder_fast_module); + if (module == NULL) return NULL; + return add_errors(module); +} diff --git a/magneticod/magneticod/bencoder/_pure.py b/magneticod/magneticod/bencoder/_pure.py new file mode 100644 index 0000000..9ee204d --- /dev/null +++ b/magneticod/magneticod/bencoder/_pure.py @@ -0,0 +1,152 @@ +""" +Pure Python implementation of Bencode serialization format. +To be used when fast C Extension cannot be compiled.
+""" +from io import BytesIO as StringIO + + +INTEGER_TYPES = (int,) +BINARY_TYPES = (bytes, ) +int_to_binary = lambda val: bytes(str(val), 'ascii') + + +class BencodeValueError(ValueError): + pass + + +class BencodeTypeError(TypeError): + pass + + +def _dump_implementation(obj, write, path, cast): + """ dump()/dumps() implementation """ + + t = type(obj) + + if id(obj) in path: + raise BencodeValueError('circular reference detected') + + if t in INTEGER_TYPES: + write(b'i') + write(int_to_binary(obj)) + write(b'e') + elif t in BINARY_TYPES: + write(int_to_binary(len(obj))) + write(b':') + write(obj) + elif t is list or (cast and issubclass(t, (list, tuple))): + write(b'l') + for item in obj: + _dump_implementation(item, write, path + [id(obj)], cast) + write(b'e') + elif t is dict: + write(b'd') + + data = sorted(obj.items()) + for key, val in data: + _dump_implementation(key, write, path + [id(obj)], cast) + _dump_implementation(val, write, path + [id(obj)], cast) + write(b'e') + elif cast and t is bool: + write(b'i') + write(int_to_binary(int(obj))) + write(b'e') + else: + raise BencodeTypeError( + 'type %s is not Bencode serializable' % type(obj).__name__ + ) + + +def dumps(obj, cast=False): + """Serialize ``obj`` to Bencode formatted ``bytes``.""" + + fp = [] + _dump_implementation(obj, fp.append, [], cast) + return b''.join(fp) + + +def _read_until(delimiter, read): + """ Read char by char until ``delimiter`` occurs.
""" + + result = b'' + ch = read(1) + if not ch: + raise BencodeValueError('unexpected end of data') + while ch != delimiter: + result += ch + ch = read(1) + if not ch: + raise BencodeValueError('unexpected end of data') + return result + + +def _load_implementation(read): + """ load()/loads() implementation; returns ``StopIteration`` for a bare end marker """ + first = read(1) + + if first == b'e': + return StopIteration + elif first == b'i': + value = b'' + ch = read(1) + while (b'0' <= ch <= b'9') or (ch == b'-' and not value):  # a sign is only valid as the first character + value += ch + ch = read(1) + if ch == b'' or (ch == b'e' and value in (b'', b'-')): + raise BencodeValueError('unexpected end of data') + if ch != b'e': + raise BencodeValueError('unexpected byte 0x%.2x' % ord(ch)) + return int(value) + elif b'0' <= first <= b'9': + size = 0 + while b'0' <= first <= b'9': + size = size * 10 + (ord(first) - ord('0')) + first = read(1) + if first == b'': + raise BencodeValueError('unexpected end of data') + if first != b':': + raise BencodeValueError('unexpected byte 0x%.2x' % ord(first)) + data = read(size) + if len(data) != size: + raise BencodeValueError('unexpected end of data') + return data + elif first == b'l': + result = [] + while True: + val = _load_implementation(read) + if val is StopIteration: + return result + result.append(val) + elif first == b'd': + result = {} + while True: + this = read(1) + if this == b'e': + return result + elif this == b'': + raise BencodeValueError('unexpected end of data') + elif not this.isdigit(): + raise BencodeValueError('unexpected byte 0x%.2x' % ord(this)) + key = read(int(this + _read_until(b':', read))) + val = _load_implementation(read) + if val is StopIteration:  # b'e' where a value was expected; never store the end marker + raise BencodeValueError('unexpected byte 0x65') + result[key] = val + elif first == b'': + raise BencodeValueError('unexpected end of data') + else: + raise BencodeValueError('unexpected byte 0x%.2x' % ord(first)) + + +def loads(data): + """Deserialize ``data`` (bytes) to a Python object.""" + + fp = StringIO(data) + return _load_implementation(fp.read) + + +def loads2(data): + """Deserialize ``data`` (bytes) and return ``(object, end offset)``.""" + + fp
= StringIO(data) + return _load_implementation(fp.read), fp.tell() diff --git a/magneticod/setup.py b/magneticod/setup.py index b71ad1a..4c90787 100644 --- a/magneticod/setup.py +++ b/magneticod/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup +from setuptools import find_packages, setup, Extension def read_file(path): @@ -15,7 +15,7 @@ setup( author="Mert Bora ALPER", author_email="bora@boramalper.org", license="GNU Affero General Public License v3 or later (AGPLv3+)", - packages=["magneticod"], + packages=find_packages(), zip_safe=False, entry_points={ "console_scripts": ["magneticod=magneticod.__main__:main"] @@ -36,5 +36,13 @@ setup( "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: Implementation :: CPython", + ], + + ext_modules=[ + Extension( + "magneticod.bencoder._fast", + sources=["magneticod/bencoder/_fast.c"], + optional=True,  # a failed C build is not fatal: bencoder/__init__.py falls back to _pure + ), ] )