From 0d21cb88622393fa3d4ddc1932f96942a2bd8f62 Mon Sep 17 00:00:00 2001 From: Joannah Nanjekye Date: Tue, 12 Nov 2024 18:57:42 -0400 Subject: [PATCH 1/4] Track without evading PyObject --- Include/object.h | 2 ++ Objects/object.c | 51 +++++++++++++++++++++++++++++++++++++++++ Objects/stringobject.c | 5 ++++ Objects/unicodeobject.c | 1 + 4 files changed, 59 insertions(+) diff --git a/Include/object.h b/Include/object.h index 807b24188a75b2..46b735e7b9099b 100644 --- a/Include/object.h +++ b/Include/object.h @@ -465,6 +465,8 @@ PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); #ifdef Py_USING_UNICODE PyAPI_FUNC(PyObject *) PyObject_Unicode(PyObject *); #endif +PyAPI_DATA(Py_ssize_t) PyObject_GetBState(PyObject *); +PyAPI_FUNC() PyObject_Unicode(PyObject *); PyAPI_FUNC(int) PyObject_Compare(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); diff --git a/Objects/object.c b/Objects/object.c index 65366b0b351b4c..d40805f4b358d0 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -471,6 +471,57 @@ PyObject_Str(PyObject *v) return res; } +PyObject * +PyObject_GetBState(PyObject *v) +{ + PyUnicodeObject *uni; + PyStringObject *str; + PyBytesObject *byt; + Py_ssize_t bbstate; + + if (v == NULL) { + return 0 + } + else { + if (!PyUnicode_CheckExact(v) || !PyByte_CheckExact(v)) { + return -1; + } + if (PyUnicode_CheckExact(v)) { + uni = (PyUnicodeObject *) PyUnicode_FromObject(v); + if (uni == NULL) + return 0; + bbstate = uni->ob_bstate; + if (bbstate == NULL) + Py_XDECREF(uni); + return 0; + Py_DECREF(uni); + return bbstate; + } + if (PyString_CheckExact(v)) { + str = (PyStringObject *) v; + if (str == NULL) + return 0; + bbstate = str->ob_bstate; + if (bbstate == NULL) + Py_XDECREF(str); + return 0; + Py_DECREF(str); + return bbstate; + } + if (PyBytes_CheckExact(v)) { + byt = (PyBytesObject *) v; + if (byt == NULL) + return 0; + bbstate = byt->ob_bstate; + if (bbstate == NULL) + Py_XDECREF(byt); + return 0; + Py_DECREF(byt); + return bbstate; + } + } +} + #ifdef Py_USING_UNICODE PyObject * PyObject_Unicode(PyObject *v) diff --git a/Objects/stringobject.c b/Objects/stringobject.c index d4b536565f760c..70eb6b6371df48 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3728,6 +3728,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; if (x == NULL) return PyString_FromString(""); + if (PyBytes_CheckExact(v)) { + (PyStringObject *)x->ob_bstate = BSTATE_NOT_SURE; + } else { + (PyBytesObject *)x->ob_bstate = BSTATE_BYTE; + } return PyObject_Str(x); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c00f7f365555d3..cadc04b8766856 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8859,6 +8859,7 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; if (x == NULL) return (PyObject *)_PyUnicode_New(0); + (PyUnicodeObject *)x->ob_bstate = BSTATE_UNICODE; if (encoding == NULL && errors == NULL) return PyObject_Unicode(x); else From fe04ae8f0793aa9c3013126816acdf815107a578 Mon Sep 17 00:00:00 2001 From: Joannah Nanjekye Date: Tue, 26 Nov 2024 18:38:28 -0400 Subject: [PATCH 2/4] Example m = x + by --- Include/object.h | 1 - Objects/object.c | 8 +++++--- Objects/stringobject.c | 32 ++++++++++++++++++++++++++++---- Objects/unicodeobject.c | 2 +- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/Include/object.h b/Include/object.h index 46b735e7b9099b..43181a4b5fde9d 100644 --- a/Include/object.h +++ b/Include/object.h @@ -466,7 +466,6 @@ PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); PyAPI_FUNC(PyObject *) PyObject_Unicode(PyObject *); #endif PyAPI_DATA(Py_ssize_t) PyObject_GetBState(PyObject *); -PyAPI_FUNC() PyObject_Unicode(PyObject *); PyAPI_FUNC(int) PyObject_Compare(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); diff --git a/Objects/object.c b/Objects/object.c index d40805f4b358d0..e9b3dd9acf1e84 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -471,7 +471,7 @@ PyObject_Str(PyObject *v) return res; } -PyObject * +Py_ssize_t PyObject_GetBState(PyObject *v) { PyUnicodeObject *uni; @@ -480,10 +480,10 @@ PyObject_GetBState(PyObject *v) Py_ssize_t bbstate; if (v == NULL) { - return 0 + return 0; } else { - if (!PyUnicode_CheckExact(v) || !PyByte_CheckExact(v)) { + if (!PyUnicode_CheckExact(v) || !PyBytes_CheckExact(v)) { return -1; } if (PyUnicode_CheckExact(v)) { @@ -497,6 +497,8 @@ PyObject_GetBState(PyObject *v) Py_DECREF(uni); return bbstate; } + // Alias, what do we do? + // Lets revisit this discussion we might have had a year back if (PyString_CheckExact(v)) { str = (PyStringObject *) v; if (str == NULL) diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 70eb6b6371df48..780ab0609b1207 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1592,6 +1592,9 @@ string_rsplit(PyStringObject *self, PyObject *args) } +// Example we are testing +// m = "x" + b"y" + PyDoc_STRVAR(join__doc__, "S.join(iterable) -> string\n\ \n\ @@ -1692,6 +1695,26 @@ string_join(PyStringObject *self, PyObject *orig) } Py_DECREF(seq); + // Should have been set before + assert(PyObject_GetBState(self) == 0); + // A bit wierd, no?? + if (((PyStringObject *)item)->ob_bstate != BSTATE_NOT_SURE) { + if (PyBytes_CheckExact(item)) { + if (((PyBytesObject *)item)->ob_bstate == NULL) { + ((PyBytesObject *)item)->ob_bstate = BSTATE_BYTE; + } + self->ob_bstate = PyObject_GetBState(item); + ((PyBytesObject *)res)->ob_bstate = BSTATE_BYTE; + } + if (PyUnicode_Check(item)) { + if (((PyUnicodeObject *)item)->ob_bstate == NULL) { + ((PyUnicodeObject *)item)->ob_bstate = BSTATE_BYTE; + } + self->ob_bstate = PyObject_GetBState(item); + ((PyUnicodeObject *)res)->ob_bstate = BSTATE_BYTE; + } + + } return res; } @@ -3728,10 +3751,11 @@ string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; if (x == NULL) return PyString_FromString(""); - if (PyBytes_CheckExact(v)) { - (PyStringObject *)x->ob_bstate = BSTATE_NOT_SURE; - } else { - (PyBytesObject *)x->ob_bstate = BSTATE_BYTE; + if (PyString_CheckExact(((PyStringObject *)x))) { + ((PyStringObject *)x)->ob_bstate = BSTATE_NOT_SURE; + } + if (PyBytes_CheckExact(((PyBytesObject *)x))) { + ((PyBytesObject *)x)->ob_bstate = BSTATE_BYTE; } return PyObject_Str(x); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cadc04b8766856..10264d42ffe230 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8859,7 +8859,7 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; if (x == NULL) return (PyObject *)_PyUnicode_New(0); - (PyUnicodeObject *)x->ob_bstate = BSTATE_UNICODE; + ((PyUnicodeObject *)x)->ob_bstate = BSTATE_UNICODE; if (encoding == NULL && errors == NULL) return PyObject_Unicode(x); else From bec5fe56c4662864ea363bf862c70e571c5c11ba Mon Sep 17 00:00:00 2001 From: Joannah Nanjekye Date: Tue, 10 Dec 2024 21:45:06 -0400 Subject: [PATCH 3/4] test low level capis for state --- Lib/test/test_py3kwarn.py | 7 +++++++ Modules/_testcapimodule.c | 27 +++++++++++++++++++++++++++ Objects/object.c | 3 +-- Objects/stringobject.c | 26 +++++++------------------- Objects/unicodeobject.c | 9 +++++++++ 5 files changed, 51 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_py3kwarn.py b/Lib/test/test_py3kwarn.py index 9ec40a0b64b697..1a14e8ab345a7b 100644 --- a/Lib/test/test_py3kwarn.py +++ b/Lib/test/test_py3kwarn.py @@ -217,6 +217,13 @@ def test_str_parsing(self): with check_py3k_warnings(): "{0}-{1}: {2}".decode() + def test_str_join(self): + with check_py3k_warnings(): + a = "x" + u"y" + a = b"x" + u"y" + a = u"x" + b"y" + a = u"x" + "y" + def test_string_parsing(self): with check_py3k_warnings(): b"{0}-{1}: {2}"._formatter_parser() diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index af93d00daae3c9..6ea47323236853 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -80,6 +80,33 @@ test_config(PyObject *self) return Py_None; } +static PyObject* +test_join_api(PyObject *self) +{ + PyObject *s = PyObject_Str(NULL); + PyObject *pieces = PyObject_Unicode(NULL); + + PyObject *result = _PyString_Join(s, pieces); + if (((PyUnicodeObject *)result)->ob_bstate != BSTATE_UNICODE) { + PyErr_SetString(TestError, + "test_join_api:string bstate update failed"); + Py_DECREF(result); + return (PyObject*)NULL; + } + + result = PyUnicode_Join(pieces, s); + if (((PyStringObject *)result)->ob_bstate != BSTATE_BYTE) { + PyErr_SetString(TestError, + "test_join_api:unicode bstate update failed"); + Py_DECREF(result); + return (PyObject*)NULL; + } + + Py_DECREF(result); + Py_INCREF(Py_None); + return Py_None; +} + static PyObject* test_list_api(PyObject *self) { diff --git a/Objects/object.c b/Objects/object.c index e9b3dd9acf1e84..e057f7e262fab1 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -497,8 +497,6 @@ PyObject_GetBState(PyObject *v) Py_DECREF(uni); return bbstate; } - // Alias, what do we do? - // Lets revisit this discussion we might have had a year back if (PyString_CheckExact(v)) { str = (PyStringObject *) v; if (str == NULL) @@ -603,6 +601,7 @@ PyObject_Unicode(PyObject *v) Py_DECREF(res); res = str; } + ((PyUnicodeObject *)res)->ob_bstate = BSTATE_UNICODE; return res; } #endif diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 780ab0609b1207..4a0920cea33ce6 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1695,25 +1695,13 @@ string_join(PyStringObject *self, PyObject *orig) } Py_DECREF(seq); - // Should have been set before - assert(PyObject_GetBState(self) == 0); - // A bit wierd, no?? - if (((PyStringObject *)item)->ob_bstate != BSTATE_NOT_SURE) { - if (PyBytes_CheckExact(item)) { - if (((PyBytesObject *)item)->ob_bstate == NULL) { - ((PyBytesObject *)item)->ob_bstate = BSTATE_BYTE; - } - self->ob_bstate = PyObject_GetBState(item); - ((PyBytesObject *)res)->ob_bstate = BSTATE_BYTE; - } - if (PyUnicode_Check(item)) { - if (((PyUnicodeObject *)item)->ob_bstate == NULL) { - ((PyUnicodeObject *)item)->ob_bstate = BSTATE_BYTE; - } - self->ob_bstate = PyObject_GetBState(item); - ((PyUnicodeObject *)res)->ob_bstate = BSTATE_BYTE; - } - + if (PyUnicode_Check(item)) { + ((PyUnicodeObject *)item)->ob_bstate = BSTATE_UNICODE; + self->ob_bstate = PyObject_GetBState(item); + ((PyUnicodeObject *)res)->ob_bstate = BSTATE_UNICODE; + + if (PyErr_WarnPy3k("joining a String and a Unicode is not supported in 3.x", 1) < 0) + return NULL; } return res; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 10264d42ffe230..cf3afd535ff737 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -374,6 +374,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) unicode->length = length; unicode->hash = -1; unicode->defenc = NULL; + unicode->ob_bstate = BSTATE_UNICODE; return unicode; onError: @@ -5739,6 +5740,13 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) i, Py_TYPE(item)->tp_name); goto onError; } + if (PyBytes_CheckExact(item)) { + ((PyBytesObject *)item)->ob_bstate = BSTATE_BYTE; + ((PyBytesObject *)sep)->ob_bstate = PyObject_GetBState(item); + + if (PyErr_WarnPy3k("joining Unicode and a Byte is not supported in 3.x", 1) < 0) + goto onError; + } item = PyUnicode_FromObject(item); if (item == NULL) goto onError; @@ -5791,6 +5799,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) Done: Py_XDECREF(internal_separator); Py_DECREF(fseq); + res->ob_bstate = BSTATE_BYTE; return (PyObject *)res; Overflow: From 1c23e5e4ce1509b8592540352445183eb1211223 Mon Sep 17 00:00:00 2001 From: Joannah Nanjekye Date: Tue, 10 Dec 2024 21:45:47 -0400 Subject: [PATCH 4/4] register the capi test --- Modules/_testcapimodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 6ea47323236853..7c2a8c77be32cb 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2717,6 +2717,7 @@ static PyMethodDef TestMethods[] = { {"raise_exception", raise_exception, METH_VARARGS}, {"set_errno", set_errno, METH_VARARGS}, {"test_config", (PyCFunction)test_config, METH_NOARGS}, + {"test_join_api", (PyCFunction)test_join_api, METH_NOARGS}, #if defined(Py_USING_UNICODE) && !defined(Py_BUILD_CORE) {"test_datetime_capi", test_datetime_capi, METH_NOARGS}, #endif