softdevteam · nanjekyejoannah · May 1, 2024 · May 1, 2024 · May 1, 2024 · ltratt
diff --git a/Include/object.h b/Include/object.h
@@ -105,6 +105,7 @@ whose size is determined when the object is allocated.
  */
 typedef struct _object {
     PyObject_HEAD
+    Py_ssize_t ob_bstate;  /* string-kind tag; the str/unicode code in this patch stores BSTATE_BYTE or BSTATE_UNICODE here. NOTE(review): only struct _object gains the field in this hunk -- confirm other object layouts also carry it */
 } PyObject;
 
 typedef struct {

diff --git a/Lib/test/test_StringIO.py b/Lib/test/test_StringIO.py
@@ -183,6 +183,10 @@ def test_py3k_string_cmp(self):
         with test_support.check_py3k_warnings(*deprecations):
             "test str" == u"test unicode"
 
+    def test_py3k_join(self):  # evaluates str + unicode under check_py3k_warnings
+        with test_support.check_py3k_warnings(*deprecations):  # NOTE(review): `deprecations` is not defined in this hunk -- confirm it is in scope here
+            "test str" + u"test unicode"  # NOTE(review): this uses `+`, not join() -- confirm it is the operation the test is meant to cover
+
 class TestcStringIO(TestGenericStringIO):
     MODULE = cStringIO
 

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
@@ -1020,6 +1020,11 @@ def test_codecs_idna(self):
         # Test whether trailing dot is preserved
         self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
 
+    def test_3k_join(self):  # evaluates unicode + str under check_py3k_warnings
+        with test_support.check_py3k_warnings():
+            u"test unicode" + "test str"  # NOTE(review): this uses `+`, not join() -- confirm it is the operation the test is meant to cover
+
+
     def test_codecs_errors(self):
         # Error handling (encoding)
         self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')

diff --git a/Objects/stringobject.c b/Objects/stringobject.c
@@ -1646,6 +1646,15 @@ string_join(PyStringObject *self, PyObject *orig)
                  * original sequence can be iterated over
                  * again, so we must pass seq here.
                  */
                 PyObject *result;
+                if (!PyString_Check(item) && item->ob_bstate != 0) {  /* integer tag: compare with 0, not NULL */
+                    if (PyErr_WarnPy3k("Concatenation only works for bytes to bytes in 3.x: convert the string to bytes.", 1) < 0) {
+                        Py_DECREF(seq);  /* seq is released on the success path below; release it here too */
+                        return NULL;
+                    }
+                }
+                if (!PyString_Check(orig) && item->ob_bstate == 0) {
+                    item->ob_bstate = BSTATE_BYTE;
+                }
                 result = PyUnicode_Join((PyObject *)self, seq);
                 Py_DECREF(seq);
@@ -1700,6 +1709,7 @@ _PyString_Join(PyObject *sep, PyObject *x)
 {
     assert(sep != NULL && PyString_Check(sep));
     assert(x != NULL);
+    /* sep must be a non-NULL str (checked by assert only); forward to string_join(). */
     return string_join((PyStringObject *)sep, x);
 }
 
@@ -3719,16 +3729,26 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+/* Constructor for str.  Subtypes are handed to str_subtype_new(); with no
+ * argument the empty string is returned untagged.  Otherwise the result of
+ * PyObject_Str(x) is tagged with BSTATE_BYTE before being returned.
+ * Returns NULL if argument parsing or the conversion fails. */
 static PyObject *
 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *x = NULL;
     static char *kwlist[] = {"object", 0};
+    PyObject *tmp = NULL;
 
     if (type != &PyString_Type)
         return str_subtype_new(type, args, kwds);
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
         return NULL;
     if (x == NULL)
         return PyString_FromString("");
-    return PyObject_Str(x);
+    /* PyObject_Str() returns NULL on failure: tag only a real object and
+     * hand NULL back to the caller untouched. */
+    tmp = PyObject_Str(x);
+    if (tmp != NULL)
+        tmp->ob_bstate = BSTATE_BYTE;
+    return tmp;
 }
 
 static PyObject *

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -5673,6 +5673,16 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     PyObject *item;
     Py_ssize_t i;
 
+    /* ob_bstate is a Py_ssize_t tag, not a pointer, so test it against 0.
+     * Warn when seq already carries a tag; otherwise tag it as unicode. */
+    if (!PyUnicode_Check(seq) && seq->ob_bstate != 0) {
+        if (PyErr_WarnPy3k("Concatenation only works for unicode to unicode in 3.x: convert the string to unicode.", 1) < 0) {
+            return NULL;
+        }
+    }
+    if (!PyUnicode_Check(seq) && seq->ob_bstate == 0) {
+        seq->ob_bstate = BSTATE_UNICODE;
+    }
     fseq = PySequence_Fast(seq, "can only join an iterable");
     if (fseq == NULL) {
         return NULL;
@@ -5694,7 +5704,16 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     /* If singleton sequence with an exact Unicode, return that. */
     if (seqlen == 1) {
         item = PySequence_Fast_GET_ITEM(fseq, 0);
+        if (!PyUnicode_CheckExact(item) && item->ob_bstate != 0) {  /* integer tag: compare with 0, not NULL */
+            if (PyErr_WarnPy3k("Concatenation only works for unicode to unicode in 3.x: convert the string to unicode.", 1) < 0) {
+                Py_DECREF(fseq);  /* fseq came from PySequence_Fast(); do not leak it on this exit */
+                return NULL;
+            }
+        }
+        if (!PyUnicode_CheckExact(item) && item->ob_bstate == 0) {
+            item->ob_bstate = BSTATE_UNICODE;
+        }
         if (PyUnicode_CheckExact(item)) {
             Py_INCREF(item);
             res = (PyUnicodeObject *)item;
             goto Done;
@@ -8851,6 +8870,7 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     static char *kwlist[] = {"string", "encoding", "errors", 0};
     char *encoding = NULL;
     char *errors = NULL;
+    PyObject *tmp = NULL;
 
     if (type != &PyUnicode_Type)
         return unicode_subtype_new(type, args, kwds);
@@ -8859,10 +8879,16 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return NULL;
     if (x == NULL)
         return (PyObject *)_PyUnicode_New(0);
     if (encoding == NULL && errors == NULL)
-        return PyObject_Unicode(x);
+        tmp = PyObject_Unicode(x);
     else
-        return PyUnicode_FromEncodedObject(x, encoding, errors);
+        tmp = PyUnicode_FromEncodedObject(x, encoding, errors);
+    /* Either constructor may fail and return NULL; tag the result only
+     * when an object was actually produced, and let NULL propagate to
+     * the caller. */
+    if (tmp != NULL)
+        tmp->ob_bstate = BSTATE_UNICODE;
+    return tmp;
 }
 
 static PyObject *