From f54077c0d6e8d36c4e6562eb1fc3e9e8a7a6e943 Mon Sep 17 00:00:00 2001
From: Carbon225 <carbon225@protonmail.com>
Date: Sat, 30 Nov 2024 23:25:37 +0100
Subject: [PATCH 1/6] offset the disallowed sequence index by ignored
 characters

---
 ens_normalize/normalization.py | 19 +++++++++++++------
 tests/test_normalization.py    | 17 +++++++++++++++++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/ens_normalize/normalization.py b/ens_normalize/normalization.py
index 6f95efb..fed1182 100644
--- a/ens_normalize/normalization.py
+++ b/ens_normalize/normalization.py
@@ -558,7 +558,10 @@ def post_check_empty(name: str, input: str) -> Optional[CurableSequence]:
         # fully ignorable name
         return CurableSequence(
             CurableSequenceType.EMPTY_LABEL,
-            index=0,
+            # We set the index to -1 to let offset_err_start()
+            # know that this is the special empty name case.
+            # Otherwise, it would offset the index past the ignored characters.
+            index=-1,
             sequence=input,
             suggested='',
         )
@@ -581,7 +584,7 @@ def post_check_empty(name: str, input: str) -> Optional[CurableSequence]:
         return CurableSequence(
             CurableSequenceType.EMPTY_LABEL,
             index=i,
-            sequence='..',
+            sequence='..',  # !!
             suggested='.',
         )
 
@@ -598,7 +601,7 @@ def post_check_underscore(label: str) -> Optional[CurableSequence]:
             return CurableSequence(
                 CurableSequenceType.UNDERSCORE,
                 index=i,
-                sequence='_' * cnt,
+                sequence='_' * cnt,  # !!
                 suggested='',
             )
 
@@ -608,7 +611,7 @@ def post_check_hyphen(label: str) -> Optional[CurableSequence]:
         return CurableSequence(
             CurableSequenceType.HYPHEN,
             index=2,
-            sequence='--',
+            sequence='--',  # !!
             suggested='',
         )
 
@@ -648,7 +651,7 @@ def make_fenced_error(cps: List[int], start: int, end: int) -> CurableSequence:
     return CurableSequence(
         type_,
         index=start,
-        sequence=''.join(map(chr, cps[start:end])),
+        sequence=''.join(map(chr, cps[start:end])),  # !!
         suggested=suggested,
     )
 
@@ -1097,12 +1100,16 @@ def offset_err_start(err: Optional[CurableSequence], tokens: List[Token]):
     Output of post_check() is not input aligned.
     This function offsets the error index (in-place) to match the input characters.
     """
+    if err.index < 0:
+        # empty name case
+        err.index = 0
+        return
     # index in string that was scanned
     i = 0
     # offset between input and scanned
     offset = 0
     for tok in tokens:
-        if i >= err.index:
+        if i > err.index:
             # everything before the error is aligned
             break
         if tok.type in (TY_IGNORED, TY_DISALLOWED):
diff --git a/tests/test_normalization.py b/tests/test_normalization.py
index 93950a2..37357c0 100644
--- a/tests/test_normalization.py
+++ b/tests/test_normalization.py
@@ -532,3 +532,20 @@ def test_simple_name_optimization():
     assert len(r.cures) == 0
     assert r.error is None
     assert r.normalizations is None
+
+
+@pytest.mark.parametrize(
+    'input_str, expected_code, expected_index, expected_sequence, expected_suggested',
+    [
+        ('nick.\ufe0f\ufe0f.eth', 'EMPTY_LABEL', 4, '.\ufe0f\ufe0f.', '.'),
+        ('01\ufe0f--345', 'HYPHEN', 3, '--', ''),
+        ('01-\ufe0f-345', 'HYPHEN', 2, '-\ufe0f-', ''),
+        ("\ufe0f'b", 'FENCED_LEADING', 1, '’', ''),
+    ],
+)
+def test_suggestions_with_ignored(input_str, expected_code, expected_index, expected_sequence, expected_suggested):
+    e = ens_process(input_str).error
+    assert e.code == expected_code
+    assert e.index == expected_index
+    assert e.sequence == expected_sequence
+    assert e.suggested == expected_suggested

From bd9d4d283c52a99f0ea69ea6d20fefc099249f1f Mon Sep 17 00:00:00 2001
From: Carbon225 <carbon225@protonmail.com>
Date: Sat, 30 Nov 2024 23:57:32 +0100
Subject: [PATCH 2/6] insert ignored chars into disallowed sequence

---
 ens_normalize/normalization.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/ens_normalize/normalization.py b/ens_normalize/normalization.py
index fed1182..ac28475 100644
--- a/ens_normalize/normalization.py
+++ b/ens_normalize/normalization.py
@@ -1060,7 +1060,7 @@ def ens_process(
         label_is_greek = []
         error = post_check(emojis_as_fe0f, label_is_greek, input)
         if isinstance(error, CurableSequence):  # or NormalizableSequence because of inheritance
-            offset_err_start(error, tokens)
+            offset_err_start(error, tokens, input)
 
     # else:
     # only the result of post_check() is not input aligned
@@ -1095,7 +1095,20 @@ def ens_process(
     )
 
 
-def offset_err_start(err: Optional[CurableSequence], tokens: List[Token]):
+def restore_ignored_in_sequence(seq: str, input: str) -> str:
+    seq_out = []
+    input_i = 0
+    for c in seq:
+        # TODO: needs to handle mapped characters
+        while input[input_i] != c:
+            seq_out.append(input[input_i])
+            input_i += 1
+        seq_out.append(c)
+        input_i += 1
+    return ''.join(seq_out)
+
+
+def offset_err_start(err: Optional[CurableSequence], tokens: List[Token], input: str):
     """
     Output of post_check() is not input aligned.
     This function offsets the error index (in-place) to match the input characters.
@@ -1134,6 +1147,7 @@ def offset_err_start(err: Optional[CurableSequence], tokens: List[Token]):
             # input: cps, scanned: cps
             i += len(tok.cps)
     err.index += offset
+    err.sequence = restore_ignored_in_sequence(err.sequence, input[err.index :])
 
 
 def ens_normalize(text: str) -> str:

From b8437739437bf06a86ff8b3a4dbd4c229e1493d1 Mon Sep 17 00:00:00 2001
From: Carbon225 <carbon225@protonmail.com>
Date: Tue, 31 Dec 2024 22:21:21 +0100
Subject: [PATCH 3/6] Handle mapped chars in ignored suggestions

---
 ens_normalize/normalization.py | 40 +++++++++++++++++++++++++++++-----
 tests/test_normalization.py    | 26 +++++++++++++++++++++-
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/ens_normalize/normalization.py b/ens_normalize/normalization.py
index ac28475..65d9435 100644
--- a/ens_normalize/normalization.py
+++ b/ens_normalize/normalization.py
@@ -1096,15 +1096,45 @@ def ens_process(
 
 
 def restore_ignored_in_sequence(seq: str, input: str) -> str:
+    """
+    Restore any ignored characters from the input string into the sequence.
+
+    Args:
+        seq: The sequence to restore ignored characters into
+        input: The input string that may contain ignored characters
+
+    Returns:
+        The sequence with ignored characters restored
+    """
+    if not seq:
+        return seq
+
     seq_out = []
     input_i = 0
-    for c in seq:
-        # TODO: needs to handle mapped characters
-        while input[input_i] != c:
+    seq_len = len(seq)
+    matched = 0
+
+    # Keep going until we've matched all characters in seq
+    while matched < seq_len and input_i < len(input):
+        # For mapped characters, we need to check if the current input char
+        # maps to our target sequence char
+        input_cp = ord(input[input_i])
+        mapped_cps = NORMALIZATION.mapped.get(input_cp, [input_cp])
+        target_cp = ord(seq[matched])
+
+        if input_cp == target_cp or target_cp in mapped_cps:
+            seq_out.append(input[input_i])
+            matched += 1
+        elif matched > 0:
+            # If we've started matching but hit a non-match,
+            # include ignored characters between matches
             seq_out.append(input[input_i])
-            input_i += 1
-        seq_out.append(c)
         input_i += 1
+
+    # If we didn't match everything, use the original sequence
+    if matched < seq_len:
+        return seq
+
     return ''.join(seq_out)
 
 
diff --git a/tests/test_normalization.py b/tests/test_normalization.py
index 37357c0..19b0c09 100644
--- a/tests/test_normalization.py
+++ b/tests/test_normalization.py
@@ -540,7 +540,7 @@ def test_simple_name_optimization():
         ('nick.\ufe0f\ufe0f.eth', 'EMPTY_LABEL', 4, '.\ufe0f\ufe0f.', '.'),
         ('01\ufe0f--345', 'HYPHEN', 3, '--', ''),
         ('01-\ufe0f-345', 'HYPHEN', 2, '-\ufe0f-', ''),
-        ("\ufe0f'b", 'FENCED_LEADING', 1, '’', ''),
+        ("\ufe0f'b", 'FENCED_LEADING', 1, "'", ''),
     ],
 )
 def test_suggestions_with_ignored(input_str, expected_code, expected_index, expected_sequence, expected_suggested):
@@ -549,3 +549,27 @@ def test_suggestions_with_ignored(input_str, expected_code, expected_index, expe
     assert e.index == expected_index
     assert e.sequence == expected_sequence
     assert e.suggested == expected_suggested
+
+
+@pytest.mark.parametrize(
+    'input_str, expected_type, expected_index, expected_sequence, expected_suggested',
+    [
+        # Test mapped characters with ignored characters
+        ('aA\ufe0fA', NormalizableSequenceType.MAPPED, 1, 'A', 'a'),  # Single capital A gets mapped
+        ('aAB', NormalizableSequenceType.MAPPED, 1, 'A', 'a'),  # First capital gets mapped
+        # Test FE0F normalization
+        ('a🚴‍♂️', NormalizableSequenceType.FE0F, 1, '🚴‍♂️', '🚴‍♂'),  # FE0F in emoji
+        # Test ignored characters
+        ('a\u00ad', NormalizableSequenceType.IGNORED, 1, '\u00ad', ''),  # Soft hyphen is ignored
+        # Test FE0F as ignored
+        ('a\ufe0f', NormalizableSequenceType.IGNORED, 1, '\ufe0f', ''),  # FE0F by itself is ignored
+    ],
+)
+def test_normalizations_with_ignored(input_str, expected_type, expected_index, expected_sequence, expected_suggested):
+    normalizations = ens_normalizations(input_str)
+    assert len(normalizations) > 0
+    e = normalizations[0]  # Get first normalization
+    assert e.type == expected_type
+    assert e.index == expected_index
+    assert e.sequence == expected_sequence
+    assert e.suggested == expected_suggested

From e11a77347f815802b0da16c4d7de5c93b35107b2 Mon Sep 17 00:00:00 2001
From: "kwrobel.eth" <djstrong@gmail.com>
Date: Thu, 23 Jan 2025 11:27:35 +0100
Subject: [PATCH 4/6] fix-workflow

Signed-off-by: kwrobel.eth <djstrong@gmail.com>
---
 .github/workflows/test.yml | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e7a07be..30deed2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,10 +18,16 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         python-version: [3.8, 3.11]
-
+    
     steps:
-    - uses: actions/checkout@v3
-
+    - if: ${{ github.event_name == 'pull_request' }}
+      uses: actions/checkout@v4
+      with:
+          ref: ${{ github.event.pull_request.head.ref }}
+          
+    - if: ${{ github.event_name != 'pull_request' }}
+      uses: actions/checkout@v4
+          
     - name: Install poetry
       run: pipx install poetry
 

From 04d7286ef45ed57b90275fa4b2a3aa78104f5fad Mon Sep 17 00:00:00 2001
From: github-actions <actions@github.com>
Date: Thu, 23 Jan 2025 10:32:56 +0000
Subject: [PATCH 5/6] Update coverage badge

---
 coverage_badge.svg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/coverage_badge.svg b/coverage_badge.svg
index e5db27c..6bfc8fa 100644
--- a/coverage_badge.svg
+++ b/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">100%</text>
-        <text x="80" y="14">100%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">99%</text>
+        <text x="80" y="14">99%</text>
     </g>
 </svg>

From 902937c538de90ed983c07b3ae89e214de62e293 Mon Sep 17 00:00:00 2001
From: "kwrobel.eth" <djstrong@gmail.com>
Date: Thu, 23 Jan 2025 11:37:26 +0100
Subject: [PATCH 6/6] Update test.yml

Signed-off-by: kwrobel.eth <djstrong@gmail.com>
---
 .github/workflows/test.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 30deed2..2fe66d0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,7 +18,10 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         python-version: [3.8, 3.11]
-    
+        
+    permissions:
+      contents: write
+      
     steps:
     - if: ${{ github.event_name == 'pull_request' }}
       uses: actions/checkout@v4