Skip to content

Commit 9f3b23e

Browse files
authored
Adjust embeds to be placed directly before empty end-of-verse paragraph markers (#182)
1 parent 0354cc7 commit 9f3b23e

File tree

2 files changed: 38 additions (+38) and 6 deletions (-6).

machine/corpora/place_markers_usfm_update_block_handler.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ def process_block(self, block: UsfmUpdateBlock) -> UsfmUpdateBlock:
7373
to_place = []
7474
adj_src_toks = []
7575
placed_elements = [elements.pop(0)] if elements[0].type == UsfmUpdateBlockElementType.OTHER else []
76+
embed_elements = []
7677
ignored_elements = []
7778
for element in elements:
7879
if element.type == UsfmUpdateBlockElementType.TEXT:
@@ -90,8 +91,10 @@ def process_block(self, block: UsfmUpdateBlock) -> UsfmUpdateBlock:
9091
else:
9192
trg_sent += element.tokens[0].to_usfm()
9293

93-
if element.marked_for_removal or element.type == UsfmUpdateBlockElementType.EMBED:
94+
if element.marked_for_removal:
9495
ignored_elements.append(element)
96+
elif element.type == UsfmUpdateBlockElementType.EMBED:
97+
embed_elements.append(element)
9598
elif element.type in [UsfmUpdateBlockElementType.PARAGRAPH, UsfmUpdateBlockElementType.STYLE]:
9699
to_place.append(element)
97100
adj_src_toks.append(src_tok_idx)
@@ -110,7 +113,7 @@ def process_block(self, block: UsfmUpdateBlock) -> UsfmUpdateBlock:
110113

111114
to_insert.append((trg_str_idx, element))
112115
to_insert.sort(key=lambda x: x[0])
113-
to_insert += [(len(trg_sent), element) for element in end_elements]
116+
to_insert += [(len(trg_sent), element) for element in embed_elements + end_elements]
114117

115118
# Construct new text tokens to put between markers
116119
# and reincorporate headers and empty end-of-verse paragraph markers

tests/corpora/test_place_markers_usfm_update_block_handler.py

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -161,10 +161,10 @@ def test_trailing_empty_paragraphs() -> None:
161161
rows = [(scr_ref("MAT 1:1"), "New verse 1")]
162162
usfm = r"""\id MAT
163163
\c 1
164-
\v 1 Verse 1
164+
\v 1 \f embed 1 \f*Verse 1
165165
\p
166166
\b
167-
\q1 \f embed \f*
167+
\q1 \f embed 2 \f*
168168
"""
169169

170170
align_info = [
@@ -183,10 +183,10 @@ def test_trailing_empty_paragraphs() -> None:
183183
)
184184
result = r"""\id MAT
185185
\c 1
186-
\v 1 New verse 1
186+
\v 1 New verse 1 \f embed 1 \f*\f embed 2 \f*
187187
\p
188188
\b
189-
\q1 \f embed \f*
189+
\q1
190190
"""
191191
assess(target, result)
192192

@@ -419,6 +419,35 @@ def test_split_tokens() -> None:
419419
assess(target, result)
420420

421421

422+
def test_no_text() -> None:
423+
rows = [(scr_ref("MAT 1:1"), "")]
424+
usfm = r"""\id MAT
425+
\c 1
426+
\v 1 \w \w*
427+
"""
428+
429+
align_info = [
430+
PlaceMarkersAlignmentInfo(
431+
refs=["MAT 1:1"],
432+
source_tokens=[],
433+
translation_tokens=[],
434+
alignment=to_word_alignment_matrix(""),
435+
),
436+
]
437+
target = update_usfm(
438+
rows,
439+
usfm,
440+
paragraph_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
441+
style_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
442+
update_block_handlers=[PlaceMarkersUsfmUpdateBlockHandler(align_info)],
443+
)
444+
result = r"""\id MAT
445+
\c 1
446+
\v 1 \w \w*
447+
"""
448+
assess(target, result)
449+
450+
422451
def scr_ref(*refs: str) -> List[ScriptureRef]:
423452
return [ScriptureRef.parse(ref) for ref in refs]
424453

0 commit comments

Comments
 (0)