-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
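Steps to reproduce: run the test `test/test_extract_text.py::ExtractTextTest::test_extract_title_id_para_from_ipcc_syr`. It fails with a `UnicodeEncodeError` raised from `csvwriter.writerow(row)` while writing the CSV output.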
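System: Windows
Likely cause (to be confirmed against the code that opens the CSV file): the row is encoded through the cp1252 codec, which has no mapping for U+2265 ("≥"), so the output file is presumably opened without an explicit encoding and falls back to the Windows locale default. The `\r\r\n` line ending in the failing row also suggests the file is opened without `newline=""`. Opening the output file with `encoding="utf-8", newline=""` should address both.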
Output Report
ExtractTextTest.test_extract_title_id_para_from_ipcc_syr ___________________________________________________
self = <test.test_extract_text.ExtractTextTest testMethod=test_extract_title_id_para_from_ipcc_syr>
def test_extract_title_id_para_from_ipcc_syr(self):
"""
read a chapter from IPCC and extract paras with ids and their section titles
Purpose is to create CSV for input to LLM/RAG
reads IPCC SYR report, finds all divs with paragraphs and returns the title and
first para.
"""
chapter = "longer-report"
wg = "syr"
infile = Path(Resources.TEST_RESOURCES_DIR, "ipcc", "cleaned_content",
wg, chapter, "html_with_ids.html")
outdir = Path(Resources.TEMP_DIR, "csv", "ipcc")
outfile_name = "syr_paras.csv"
csvout = Path(outdir, outfile_name)
> MiscLib.create_and_write_csv(
infile,
outdir,
csvout,
div_with_p_with_ids_xpath=".//body//div[p[@id]]",
para_xpath=".//p",
title_xpath=".//h2/text()|.//h3/text()|.//h4/text()")
test\test_extract_text.py:236:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test\test_extract_text.py:399: in create_and_write_csv
cls._write_csv(csvout, divs_with_p_with_ids, para_xpath, title_xpath, wg, chap)
test\test_extract_text.py:375: in _write_csv
cls._add_ids_and_text_as_csv_row(csvwriter, div_title, para, wg, chap)
test\test_extract_text.py:359: in _add_ids_and_text_as_csv_row
csvwriter.writerow(row)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <encodings.cp1252.IncrementalEncoder object at 0x0000018C792CDC40>
input = 'None_None_3.1.1_p3,NO_TITLE,"Modelled pathways consistent with the continuation of policies implemented by the end of...te sensitivity or carbon cycle feedbacks are higher than the best estimate (high confidence). {WGIII SPM C.1.3}"\r\r\n'
final = False
def encode(self, input, final=False):
> return codecs.charmap_encode(input,self.errors,encoding_table)[0]
E UnicodeEncodeError: 'charmap' codec can't encode character '\u2265' in position 256: character maps to <undefined>
..\..\..\anaconda3\envs\amienv\Lib\encodings\cp1252.py:19: UnicodeEncodeError
=================================================================== short test summary info ====================================================================
FAILED test/test_extract_text.py::ExtractTextTest::test_extract_title_id_para_from_ipcc_syr - UnicodeEncodeError: 'charmap' codec can't encode character '\u2265' in position 256: character maps to <undefined>
================================================================= 1 failed in 79.1
Metadata
Metadata
Assignees
Labels
No labels