|
1 | | -import os |
2 | | -import pytest |
3 | | -from unittest.mock import patch, MagicMock |
4 | 1 | from scrapegraphai.nodes import FetchNode |
| 2 | +from langchain_core.documents import Document |
5 | 3 |
|
6 | | -def get_file_path(file_name): |
7 | | - """ |
8 | | - Helper function to get the absolute file path. |
9 | | - """ |
10 | | - curr_dir = os.path.dirname(os.path.realpath(__file__)) |
11 | | - file_path = os.path.join(curr_dir, file_name) |
12 | | - return file_path |
13 | 4 |
|
14 | | -@patch('scrapegraphai.nodes.FetchNode.execute') |
15 | | -def test_fetch_node_html(mock_execute): |
16 | | - """ |
17 | | - Test FetchNode with HTML input. |
| 5 | +def test_fetch_html(mocker): |
| 6 | + title = "ScrapeGraph AI" |
| 7 | + link_url = "https://github.com/VinciGit00/Scrapegraph-ai" |
| 8 | + img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png" |
| 9 | + content = f""" |
| 10 | + <html> |
| 11 | + <head> |
| 12 | + <title>{title}</title> |
| 13 | + </head> |
| 14 | + <body> |
| 15 | + <a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a> |
| 16 | + <img src="{img_url}" alt="Scrapegraph-ai Logo"> |
| 17 | + </body> |
| 18 | + </html> |
18 | 19 | """ |
19 | | - mock_execute.return_value = MagicMock() |
20 | | - fetch_node = FetchNode( |
| 20 | + mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader") |
| 21 | + mock_loader = mock_loader_cls.return_value |
| 22 | + mock_loader.load.return_value = [Document(page_content=content)] |
| 23 | + node = FetchNode( |
21 | 24 | input="url | local_dir", |
22 | | - output=["doc"], |
23 | | - node_config={ |
24 | | - "headless": False |
25 | | - } |
| 25 | + output=["doc", "links", "images"], |
| 26 | + node_config={"headless": False}, |
26 | 27 | ) |
27 | | - state = { |
28 | | - "url": "https://twitter.com/home" |
29 | | - } |
30 | | - result = fetch_node.execute(state) |
31 | | - assert result is not None |
32 | | - mock_execute.assert_called_once_with(state) |
| 28 | + result = node.execute({"url": "https://scrapegraph-ai.com/example"}) |
33 | 29 |
|
34 | | -@patch('scrapegraphai.nodes.FetchNode.execute') |
35 | | -def test_fetch_node_json(mock_execute): |
36 | | - """ |
37 | | - Test FetchNode with JSON input. |
38 | | - """ |
39 | | - mock_execute.return_value = MagicMock() |
40 | | - file_path_json = get_file_path("inputs/example.json") |
41 | | - state_json = { |
42 | | - "json": file_path_json |
43 | | - } |
44 | | - fetch_node_json = FetchNode( |
| 30 | + mock_loader.load.assert_called_once() |
| 31 | + doc = result["doc"][0] |
| 32 | + assert title in doc.page_content |
| 33 | + assert link_url in result["links"] |
| 34 | + assert img_url in result["images"] |
| 35 | + |
| 36 | + |
| 37 | +def test_fetch_json(): |
| 38 | + node = FetchNode( |
45 | 39 | input="json", |
46 | 40 | output=["doc"], |
47 | 41 | ) |
48 | | - result_json = fetch_node_json.execute(state_json) |
49 | | - assert result_json is not None |
50 | | - mock_execute.assert_called_once_with(state_json) |
| 42 | + result = node.execute({"json": "tests/nodes/inputs/example.json"}) |
| 43 | + assert result is not None |
51 | 44 |
|
52 | | -@patch('scrapegraphai.nodes.FetchNode.execute') |
53 | | -def test_fetch_node_xml(mock_execute): |
54 | | - """ |
55 | | - Test FetchNode with XML input. |
56 | | - """ |
57 | | - mock_execute.return_value = MagicMock() |
58 | | - file_path_xml = get_file_path("inputs/books.xml") |
59 | | - state_xml = { |
60 | | - "xml": file_path_xml |
61 | | - } |
62 | | - fetch_node_xml = FetchNode( |
| 45 | + |
| 46 | +def test_fetch_xml(): |
| 47 | + node = FetchNode( |
63 | 48 | input="xml", |
64 | 49 | output=["doc"], |
65 | 50 | ) |
66 | | - result_xml = fetch_node_xml.execute(state_xml) |
67 | | - assert result_xml is not None |
68 | | - mock_execute.assert_called_once_with(state_xml) |
| 51 | + result = node.execute({"xml": "tests/nodes/inputs/books.xml"}) |
| 52 | + assert result is not None |
69 | 53 |
|
70 | | -@patch('scrapegraphai.nodes.FetchNode.execute') |
71 | | -def test_fetch_node_csv(mock_execute): |
72 | | - """ |
73 | | - Test FetchNode with CSV input. |
74 | | - """ |
75 | | - mock_execute.return_value = MagicMock() |
76 | | - file_path_csv = get_file_path("inputs/username.csv") |
77 | | - state_csv = { |
78 | | - "csv": file_path_csv |
79 | | - } |
80 | | - fetch_node_csv = FetchNode( |
| 54 | + |
| 55 | +def test_fetch_csv(): |
| 56 | + node = FetchNode( |
81 | 57 | input="csv", |
82 | 58 | output=["doc"], |
83 | 59 | ) |
84 | | - result_csv = fetch_node_csv.execute(state_csv) |
85 | | - assert result_csv is not None |
86 | | - mock_execute.assert_called_once_with(state_csv) |
| 60 | + result = node.execute({"csv": "tests/nodes/inputs/username.csv"}) |
| 61 | + assert result is not None |
87 | 62 |
|
88 | | -@patch('scrapegraphai.nodes.FetchNode.execute') |
89 | | -def test_fetch_node_txt(mock_execute): |
90 | | - """ |
91 | | - Test FetchNode with TXT input. |
92 | | - """ |
93 | | - mock_execute.return_value = MagicMock() |
94 | | - file_path_txt = get_file_path("inputs/plain_html_example.txt") |
95 | | - state_txt = { |
96 | | - "txt": file_path_txt |
97 | | - } |
98 | | - fetch_node_txt = FetchNode( |
| 63 | + |
| 64 | +def test_fetch_txt(): |
| 65 | + node = FetchNode( |
99 | 66 | input="txt", |
100 | | - output=["doc"], |
| 67 | + output=["doc", "links", "images"], |
101 | 68 | ) |
102 | | - result_txt = fetch_node_txt.execute(state_txt) |
103 | | - assert result_txt is not None |
104 | | - mock_execute.assert_called_once_with(state_txt) |
| 69 | + with open("tests/nodes/inputs/plain_html_example.txt") as f: |
| 70 | + result = node.execute({"txt": f.read()}) |
| 71 | + assert result is not None |
0 commit comments