Skip to content

Commit 1e9a564

Browse files
committed
fix(proxy-rotation): removed duplicated arg and passed the loader_kwargs correctly to the node
1 parent b54d984 commit 1e9a564

File tree

6 files changed

+35
-2
lines changed

6 files changed

+35
-2
lines changed

examples/openai/proxy.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
from scrapegraphai.utils import search_proxy_servers
2+
3+
proxies = search_proxy_servers(
4+
anonymous=True,
5+
countryset={"IT"},
6+
# secure=True,
7+
timeout=1.0,
8+
max_shape=2
9+
)
10+
11+
print(proxies)

examples/openai/smart_scraper_openai.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,19 @@
2222
"model": "gpt-3.5-turbo",
2323
},
2424
"verbose": True,
25+
"headless": False,
26+
"loader_kwargs": {
27+
"proxy" : {
28+
"server": "broker",
29+
"criteria": {
30+
"anonymous": True,
31+
# "secure": True,
32+
"countryset": {"IT"},
33+
"timeout": 5.0,
34+
"max_shape": 2
35+
},
36+
},
37+
}
2538
}
2639

2740
# ************************************************

scrapegraphai/graphs/abstract_graph.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,11 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
5858
"verbose", False)
5959
self.headless = True if config is None else config.get(
6060
"headless", True)
61+
self.loader_kwargs = config.get("loader_kwargs", {})
62+
6163
common_params = {"headless": self.headless,
6264
"verbose": self.verbose,
65+
"loader_kwargs": self.loader_kwargs,
6366
"llm_model": self.llm_model,
6467
"embedder_model": self.embedder_model}
6568
self.set_common_params(common_params, overwrite=False)

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,10 @@ def _create_graph(self) -> BaseGraph:
5757
"""
5858
fetch_node = FetchNode(
5959
input="url | local_dir",
60-
output=["doc"]
60+
output=["doc"],
61+
node_config={
62+
"loader_kwargs": self.config.get("loader_kwargs", {}),
63+
}
6164
)
6265
parse_node = ParseNode(
6366
input="doc",

scrapegraphai/nodes/fetch_node.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,9 @@ def __init__(
4949
self.verbose = (
5050
False if node_config is None else node_config.get("verbose", False)
5151
)
52+
self.loader_kwargs = (
53+
{} if node_config is None else node_config.get("loader_kwargs", {})
54+
)
5255

5356
def execute(self, state):
5457
"""

scrapegraphai/utils/proxy_rotation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def _search_proxy(proxy: Proxy) -> ProxySettings:
161161
Returns:
162162
A 'playwright' compliant proxy configuration.
163163
"""
164-
server = search_proxy_servers(max_shape=1, **proxy.get("criteria", {}))[0]
164+
server = search_proxy_servers(**proxy.get("criteria", {}))[0]
165165

166166
return {"server": server}
167167

0 commit comments

Comments
 (0)