Skip to content

Commit 3d1ad10

Browse files
authored
fix html test (#8127)
1 parent bc153c2 commit 3d1ad10

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

test/components/converters/test_html_to_document.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from pathlib import Path
66

77
import pytest
8+
from unittest.mock import patch
89

910
from haystack.components.converters import HTMLToDocument
1011
from haystack.dataclasses import ByteStream
@@ -161,21 +162,23 @@ def test_serde(self):
161162
assert new_converter.extraction_kwargs == converter.extraction_kwargs
162163

163164
def test_run_difficult_html(self, test_files_path):
164-
# boilerpy3's DefaultExtractor fails to extract text from this HTML file
165-
166165
converter = HTMLToDocument()
167166
result = converter.run(sources=[Path(test_files_path / "html" / "paul_graham_superlinear.html")])
168167

169168
assert len(result["documents"]) == 1
170169
assert "Superlinear" in result["documents"][0].content
171170

172-
def test_run_with_extraction_kwargs(self, test_files_path):
171+
@patch("haystack.components.converters.html.extract")
172+
def test_run_with_extraction_kwargs(self, mock_extract, test_files_path):
173173
sources = [test_files_path / "html" / "what_is_haystack.html"]
174174

175175
converter = HTMLToDocument()
176-
precise_converter = HTMLToDocument(extraction_kwargs={"favor_precision": True})
176+
converter.run(sources=sources)
177+
assert mock_extract.call_count == 1
178+
assert "favor_precision" not in mock_extract.call_args[1]
177179

178-
doc = converter.run(sources=sources)["documents"][0]
179-
precise_doc = precise_converter.run(sources=sources)["documents"][0]
180-
181-
assert len(doc.content) > len(precise_doc.content)
180+
precise_converter = HTMLToDocument(extraction_kwargs={"favor_precision": True})
181+
mock_extract.reset_mock()
182+
precise_converter.run(sources=sources)
183+
assert mock_extract.call_count == 1
184+
assert mock_extract.call_args[1]["favor_precision"] is True

0 commit comments

Comments (0)