Merge pull request #1862 from marctorsoc/clean-links-in-md-text-filter

Add link cleaning in MD text filter
This commit is contained in:
Mark Backman
2025-05-21 09:20:27 -04:00
committed by GitHub
2 changed files with 15 additions and 0 deletions

View File

@@ -100,6 +100,9 @@ class MarkdownTextFilter(BaseTextFilter):
# Restore leading and trailing spaces
filtered_text = re.sub("§", " ", filtered_text)
## Make links more readable
filtered_text = re.sub(r"https?://", "", filtered_text)
return filtered_text
else:
return text

View File

@@ -137,6 +137,18 @@ class TestMarkdownTextFilter(unittest.IsolatedAsyncioTestCase):
result, expected, f"Newline handling failed for:\n{input_text}\nGot:\n{result}"
)
async def test_links_cleaning(self):
    """Check that the filter drops the ``http://`` / ``https://`` scheme from URLs.

    Input that contains no URL is expected to come back unchanged.
    """
    # (raw input, expected filtered output) pairs, checked in order.
    scenarios = (
        ("Please check http://example.com", "Please check example.com"),
        ("Visit https://www.google.com for more", "Visit www.google.com for more"),
        ("No link here", "No link here"),  # No link to clean
    )
    for raw, cleaned in scenarios:
        actual = await self.filter.filter(raw)
        failure_note = f"Link cleaning failed for: '{raw}'"
        self.assertEqual(actual, cleaned, failure_note)
async def test_numbered_list_marker_handling(self):
"""Test handling of numbered lists with the special §NUM§ marker."""
test_cases = {