From ca35299dcd03f6283e6b78bf8303c4462115e7a0 Mon Sep 17 00:00:00 2001
From: "marc.torsoc" <marc.torsoc@gmail.com>
Date: Wed, 21 May 2025 12:08:53 +0200
Subject: [PATCH] add link cleaning and a test for it

---
 src/pipecat/utils/text/markdown_text_filter.py |  3 +++
 tests/test_markdown_text_filter.py             | 12 ++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/src/pipecat/utils/text/markdown_text_filter.py b/src/pipecat/utils/text/markdown_text_filter.py
index 6f5e16bd0..5ec960ad2 100644
--- a/src/pipecat/utils/text/markdown_text_filter.py
+++ b/src/pipecat/utils/text/markdown_text_filter.py
@@ -100,6 +100,9 @@ class MarkdownTextFilter(BaseTextFilter):
             # Restore leading and trailing spaces
             filtered_text = re.sub("§", " ", filtered_text)
 
+            # Drop the http:// or https:// scheme to make links more readable
+            filtered_text = re.sub(r"https?://", "", filtered_text)
+
             return filtered_text
         else:
             return text
diff --git a/tests/test_markdown_text_filter.py b/tests/test_markdown_text_filter.py
index a82a85811..d1cd79a4a 100644
--- a/tests/test_markdown_text_filter.py
+++ b/tests/test_markdown_text_filter.py
@@ -137,6 +137,18 @@ class TestMarkdownTextFilter(unittest.IsolatedAsyncioTestCase):
                 result, expected, f"Newline handling failed for:\n{input_text}\nGot:\n{result}"
             )
 
+    async def test_links_cleaning(self):
+        """Test that the http:// or https:// scheme is stripped from URLs in the text."""
+        test_cases = {
+            "Please check http://example.com": "Please check example.com",
+            "Visit https://www.google.com for more": "Visit www.google.com for more",
+            "No link here": "No link here",  # Text without a URL passes through unchanged
+        }
+
+        for input_text, expected in test_cases.items():
+            result = await self.filter.filter(input_text)
+            self.assertEqual(result, expected, f"Link cleaning failed for: '{input_text}'")
+
     async def test_numbered_list_marker_handling(self):
         """Test handling of numbered lists with the special §NUM§ marker."""
         test_cases = {