Initial commit

This commit is contained in:
2025-10-05 13:55:28 +02:00
commit 68d521677a
767 changed files with 46947 additions and 0 deletions
@@ -0,0 +1,57 @@
from datetime import datetime, timezone
import pytest
from basango.domain import DateRange
def ts(y: int, m: int, d: int, hh: int = 0, mm: int = 0, ss: int = 0) -> int:
    """Return the POSIX timestamp (seconds) for the given UTC wall-clock time."""
    moment = datetime(y, m, d, hh, mm, ss, tzinfo=timezone.utc)
    return int(moment.timestamp())
def test_from_parses_two_dates_with_default_format() -> None:
    """A 'YYYY-MM-DD:YYYY-MM-DD' spec parses to UTC-midnight epoch bounds."""
    parsed = DateRange.create("2024-10-01:2024-10-08")
    assert parsed.start == ts(2024, 10, 1)
    assert parsed.end == ts(2024, 10, 8)
def test_str_and_format_roundtrip() -> None:
    """str() renders raw timestamps; format() renders them back as dates."""
    rng = DateRange.create("2024-10-01:2024-10-02")
    raw_form = f"{ts(2024, 10, 1)}:{ts(2024, 10, 2)}"
    assert str(rng) == raw_form
    assert rng.format("%Y-%m-%d") == "2024-10-01:2024-10-02"
def test_in_range_out_range_inclusive_boundaries() -> None:
    """Both endpoints count as inside; one second beyond either is outside."""
    rng = DateRange.create("2024-10-01:2024-10-02")
    lo = ts(2024, 10, 1)
    hi = ts(2024, 10, 2)
    assert rng.in_range(lo) is True
    assert rng.in_range(hi) is True
    assert rng.out_range(lo - 1) is True
    # End is at 00:00 of end day; times later that day are outside
    assert rng.out_range(ts(2024, 10, 2, 12, 0, 0)) is True
    assert rng.out_range(hi + 1) is True
def test_backward_uses_days_and_next_day_end() -> None:
    """backward() starts `days` before the base date and ends the day after it."""
    anchor = datetime(2024, 10, 31, tzinfo=timezone.utc)
    window = DateRange.backward(date=anchor, days=10)
    assert window.start == ts(2024, 10, 21)
    assert window.end == ts(2024, 11, 1)
def test_from_raises_on_invalid_separator_or_spec() -> None:
    """An empty separator, or a spec lacking the separator, is rejected."""
    bad_cases = (
        ("2024-10-01:2024-10-08", ""),  # separator must be non-empty
        ("2024-10-01", ":"),  # spec must contain the separator
    )
    for spec, sep in bad_cases:
        with pytest.raises(AssertionError):
            DateRange.create(spec, separator=sep)
def test_from_accepts_python_format_string() -> None:
    """A custom strptime format plus a custom separator are both honored."""
    window = DateRange.create("2024/10/01|2024/10/02", fmt="%Y/%m/%d", separator="|")
    assert window.start == ts(2024, 10, 1)
    assert window.end == ts(2024, 10, 2)
@@ -0,0 +1,19 @@
import pytest
from basango.domain import PageRange
def test_it_should_create_page_range():
    """A 'start:end' spec parses into numeric page bounds."""
    parsed = PageRange.create("1:10")
    assert (parsed.start, parsed.end) == (1, 10)
def test_end_page_should_be_greater_than_start_page():
    """A descending spec (end < start) is rejected with an AssertionError."""
    with pytest.raises(AssertionError):
        PageRange.create("10:1")
def test_non_negative_pages():
    """Negative page numbers are rejected with an AssertionError."""
    with pytest.raises(AssertionError):
        PageRange.create("-1:-10")
@@ -0,0 +1,292 @@
from unittest.mock import patch
import pytest
from bs4 import BeautifulSoup
from pydantic import HttpUrl
from basango.core.config import WordPressSourceConfig
from basango.core.config.fetch_config import CrawlerConfig, ClientConfig
from basango.core.config.source_config import HtmlSourceConfig, SourceSelectors
from basango.domain import SourceKind, PageRange
from basango.services.crawler.html_crawler import HtmlCrawler
class TestHtmlCrawler:
    """Test suite for HtmlCrawler.

    All tests stub out network access: `crawl` is patched to return a
    pre-built BeautifulSoup document, so only URL construction, pagination
    parsing, and source-kind validation are exercised.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_client_config(self):
        # Default client settings; no real HTTP is performed in these tests.
        return ClientConfig()

    @pytest.fixture
    def mock_html_source_config(self):
        # Pagination links are located via the CSS selector below.
        return HtmlSourceConfig(
            source_id="test_source",
            source_url=HttpUrl("https://example.com"),
            pagination_template="news",
            source_selectors=SourceSelectors(pagination="ul.pagination > li a"),
            supports_categories=True,
        )

    @pytest.fixture
    def mock_crawler_config(self, mock_html_source_config):
        return CrawlerConfig(source=mock_html_source_config, category="tech")

    @pytest.fixture
    def html_crawler(self, mock_crawler_config, mock_client_config):
        return HtmlCrawler(mock_crawler_config, mock_client_config)

    # ------------------------------------------------------------------
    # Construction / validation
    # ------------------------------------------------------------------

    def test_with_valid_html_source(self, html_crawler):
        """Test __init__ with valid HTML source config."""
        assert html_crawler.source.source_kind == SourceKind.HTML
        assert isinstance(html_crawler.source, HtmlSourceConfig)

    def test_with_invalid_source_kind_raises_error(self, mock_client_config):
        """Test __init__ raises ValueError when source kind is not HTML."""
        wordpress_source = WordPressSourceConfig(
            source_id="test_wordpress",
            source_url=HttpUrl("https://example.com"),
        )
        config = CrawlerConfig(source=wordpress_source)
        with pytest.raises(
            ValueError, match="HtmlCrawler requires a source of kind HTML"
        ):
            HtmlCrawler(config, mock_client_config)

    def test_with_no_source_raises_error(self, mock_client_config):
        """Test __init__ raises ValueError when no source is provided."""
        config = CrawlerConfig(source=None)
        with pytest.raises(
            ValueError, match="HtmlCrawler requires a source of kind HTML"
        ):
            HtmlCrawler(config, mock_client_config)

    # ------------------------------------------------------------------
    # Pagination
    # ------------------------------------------------------------------

    def test_get_pagination_returns_valid_page_range(self, html_crawler):
        """Test that get_pagination returns a valid PageRange."""
        with patch.object(html_crawler, "get_last_page", return_value=5):
            result = html_crawler.get_pagination()
            assert isinstance(result, PageRange)
            assert result.start == 0
            assert result.end == 5
            assert str(result) == "0:5"

    def test_get_last_page_with_valid_pagination_links(self, html_crawler):
        """Test get_last_page extracts page number from pagination links."""
        # Mock HTML with pagination links
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=1">1</a></li>
            <li><a href="/news?page=2">2</a></li>
            <li><a href="/news?page=3">3</a></li>
            <li><a href="/news?page=10">10</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            # The last matching link determines the last page.
            assert result == 10

    def test_get_last_page_with_no_pagination_links(self, html_crawler):
        """Test get_last_page returns 1 when no pagination links found."""
        mock_html = "<div>No pagination here</div>"
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 1

    def test_get_last_page_with_empty_href(self, html_crawler):
        """Test get_last_page returns 1 when href is empty or None."""
        mock_html = """
        <ul class="pagination">
            <li><a>No href</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 1

    def test_get_last_page_with_regex_extraction(self, html_crawler):
        """Test get_last_page extracts page number using regex."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/articles/page/25/">Page 25</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 25

    def test_get_last_page_with_query_parameters(self, html_crawler):
        """Test get_last_page extracts page number from query parameters."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?category=tech&page=15&sort=date">Last</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 15

    def test_get_last_page_with_invalid_page_parameter(self, html_crawler):
        """Test get_last_page returns 1 when page parameter is invalid."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=invalid">Last</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 1

    def test_get_last_page_with_category_support(self, html_crawler):
        """Test get_last_page uses category in URL when supported."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?category=tech&page=8">8</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl") as mock_crawl:
            mock_crawl.return_value = mock_soup
            html_crawler.get_last_page()
            # The URL construction concatenates source_url with the path
            # Since the template doesn't contain {category}, it should remain unchanged
            expected_url = "https://example.com/news"
            mock_crawl.assert_called_once_with(expected_url)

    def test_get_last_page_with_category_template(self, mock_client_config):
        """Test get_last_page uses category replacement when template contains {category}."""
        source_config = HtmlSourceConfig(
            source_id="test_source",
            source_url=HttpUrl("https://example.com"),
            pagination_template="news/{category}",
            source_selectors=SourceSelectors(pagination="ul.pagination > li a"),
            supports_categories=True,
        )
        crawler_config = CrawlerConfig(source=source_config, category="tech")
        crawler = HtmlCrawler(crawler_config, mock_client_config)
        mock_html = """
        <ul class="pagination">
            <li><a href="/news/tech?page=5">5</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(crawler, "crawl") as mock_crawl:
            mock_crawl.return_value = mock_soup
            crawler.get_last_page()
            # {category} placeholder should be substituted with the configured category.
            expected_url = "https://example.com/news/tech"
            mock_crawl.assert_called_once_with(expected_url)

    def test_get_last_page_without_category_support(self, html_crawler):
        """Test get_last_page uses default template when categories not supported."""
        # Modify source to not support categories
        html_crawler.source.supports_categories = False
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=5">5</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl") as mock_crawl:
            mock_crawl.return_value = mock_soup
            html_crawler.get_last_page()
            # Verify the URL was constructed without category replacement
            expected_url = "https://example.com/news"
            mock_crawl.assert_called_once_with(expected_url)

    def test_get_last_page_without_category_in_config(
        self, mock_client_config, mock_html_source_config
    ):
        """Test get_last_page uses default template when no category in config."""
        config = CrawlerConfig(source=mock_html_source_config, category=None)
        crawler = HtmlCrawler(config, mock_client_config)
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=3">3</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(crawler, "crawl") as mock_crawl:
            mock_crawl.return_value = mock_soup
            crawler.get_last_page()
            # Verify the URL was constructed without category replacement
            expected_url = "https://example.com/news"
            mock_crawl.assert_called_once_with(expected_url)

    def test_get_last_page_with_multiple_numbers_in_href(self, html_crawler):
        """Test get_last_page extracts first number when multiple numbers present."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/news/2024/page/42/comments/100">Last</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            # Should extract the first number found (2024)
            # NOTE(review): this pins current behavior; the first number in the
            # path is not necessarily the page number — confirm this is intended.
            assert result == 2024

    def test_supports_html_source_kind(self, html_crawler):
        """Test that supports method returns True for HTML source kind."""
        assert html_crawler.supports(SourceKind.HTML) is True
        assert html_crawler.supports(SourceKind.WORDPRESS) is False

    def test_get_pagination_integration(self, html_crawler):
        """Integration test for get_pagination calling get_last_page."""
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=7">7</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_pagination()
            assert isinstance(result, PageRange)
            assert result.start == 0
            assert result.end == 7

    def test_get_last_page_with_non_string_href(self, html_crawler):
        """Test get_last_page handles non-string href attributes."""
        # Create a mock element with href as a list (AttributeValueList)
        mock_html = """
        <ul class="pagination">
            <li><a href="/news?page=5">5</a></li>
        </ul>
        """
        mock_soup = BeautifulSoup(mock_html, "html.parser")
        # Modify the href to simulate a non-string type by removing it
        pagination_link = mock_soup.select("ul.pagination > li a")[-1]
        # Instead of setting href to a list, let's test with missing href
        del pagination_link.attrs["href"]
        with patch.object(html_crawler, "crawl", return_value=mock_soup):
            result = html_crawler.get_last_page()
            assert result == 1
@@ -0,0 +1,240 @@
from unittest.mock import Mock, patch
import pytest
from pydantic import HttpUrl
from basango.core.config.fetch_config import CrawlerConfig, ClientConfig
from basango.core.config.source_config import (
WordPressSourceConfig,
HtmlSourceConfig,
SourceSelectors,
)
from basango.domain import SourceKind, PageRange
from basango.services.crawler.wordpress_crawler import WordpressCrawler
class TestWordPressCrawler:
    """Test suite for WordPressCrawler.

    Network access is stubbed by patching `client.get`; tests cover source
    validation, pagination derived from WP REST API headers, and the
    category-id -> slug mapping.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_client_config(self):
        # Default client settings; no real HTTP is performed in these tests.
        return ClientConfig()

    @pytest.fixture
    def mock_wordpress_source_config(self):
        return WordPressSourceConfig(
            source_id="test_wordpress_source",
            source_url=HttpUrl("https://example.com/"),
            supports_categories=True,
            categories=["tech", "news"],
        )

    @pytest.fixture
    def mock_crawler_config(self, mock_wordpress_source_config):
        return CrawlerConfig(source=mock_wordpress_source_config, category="tech")

    @pytest.fixture
    def wordpress_crawler(self, mock_crawler_config, mock_client_config):
        return WordpressCrawler(mock_crawler_config, mock_client_config)

    @pytest.fixture
    def mock_response_with_headers(self):
        # Simulates the WP REST API pagination headers (X-WP-TotalPages etc.).
        response = Mock()
        response.headers = {
            WordpressCrawler.TOTAL_PAGES_HEADER: "5",
            WordpressCrawler.TOTAL_POSTS_HEADER: "47",
        }
        return response

    # ------------------------------------------------------------------
    # Construction / validation
    # ------------------------------------------------------------------

    def test_with_valid_wordpress_source(self, wordpress_crawler):
        """Test __init__ with valid WordPress source config."""
        assert wordpress_crawler.source.source_kind == SourceKind.WORDPRESS
        assert isinstance(wordpress_crawler.source, WordPressSourceConfig)

    def test_with_invalid_source_kind_raises_error(self, mock_client_config):
        """Test __init__ raises ValueError when source kind is not WORDPRESS."""
        html_source = HtmlSourceConfig(
            source_id="test_html",
            source_url=HttpUrl("https://example.com"),
            pagination_template="news",
            source_selectors=SourceSelectors(),
        )
        config = CrawlerConfig(source=html_source)
        with pytest.raises(
            ValueError, match="WordpressCrawler requires a source of kind WORDPRESS"
        ):
            WordpressCrawler(config, mock_client_config)

    def test_with_no_source_raises_error(self, mock_client_config):
        """Test __init__ raises ValueError when source is None."""
        config = CrawlerConfig(source=None)
        with pytest.raises(
            ValueError, match="WordpressCrawler requires a source of kind WORDPRESS"
        ):
            WordpressCrawler(config, mock_client_config)

    # ------------------------------------------------------------------
    # Pagination via API headers
    # ------------------------------------------------------------------

    def test_get_pagination_returns_valid_page_range(
        self, wordpress_crawler, mock_response_with_headers
    ):
        """Test get_pagination returns correct PageRange from WordPress API headers."""
        with patch.object(
            wordpress_crawler.client, "get", return_value=mock_response_with_headers
        ):
            result = wordpress_crawler.get_pagination()
            assert isinstance(result, PageRange)
            assert result.start == 1
            assert result.end == 5
            assert str(result) == "1:5"

    def test_get_pagination_with_default_headers(self, wordpress_crawler):
        """Test get_pagination with default headers when WordPress headers are missing."""
        mock_response = Mock()
        mock_response.headers = {}  # No WordPress headers
        with patch.object(wordpress_crawler.client, "get", return_value=mock_response):
            result = wordpress_crawler.get_pagination()
            assert isinstance(result, PageRange)
            assert result.start == 1
            assert result.end == 1  # Default when no headers

    def test_get_pagination_makes_correct_api_call(self, wordpress_crawler):
        """Test get_pagination makes the correct WordPress API call."""
        mock_response = Mock()
        mock_response.headers = {
            WordpressCrawler.TOTAL_PAGES_HEADER: "3",
            WordpressCrawler.TOTAL_POSTS_HEADER: "25",
        }
        with patch.object(
            wordpress_crawler.client, "get", return_value=mock_response
        ) as mock_get:
            wordpress_crawler.get_pagination()
            # Minimal-payload probe: only ids, max page size.
            expected_url = f"{wordpress_crawler.source.source_url}wp-json/wp/v2/posts?_fields=id&per_page=100"
            mock_get.assert_called_once_with(expected_url)

    # ------------------------------------------------------------------
    # Category fetching / mapping
    # ------------------------------------------------------------------

    def test_fetch_categories_populates_category_map(self, wordpress_crawler):
        """Test _fetch_categories populates the category_map correctly."""
        mock_categories_response = Mock()
        mock_categories_response.json.return_value = [
            {"id": 1, "slug": "technology", "count": 15},
            {"id": 2, "slug": "business", "count": 10},
            {"id": 3, "slug": "sports", "count": 8},
        ]
        with patch.object(
            wordpress_crawler.client, "get", return_value=mock_categories_response
        ):
            wordpress_crawler._fetch_categories()
            assert len(wordpress_crawler.category_map) == 3
            assert wordpress_crawler.category_map[1] == "technology"
            assert wordpress_crawler.category_map[2] == "business"
            assert wordpress_crawler.category_map[3] == "sports"

    def test_fetch_categories_makes_correct_api_call(self, wordpress_crawler):
        """Test _fetch_categories makes the correct WordPress API call."""
        mock_response = Mock()
        mock_response.json.return_value = []
        with patch.object(
            wordpress_crawler.client, "get", return_value=mock_response
        ) as mock_get:
            wordpress_crawler._fetch_categories()
            expected_url = f"{wordpress_crawler.source.source_url}wp-json/wp/v2/categories?{WordpressCrawler.CATEGORY_QUERY}"
            mock_get.assert_called_once_with(expected_url)

    def test_map_categories_with_populated_category_map(self, wordpress_crawler):
        """Test _map_categories returns correct comma-separated string."""
        # Pre-populate category map
        wordpress_crawler.category_map = {
            1: "technology",
            2: "business",
            3: "sports",
            4: "lifestyle",
        }
        result = wordpress_crawler._map_categories([2, 1, 4])
        # Should be sorted by category ID
        assert result == "technology,business,lifestyle"

    def test_map_categories_with_empty_category_map_fetches_categories(
        self, wordpress_crawler
    ):
        """Test _map_categories fetches categories when category_map is empty."""
        mock_categories_response = Mock()
        mock_categories_response.json.return_value = [
            {"id": 1, "slug": "tech", "count": 15},
            {"id": 2, "slug": "news", "count": 10},
        ]
        wordpress_crawler.category_map = {}
        with patch.object(
            wordpress_crawler.client, "get", return_value=mock_categories_response
        ):
            result = wordpress_crawler._map_categories([1, 2])
            assert result == "tech,news"
            assert len(wordpress_crawler.category_map) == 2

    def test_map_categories_filters_unknown_category_ids(self, wordpress_crawler):
        """Test _map_categories filters out unknown category IDs."""
        wordpress_crawler.category_map = {1: "technology", 2: "business"}
        result = wordpress_crawler._map_categories([1, 99, 2, 100])
        # Should only include known categories
        assert result == "technology,business"

    def test_map_categories_with_empty_category_list(self, wordpress_crawler):
        """Test _map_categories returns empty string for empty category list."""
        wordpress_crawler.category_map = {1: "tech", 2: "news"}
        result = wordpress_crawler._map_categories([])
        assert result == ""

    def test_map_categories_sorts_by_category_id(self, wordpress_crawler):
        """Test _map_categories sorts categories by ID."""
        wordpress_crawler.category_map = {3: "charlie", 1: "alpha", 2: "beta"}
        result = wordpress_crawler._map_categories([3, 1, 2])
        # Should be sorted by ID: 1, 2, 3
        assert result == "alpha,beta,charlie"

    def test_supports_wordpress_source_kind(self, wordpress_crawler):
        """Test supports method returns True for WordPress source kind."""
        assert wordpress_crawler.supports(SourceKind.WORDPRESS) is True
        assert wordpress_crawler.supports(SourceKind.HTML) is False

    @pytest.mark.parametrize(
        "pages,posts,expected_start,expected_end",
        [
            ("1", "10", 1, 1),
            ("5", "47", 1, 5),
            ("10", "100", 1, 10),
        ],
    )
    def test_get_pagination_with_various_header_values(
        self, wordpress_crawler, pages, posts, expected_start, expected_end
    ):
        """Test get_pagination with various header values."""
        mock_response = Mock()
        mock_response.headers = {
            WordpressCrawler.TOTAL_PAGES_HEADER: pages,
            WordpressCrawler.TOTAL_POSTS_HEADER: posts,
        }
        with patch.object(wordpress_crawler.client, "get", return_value=mock_response):
            result = wordpress_crawler.get_pagination()
            assert result.start == expected_start
            assert result.end == expected_end
@@ -0,0 +1,70 @@
from datetime import datetime, timezone
import pytest
from basango.services.date_parser import DateParser
# NOTE(review): the `pattern`/`replacement` pairs below use PHP-style
# slash-delimited regexes and `$N` backreferences rather than Python's
# raw patterns and `\N` groups — presumably DateParser normalizes these
# before applying re.sub; confirm against the DateParser implementation.
@pytest.mark.parametrize(
    "date_str, fmt, pattern, replacement, expected",
    [
        # Plain ISO-like string, parsed directly with the given format.
        (
            "2004-02-12T15:19:21",
            "%Y-%m-%dT%H:%M:%S",
            None,
            None,
            1076599161,  # 2004-02-12 15:19:21 UTC
        ),
        # Regex rewrite: "DD/MM/YYYY - HH:MM" -> "YYYY-MM-DD HH:MM" before parsing.
        (
            "08/10/2024 - 00:00",
            "%Y-%m-%d %H:%M",
            r"/(\d{2})\/(\d{2})\/(\d{4}) - (\d{2}:\d{2})/",
            r"$3-$2-$1 $4",
            1728345600,  # 2024-10-08 00:00:00 UTC
        ),
        # Same rewrite with a leading 3-letter weekday abbreviation.
        (
            "mar 08/10/2024 - 00:00",
            "%Y-%m-%d %H:%M",
            r"/\w{3} (\d{2})\/(\d{2})\/(\d{4}) - (\d{2}:\d{2})/",
            r"$3-$2-$1 $4",
            1728345600,  # 2024-10-08 00:00:00 UTC
        ),
        # French long-form date; NOTE(review): the pattern expects numeric
        # fields only and does not obviously match "octobre" — verify how
        # DateParser handles month names before relying on this case.
        (
            "Mardi 8 octobre 2024 - 00:00",
            "%Y-%m-%d %H:%M",
            r"/(\d{1}) (\d{1,2}) (\d{2}) (\d{4}) - (\d{2}:\d{2})/",
            r"$4-$3-$2 $5",
            1728345600,  # 2024-10-08 00:00:00 UTC
        ),
        # Dotted European format parsed directly, no rewrite needed.
        (
            "8.10.2024 00:00",
            "%d.%m.%Y %H:%M",
            None,
            None,
            1728345600,  # 2024-10-08 00:00:00 UTC
        ),
    ],
)
def test_create_timestamp_with_valid_dates(
    date_str: str,
    fmt: str | None,
    pattern: str | None,
    replacement: str | None,
    expected: int,
) -> None:
    """Each (input, format, optional rewrite) combination yields the expected UTC epoch."""
    dr = DateParser()
    result = dr.create_timestamp(date_str, fmt, pattern, replacement)
    assert result == expected
def test_create_timestamp_with_invalid_date_falls_back_to_midnight_today() -> None:
    """An unparseable string falls back to today's midnight in UTC."""
    parser = DateParser()
    # Compute expected midnight (UTC) before invoking the parser to avoid edge cases.
    today_start = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    result = parser.create_timestamp("invalid date string", None, None, None)
    assert result == int(today_start.timestamp())
+9
View File
@@ -0,0 +1,9 @@
import os
import sys
# Make the project's src/ directory importable so `import basango...`
# resolves when tests run from the repository root.
_TESTS_DIR = os.path.dirname(__file__)
ROOT = os.path.dirname(_TESTS_DIR)
SRC = os.path.join(ROOT, "src")
if SRC not in sys.path:
    sys.path.insert(0, SRC)