""" Integration tests for PDF export functionality. Tests: PDF generation → Caching → Download """ from io import BytesIO from unittest.mock import MagicMock import pytest from fastapi.testclient import TestClient from backend.core.core import SwotAnalysis from backend.core.pdf_cache import pdf_cache from backend.core.pdf_service import SwotPDFGenerator, generate_swot_pdf from backend.site.consts import result_store @pytest.mark.integration @pytest.mark.pdf class TestPDFGeneration: """Test PDF generation with ReportLab""" def test_pdf_generator_creates_valid_pdf(self, sample_swot_analysis: SwotAnalysis): """ Test that PDF generator produces a valid PDF BytesIO buffer. Regression: Ensure BytesIO returned, not int. """ pdf_buffer = generate_swot_pdf(sample_swot_analysis) assert isinstance(pdf_buffer, BytesIO) # Seek to beginning and check for PDF magic bytes pdf_buffer.seek(0) content = pdf_buffer.read(4) assert content == b"%PDF" # Valid PDF magic bytes # Check buffer has content (seek to end to get size) pdf_buffer.seek(0, 2) size = pdf_buffer.tell() assert size > 0 # Buffer has content def test_pdf_generator_no_reserved_style_names( self, sample_swot_analysis: SwotAnalysis ): """ Regression test: Ensure no ReportLab reserved style name conflicts. Bug: KeyError: "Style 'BodyText' already defined" Fix: Renamed to "ReportBodyText" """ # Should not raise KeyError generator = SwotPDFGenerator(sample_swot_analysis) assert "ReportBodyText" in generator.styles # Original BodyText should exist as built-in assert "BodyText" in generator.styles def test_pdf_contains_swot_data(self, sample_swot_analysis: SwotAnalysis): """Verify PDF contains SWOT analysis data""" pdf_buffer = generate_swot_pdf(sample_swot_analysis) # Read PDF as bytes pdf_buffer.seek(0) pdf_bytes = pdf_buffer.read() # PDF should be valid and non-empty assert pdf_bytes.startswith(b"%PDF") assert len(pdf_bytes) > 1000 # Reasonable minimum size # Note: Text search in compressed PDFs is unreliable # For proper validation, would need PyPDF2 or similar # Just verify the PDF structure is valid @pytest.mark.integration @pytest.mark.pdf class TestPDFCaching: """Test PDF caching system""" def test_cache_stores_and_retrieves_pdf( self, sample_swot_analysis: SwotAnalysis, mock_session_id: str ): """Test basic cache storage and retrieval""" pdf_buffer = generate_swot_pdf(sample_swot_analysis) # Store in cache pdf_cache.set(mock_session_id, sample_swot_analysis, pdf_buffer) # Retrieve from cache cached_pdf = pdf_cache.get(mock_session_id, sample_swot_analysis) assert cached_pdf is not None assert isinstance(cached_pdf, BytesIO) # Should be a copy, not same object assert cached_pdf is not pdf_buffer def test_cache_miss_returns_none( self, sample_swot_analysis: SwotAnalysis ): """Cache miss should return None""" cached_pdf = pdf_cache.get("nonexistent_session", sample_swot_analysis) assert cached_pdf is None def test_cache_invalidation( self, sample_swot_analysis: SwotAnalysis, mock_session_id: str ): """Test cache invalidation for a session""" pdf_buffer = generate_swot_pdf(sample_swot_analysis) pdf_cache.set(mock_session_id, sample_swot_analysis, pdf_buffer) # Invalidate pdf_cache.invalidate(mock_session_id) # Should return None after invalidation cached_pdf = pdf_cache.get(mock_session_id, sample_swot_analysis) assert cached_pdf is None @pytest.mark.integration @pytest.mark.pdf @pytest.mark.api class TestPDFDownloadEndpoint: """Test PDF download endpoint""" def test_download_pdf_without_session_returns_404(self, test_client: TestClient): """ Regression test: PDF download without session ID should return 404. Bug: AttributeError: 'int' object has no attribute 'encode' Root Cause: StreamingResponse used with raw bytes instead of Response Fix: Use Response for error paths, not StreamingResponse This tests the first error path (no session_id). """ # Don't set any session cookie response = test_client.get("/download-pdf") assert response.status_code == 404 assert b"No analysis found" in response.content # Verify response can be read without AttributeError assert isinstance(response.content, bytes) assert len(response.content) > 0 @pytest.mark.asyncio async def test_download_pdf_without_result_returns_404( self, mock_session_id: str ): """ Regression test: PDF download with session but no result should return 404. Bug: AttributeError: 'int' object has no attribute 'encode' Root Cause: StreamingResponse used with raw bytes instead of Response Fix: Use Response for error paths, not StreamingResponse This tests the second error path (session exists but result is None). This is the exact error scenario from the production bug. """ from backend.site.router import download_pdf # Create mock request with session but no result mock_request = MagicMock() mock_session = MagicMock() mock_session.get = MagicMock(return_value=mock_session_id) mock_request.session = mock_session # Clear result_store to simulate "analysis not complete" result_store.clear() # Call the endpoint handler directly response = await download_pdf(mock_request) # Should hit the "result is None" error path assert response.status_code == 404 assert b"Analysis not complete" in response.body # Verify response can be read without AttributeError assert isinstance(response.body, bytes) assert len(response.body) > 0 def test_download_pdf_error_paths_use_response_not_streaming( self, test_client: TestClient, mock_session_id: str ): """ Regression test: Verify error paths return Response, not StreamingResponse. This prevents the AttributeError when iterating over bytes in StreamingResponse. Response handles bytes directly, StreamingResponse requires an iterator. """ # Test path 1: No session ID response1 = test_client.get("/download-pdf") assert response1.status_code == 404 # Response should be directly readable (not streamed) assert isinstance(response1.content, bytes) # Test path 2: Session ID but no result with test_client: test_client.cookies.set("analysis_id", mock_session_id) response2 = test_client.get("/download-pdf") assert response2.status_code == 404 # Response should be directly readable (not streamed) assert isinstance(response2.content, bytes) @pytest.mark.asyncio async def test_download_pdf_returns_pdf_file( self, mock_session_id: str, sample_swot_analysis ): """ Test PDF download returns valid PDF file. Regression: Ensure StreamingResponse works with BytesIO iterator. Bug: AttributeError: 'int' object has no attribute 'encode' Fix: Use iterfile() generator for BytesIO chunks """ from backend.site.router import download_pdf # Create mock request with session mock_request = MagicMock() mock_session = MagicMock() mock_session.get = MagicMock(return_value=mock_session_id) mock_request.session = mock_session # Populate result_store with completed analysis result_store[mock_session_id] = sample_swot_analysis # Call the endpoint handler directly response = await download_pdf(mock_request) assert response.status_code == 200 assert response.headers["content-type"] == "application/pdf" assert "attachment" in response.headers["content-disposition"] # New filename format: swot-{company}-{date}.pdf assert "swot-" in response.headers["content-disposition"] assert ".pdf" in response.headers["content-disposition"] # Verify it's a valid PDF by reading the stream body_content = b"" async for chunk in response.body_iterator: body_content += chunk assert body_content.startswith(b"%PDF") assert len(body_content) > 1000 # PDF should have substantial content @pytest.mark.asyncio async def test_download_pdf_uses_cache( self, mock_session_id: str, sample_swot_analysis ): """Test that PDF download uses cache when available""" from backend.site.router import download_pdf # Create mock request mock_request = MagicMock() mock_session = MagicMock() mock_session.get = MagicMock(return_value=mock_session_id) mock_request.session = mock_session # Populate result_store result_store[mock_session_id] = sample_swot_analysis # First request - generates PDF and caches response1 = await download_pdf(mock_request) assert response1.status_code == 200 # Read first response body body1 = b"" async for chunk in response1.body_iterator: body1 += chunk # Second request - should use cache response2 = await download_pdf(mock_request) assert response2.status_code == 200 # Read second response body body2 = b"" async for chunk in response2.body_iterator: body2 += chunk # Both should return identical content assert body1 == body2 @pytest.mark.asyncio async def test_download_pdf_filename_format( self, mock_session_id: str, sample_swot_analysis ): """Test PDF filename follows expected format: swot-{company}-{date}.pdf""" from datetime import datetime from backend.site.router import download_pdf # Create mock request mock_request = MagicMock() mock_session = MagicMock() mock_session.get = MagicMock(return_value=mock_session_id) mock_request.session = mock_session # Populate result_store result_store[mock_session_id] = sample_swot_analysis response = await download_pdf(mock_request) assert response.status_code == 200 disposition = response.headers["content-disposition"] # New format: swot-{primary_entity}-vs-{comparison[0]}-{date}.pdf # Sample has primary_entity="Google", comparison_entities=["Microsoft", "Amazon"] assert "swot-Google-vs-Microsoft" in disposition assert "plus1" in disposition # +1 more comparison (Amazon) assert datetime.now().strftime("%Y-%m-%d") in disposition assert ".pdf" in disposition