Files
Pygentic-AI/tests/integration/test_pdf_export.py
Francis Secada dd2b2e8a33 feat(pdf): improve download filename with company names and date
- Generate descriptive filenames: swot-{company}-vs-{competitor}-{date}.pdf
- Sanitize company names for filesystem safety
- Include date in YYYY-MM-DD format
- Handle multiple comparison entities (e.g., "plus2" for 2 additional)

Examples:
- Single entity: "swot-Apple-2026-02-04.pdf"
- With comparison: "swot-Apple-vs-Microsoft-2026-02-04.pdf"
- Multiple comparisons: "swot-Apple-vs-Microsoft-plus2-2026-02-04.pdf"

Fixes: Downloaded PDF had generic filename with .txt suffix

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-04 17:04:39 -05:00

311 lines
11 KiB
Python

"""
Integration tests for PDF export functionality.
Tests: PDF generation → Caching → Download
"""
from io import BytesIO
from unittest.mock import MagicMock
import pytest
from fastapi.testclient import TestClient
from backend.core.core import SwotAnalysis
from backend.core.pdf_cache import pdf_cache
from backend.core.pdf_service import SwotPDFGenerator, generate_swot_pdf
from backend.site.consts import result_store
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFGeneration:
    """Exercise ``generate_swot_pdf`` and ``SwotPDFGenerator`` directly."""

    def test_pdf_generator_creates_valid_pdf(self, sample_swot_analysis: SwotAnalysis):
        """
        The generator must hand back a non-empty BytesIO that starts like a PDF.

        Regression: Ensure BytesIO returned, not int.
        """
        pdf_buffer = generate_swot_pdf(sample_swot_analysis)
        assert isinstance(pdf_buffer, BytesIO)

        # Inspect the whole payload once instead of seeking around the buffer.
        raw = pdf_buffer.getvalue()
        assert raw[:4] == b"%PDF"  # Valid PDF magic bytes
        assert len(raw) > 0  # Buffer has content

    def test_pdf_generator_no_reserved_style_names(
        self, sample_swot_analysis: SwotAnalysis
    ):
        """
        Regression test: custom styles must not clash with ReportLab built-ins.

        Bug: KeyError: "Style 'BodyText' already defined"
        Fix: Renamed to "ReportBodyText"
        """
        # Building the generator is part of the check: it must not raise KeyError.
        styles = SwotPDFGenerator(sample_swot_analysis).styles

        # Both the renamed custom style and the built-in one are expected.
        for style_name in ("ReportBodyText", "BodyText"):
            assert style_name in styles

    def test_pdf_contains_swot_data(self, sample_swot_analysis: SwotAnalysis):
        """
        Check that the generated document is a structurally plausible PDF.

        Note: text search in compressed PDFs is unreliable, so this only checks
        the header and a minimum size; validating the actual SWOT text would
        need a PDF parser such as PyPDF2.
        """
        min_expected_size = 1000  # Reasonable minimum size

        pdf_buffer = generate_swot_pdf(sample_swot_analysis)
        pdf_buffer.seek(0)
        payload = pdf_buffer.read()

        assert payload.startswith(b"%PDF")
        assert len(payload) > min_expected_size
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFCaching:
    """Cover ``set`` / ``get`` / ``invalidate`` on the shared ``pdf_cache``."""

    def test_cache_stores_and_retrieves_pdf(
        self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
    ):
        """A stored PDF comes back as a distinct BytesIO object."""
        original = generate_swot_pdf(sample_swot_analysis)
        pdf_cache.set(mock_session_id, sample_swot_analysis, original)

        fetched = pdf_cache.get(mock_session_id, sample_swot_analysis)

        assert fetched is not None
        assert isinstance(fetched, BytesIO)
        # The cache is expected to hand out a copy, never the stored object.
        assert fetched is not original

    def test_cache_miss_returns_none(
        self, sample_swot_analysis: SwotAnalysis
    ):
        """Looking up a session that was never stored yields None."""
        assert pdf_cache.get("nonexistent_session", sample_swot_analysis) is None

    def test_cache_invalidation(
        self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
    ):
        """After ``invalidate`` the session's PDF is no longer retrievable."""
        pdf_cache.set(
            mock_session_id,
            sample_swot_analysis,
            generate_swot_pdf(sample_swot_analysis),
        )

        pdf_cache.invalidate(mock_session_id)

        assert pdf_cache.get(mock_session_id, sample_swot_analysis) is None
@pytest.mark.integration
@pytest.mark.pdf
@pytest.mark.api
class TestPDFDownloadEndpoint:
    """Test PDF download endpoint.

    Several tests write to the module-level ``result_store`` while others rely
    on the session having no stored result. An autouse fixture removes this
    session's entry before and after every test so the outcome does not depend
    on execution order.
    """

    @pytest.fixture(autouse=True)
    def _isolate_result_store(self, mock_session_id: str):
        """Remove this session's entry from ``result_store`` around each test."""
        result_store.pop(mock_session_id, None)
        yield
        result_store.pop(mock_session_id, None)

    @staticmethod
    def _make_request(session_id: str) -> MagicMock:
        """Return a mock request whose ``session.get(...)`` yields *session_id*."""
        request = MagicMock()
        request.session.get.return_value = session_id
        return request

    @staticmethod
    async def _read_body(response) -> bytes:
        """Drain ``response.body_iterator`` into a single bytes object."""
        # Join once instead of growing a bytes object chunk by chunk.
        return b"".join([chunk async for chunk in response.body_iterator])

    def test_download_pdf_without_session_returns_404(self, test_client: TestClient):
        """
        Regression test: PDF download without session ID should return 404.

        Bug: AttributeError: 'int' object has no attribute 'encode'
        Root Cause: StreamingResponse used with raw bytes instead of Response
        Fix: Use Response for error paths, not StreamingResponse

        This tests the first error path (no session_id).
        """
        # Don't set any session cookie
        response = test_client.get("/download-pdf")

        assert response.status_code == 404
        assert b"No analysis found" in response.content
        # Verify response can be read without AttributeError
        assert isinstance(response.content, bytes)
        assert len(response.content) > 0

    @pytest.mark.asyncio
    async def test_download_pdf_without_result_returns_404(
        self, mock_session_id: str
    ):
        """
        Regression test: PDF download with session but no result should return 404.

        Bug: AttributeError: 'int' object has no attribute 'encode'
        Root Cause: StreamingResponse used with raw bytes instead of Response
        Fix: Use Response for error paths, not StreamingResponse

        This tests the second error path (session exists but result is None).
        This is the exact error scenario from the production bug.
        """
        from backend.site.router import download_pdf

        # Clear result_store to simulate "analysis not complete"
        result_store.clear()

        # Call the endpoint handler directly
        response = await download_pdf(self._make_request(mock_session_id))

        # Should hit the "result is None" error path
        assert response.status_code == 404
        assert b"Analysis not complete" in response.body
        # Verify response can be read without AttributeError
        assert isinstance(response.body, bytes)
        assert len(response.body) > 0

    def test_download_pdf_error_paths_use_response_not_streaming(
        self, test_client: TestClient, mock_session_id: str
    ):
        """
        Regression test: Verify error paths return Response, not StreamingResponse.

        This prevents the AttributeError when iterating over bytes in
        StreamingResponse. Response handles bytes directly, StreamingResponse
        requires an iterator.
        """
        # Test path 1: No session ID
        response1 = test_client.get("/download-pdf")
        assert response1.status_code == 404
        # Response should be directly readable (not streamed)
        assert isinstance(response1.content, bytes)

        # Test path 2: Session ID but no result (the autouse fixture guarantees
        # that no earlier test left a result behind for this session).
        with test_client:
            test_client.cookies.set("analysis_id", mock_session_id)
            response2 = test_client.get("/download-pdf")
            assert response2.status_code == 404
            # Response should be directly readable (not streamed)
            assert isinstance(response2.content, bytes)

    @pytest.mark.asyncio
    async def test_download_pdf_returns_pdf_file(
        self, mock_session_id: str, sample_swot_analysis
    ):
        """
        Test PDF download returns valid PDF file.

        Regression: Ensure StreamingResponse works with BytesIO iterator.
        Bug: AttributeError: 'int' object has no attribute 'encode'
        Fix: Use iterfile() generator for BytesIO chunks
        """
        from backend.site.router import download_pdf

        # Populate result_store with completed analysis
        result_store[mock_session_id] = sample_swot_analysis

        # Call the endpoint handler directly
        response = await download_pdf(self._make_request(mock_session_id))

        assert response.status_code == 200
        assert response.headers["content-type"] == "application/pdf"

        disposition = response.headers["content-disposition"]
        assert "attachment" in disposition
        # New filename format: swot-{company}-{date}.pdf
        assert "swot-" in disposition
        assert ".pdf" in disposition

        # Verify it's a valid PDF by reading the stream
        body_content = await self._read_body(response)
        assert body_content.startswith(b"%PDF")
        assert len(body_content) > 1000  # PDF should have substantial content

    @pytest.mark.asyncio
    async def test_download_pdf_uses_cache(
        self, mock_session_id: str, sample_swot_analysis
    ):
        """Two consecutive downloads of the same analysis return identical bytes."""
        from backend.site.router import download_pdf

        # Populate result_store
        result_store[mock_session_id] = sample_swot_analysis

        # First request - generates PDF and caches
        response1 = await download_pdf(self._make_request(mock_session_id))
        assert response1.status_code == 200
        body1 = await self._read_body(response1)

        # Second request - should use cache
        response2 = await download_pdf(self._make_request(mock_session_id))
        assert response2.status_code == 200
        body2 = await self._read_body(response2)

        # Both should return identical content
        assert body1 == body2

    @pytest.mark.asyncio
    async def test_download_pdf_filename_format(
        self, mock_session_id: str, sample_swot_analysis
    ):
        """Test PDF filename follows expected format: swot-{company}-{date}.pdf"""
        from datetime import datetime

        from backend.site.router import download_pdf

        # Populate result_store
        result_store[mock_session_id] = sample_swot_analysis

        # Sample the date on both sides of the call: if the clock crosses
        # midnight while the handler runs, either day is a legitimate stamp.
        date_before = datetime.now().strftime("%Y-%m-%d")
        response = await download_pdf(self._make_request(mock_session_id))
        date_after = datetime.now().strftime("%Y-%m-%d")

        assert response.status_code == 200
        disposition = response.headers["content-disposition"]

        # New format: swot-{primary_entity}-vs-{comparison[0]}-{date}.pdf
        # Sample has primary_entity="Google", comparison_entities=["Microsoft", "Amazon"]
        assert "swot-Google-vs-Microsoft" in disposition
        assert "plus1" in disposition  # +1 more comparison (Amazon)
        assert any(stamp in disposition for stamp in (date_before, date_after))
        assert ".pdf" in disposition