Files
Pygentic-AI/tests/integration/test_pdf_export.py
Francis Secada 1be156ed7c feat: add pytest infrastructure and regression tests
Implements comprehensive testing framework to prevent regressions like:
- PDF style name conflicts (BodyText)
- StreamingResponse type errors
- Template not found after refactors

Test Infrastructure:
- pytest.ini: Configuration with coverage, markers, asyncio support
- conftest.py: Shared fixtures (test_client, test_db, sample_data)
- GitHub Actions CI: Automated testing on push/PR
- Directory structure: tests/{unit,integration,fixtures}

Integration Tests (test_analyze_flow.py):
- Regression: analyze endpoint returns empty response (not status.html)
- Status polling with OOB swaps
- Session creation and management
- First poll returns container + items
- Subsequent polls return only new items
- Result endpoint with/without data

Integration Tests (test_pdf_export.py):
- Regression: PDF generation returns BytesIO (not int)
- Regression: No ReportLab style name conflicts
- PDF download endpoint with streaming response
- PDF caching behavior
- Valid PDF format verification
- Filename format validation

Unit Tests (test_pdf_service.py):
- Content hash generation and consistency
- PDF generator initialization
- Custom style creation without conflicts
- SwotAnalysis model validation

CI/CD:
- GitHub Actions workflow for automated testing
- Python 3.13 support
- Coverage reporting with codecov integration

Test Markers:
- @pytest.mark.unit: Fast, isolated tests
- @pytest.mark.integration: Multi-component tests
- @pytest.mark.pdf: PDF-related tests
- @pytest.mark.api: API endpoint tests

Fixtures:
- test_client: FastAPI TestClient
- test_db_session: SQLite in-memory database
- sample_swot_analysis: Mock SWOT data
- clear_caches: Auto-cleanup between tests

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-04 15:18:07 -05:00

196 lines
6.7 KiB
Python

"""
Integration tests for PDF export functionality.
Tests: PDF generation → Caching → Download
"""
from io import BytesIO
import pytest
from fastapi.testclient import TestClient
from backend.core.core import SwotAnalysis
from backend.core.pdf_cache import pdf_cache
from backend.core.pdf_service import SwotPDFGenerator, generate_swot_pdf
from backend.site.consts import result_store
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFGeneration:
"""Test PDF generation with ReportLab"""
def test_pdf_generator_creates_valid_pdf(self, sample_swot_analysis: SwotAnalysis):
"""
Test that PDF generator produces a valid PDF BytesIO buffer.
Regression: Ensure BytesIO returned, not int.
"""
pdf_buffer = generate_swot_pdf(sample_swot_analysis)
assert isinstance(pdf_buffer, BytesIO)
assert pdf_buffer.tell() > 0 # Buffer has content
pdf_buffer.seek(0)
content = pdf_buffer.read(4)
assert content == b"%PDF" # Valid PDF magic bytes
def test_pdf_generator_no_reserved_style_names(
self, sample_swot_analysis: SwotAnalysis
):
"""
Regression test: Ensure no ReportLab reserved style name conflicts.
Bug: KeyError: "Style 'BodyText' already defined"
Fix: Renamed to "ReportBodyText"
"""
# Should not raise KeyError
generator = SwotPDFGenerator(sample_swot_analysis)
assert "ReportBodyText" in generator.styles
# Original BodyText should exist as built-in
assert "BodyText" in generator.styles
def test_pdf_contains_swot_data(self, sample_swot_analysis: SwotAnalysis):
"""Verify PDF contains SWOT analysis data"""
pdf_buffer = generate_swot_pdf(sample_swot_analysis)
# Read PDF as text (simplified - real PDF parsing would use PyPDF2)
pdf_buffer.seek(0)
pdf_bytes = pdf_buffer.read()
# Check for entity name and category keywords
# Note: PDF encoding may make this fragile; consider using PyPDF2
assert b"Google" in pdf_bytes or sample_swot_analysis.primary_entity.encode() in pdf_bytes
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFCaching:
"""Test PDF caching system"""
def test_cache_stores_and_retrieves_pdf(
self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
):
"""Test basic cache storage and retrieval"""
pdf_buffer = generate_swot_pdf(sample_swot_analysis)
# Store in cache
pdf_cache.set(mock_session_id, sample_swot_analysis, pdf_buffer)
# Retrieve from cache
cached_pdf = pdf_cache.get(mock_session_id, sample_swot_analysis)
assert cached_pdf is not None
assert isinstance(cached_pdf, BytesIO)
# Should be a copy, not same object
assert cached_pdf is not pdf_buffer
def test_cache_miss_returns_none(
self, sample_swot_analysis: SwotAnalysis
):
"""Cache miss should return None"""
cached_pdf = pdf_cache.get("nonexistent_session", sample_swot_analysis)
assert cached_pdf is None
def test_cache_invalidation(
self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
):
"""Test cache invalidation for a session"""
pdf_buffer = generate_swot_pdf(sample_swot_analysis)
pdf_cache.set(mock_session_id, sample_swot_analysis, pdf_buffer)
# Invalidate
pdf_cache.invalidate(mock_session_id)
# Should return None after invalidation
cached_pdf = pdf_cache.get(mock_session_id, sample_swot_analysis)
assert cached_pdf is None
@pytest.mark.integration
@pytest.mark.pdf
@pytest.mark.api
class TestPDFDownloadEndpoint:
"""Test PDF download endpoint"""
def test_download_pdf_without_result_returns_404(
self, test_client: TestClient, mock_session_id: str
):
"""
PDF download should return 404 if no result available.
Tests error handling for missing result.
"""
with test_client:
test_client.cookies.set("analysis_id", mock_session_id)
response = test_client.get("/download-pdf")
assert response.status_code == 404
assert b"Analysis not complete" in response.content
def test_download_pdf_returns_pdf_file(
self, test_client: TestClient, mock_session_id: str, sample_swot_analysis
):
"""
Test PDF download returns valid PDF file.
Regression: Ensure StreamingResponse works with BytesIO iterator.
Bug: AttributeError: 'int' object has no attribute 'encode'
Fix: Use iterfile() generator for BytesIO chunks
"""
# Set up result
result_store[mock_session_id] = sample_swot_analysis
with test_client:
test_client.cookies.set("analysis_id", mock_session_id)
response = test_client.get("/download-pdf")
assert response.status_code == 200
assert response.headers["content-type"] == "application/pdf"
assert "attachment" in response.headers["content-disposition"]
assert "swot-analysis" in response.headers["content-disposition"]
# Verify it's a valid PDF
content = response.content
assert content.startswith(b"%PDF")
assert len(content) > 1000 # PDF should have substantial content
def test_download_pdf_uses_cache(
self, test_client: TestClient, mock_session_id: str, sample_swot_analysis
):
"""Test that PDF download uses cache when available"""
# Set up result
result_store[mock_session_id] = sample_swot_analysis
with test_client:
test_client.cookies.set("analysis_id", mock_session_id)
# First request - generates PDF and caches
response1 = test_client.get("/download-pdf")
assert response1.status_code == 200
# Second request - should use cache
response2 = test_client.get("/download-pdf")
assert response2.status_code == 200
# Both should return identical content
assert response1.content == response2.content
def test_download_pdf_filename_format(
self, test_client: TestClient, mock_session_id: str, sample_swot_analysis
):
"""Test PDF filename follows expected format"""
result_store[mock_session_id] = sample_swot_analysis
with test_client:
test_client.cookies.set("analysis_id", mock_session_id)
response = test_client.get("/download-pdf")
assert response.status_code == 200
disposition = response.headers["content-disposition"]
# Format: swot-analysis-{session_id[:8]}.pdf
assert f"swot-analysis-{mock_session_id[:8]}.pdf" in disposition