mirror of
https://github.com/fsecada01/Pygentic-AI.git
synced 2026-05-12 12:15:00 +00:00
- Generate descriptive filenames: swot-{company}-vs-{competitor}-{date}.pdf
- Sanitize company names for filesystem safety
- Include date in YYYY-MM-DD format
- Handle multiple comparison entities (e.g., "plus2" for 2 additional)
Examples:
- Single entity: "swot-Apple-2026-02-04.pdf"
- With comparison: "swot-Apple-vs-Microsoft-2026-02-04.pdf"
- Multiple comparisons: "swot-Apple-vs-Microsoft-plus2-2026-02-04.pdf"
Fixes: Downloaded PDF had generic filename with .txt suffix
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
311 lines
11 KiB
Python
311 lines
11 KiB
Python
"""
|
|
Integration tests for PDF export functionality.
|
|
|
|
Tests: PDF generation → Caching → Download
|
|
"""
|
|
|
|
from io import BytesIO
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from backend.core.core import SwotAnalysis
|
|
from backend.core.pdf_cache import pdf_cache
|
|
from backend.core.pdf_service import SwotPDFGenerator, generate_swot_pdf
|
|
from backend.site.consts import result_store
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFGeneration:
    """Integration checks for SWOT PDF generation with ReportLab."""

    def test_pdf_generator_creates_valid_pdf(
        self, sample_swot_analysis: SwotAnalysis
    ):
        """
        ``generate_swot_pdf`` must return a non-empty BytesIO holding a PDF.

        Regression: the function has to return the BytesIO buffer, not an int.
        """
        pdf_buffer = generate_swot_pdf(sample_swot_analysis)
        assert isinstance(pdf_buffer, BytesIO)

        # Every PDF begins with the four-byte "%PDF" signature.
        pdf_buffer.seek(0)
        signature = pdf_buffer.read(4)
        assert signature == b"%PDF"  # Valid PDF magic bytes

        # seek() returns the new absolute offset, so seeking to the end
        # (whence=2) yields the total size of the buffer.
        total_bytes = pdf_buffer.seek(0, 2)
        assert total_bytes > 0  # Buffer has content

    def test_pdf_generator_no_reserved_style_names(
        self, sample_swot_analysis: SwotAnalysis
    ):
        """
        Regression test: custom styles must not clash with ReportLab built-ins.

        Bug: KeyError: "Style 'BodyText' already defined"
        Fix: the custom style was renamed to "ReportBodyText"
        """
        # Constructing the generator is itself part of the check: it must
        # not raise KeyError.
        generator = SwotPDFGenerator(sample_swot_analysis)

        # The renamed custom style and the built-in "BodyText" must coexist.
        for style_name in ("ReportBodyText", "BodyText"):
            assert style_name in generator.styles

    def test_pdf_contains_swot_data(self, sample_swot_analysis: SwotAnalysis):
        """Verify the generated document is a structurally plausible PDF."""
        pdf_buffer = generate_swot_pdf(sample_swot_analysis)

        # Rewind and pull the whole document into memory.
        pdf_buffer.seek(0)
        raw_pdf = pdf_buffer.read()

        assert raw_pdf.startswith(b"%PDF")
        assert len(raw_pdf) > 1000  # Reasonable minimum size

        # Note: searching for text inside a compressed PDF is unreliable.
        # Validating the actual SWOT content would need a PDF parser such as
        # PyPDF2, so only the overall structure is verified here.
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.pdf
class TestPDFCaching:
    """Integration checks for the PDF caching system (set / get / invalidate)."""

    def test_cache_stores_and_retrieves_pdf(
        self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
    ):
        """A PDF stored for a session can be fetched back as a distinct BytesIO."""
        original_pdf = generate_swot_pdf(sample_swot_analysis)

        # Round-trip the buffer through the cache.
        pdf_cache.set(mock_session_id, sample_swot_analysis, original_pdf)
        retrieved_pdf = pdf_cache.get(mock_session_id, sample_swot_analysis)

        assert retrieved_pdf is not None
        assert isinstance(retrieved_pdf, BytesIO)
        # The cache is expected to hand out a copy, never the stored object.
        assert retrieved_pdf is not original_pdf

    def test_cache_miss_returns_none(
        self, sample_swot_analysis: SwotAnalysis
    ):
        """Looking up a session that was never cached yields None."""
        assert pdf_cache.get("nonexistent_session", sample_swot_analysis) is None

    def test_cache_invalidation(
        self, sample_swot_analysis: SwotAnalysis, mock_session_id: str
    ):
        """Invalidating a session removes its cached PDF."""
        pdf_cache.set(
            mock_session_id,
            sample_swot_analysis,
            generate_swot_pdf(sample_swot_analysis),
        )

        pdf_cache.invalidate(mock_session_id)

        # After invalidation the lookup must behave like a cache miss.
        assert pdf_cache.get(mock_session_id, sample_swot_analysis) is None
|
|
|
|
|
|
@pytest.mark.integration
@pytest.mark.pdf
@pytest.mark.api
class TestPDFDownloadEndpoint:
    """Test PDF download endpoint.

    Some tests go through the HTTP test client; others call the
    ``download_pdf`` handler directly with a mocked request so that the
    session contents can be controlled.
    """

    @staticmethod
    def _request_for_session(session_id: str) -> MagicMock:
        """Build a mock request whose ``session.get(...)`` returns *session_id*."""
        mock_request = MagicMock()
        mock_session = MagicMock()
        mock_session.get = MagicMock(return_value=session_id)
        mock_request.session = mock_session
        return mock_request

    @staticmethod
    async def _read_body(response) -> bytes:
        """Drain ``response.body_iterator`` and return the concatenated bytes."""
        return b"".join([chunk async for chunk in response.body_iterator])

    def test_download_pdf_without_session_returns_404(self, test_client: TestClient):
        """
        Regression test: PDF download without session ID should return 404.

        Bug: AttributeError: 'int' object has no attribute 'encode'
        Root Cause: StreamingResponse used with raw bytes instead of Response
        Fix: Use Response for error paths, not StreamingResponse

        This tests the first error path (no session_id).
        """
        # Don't set any session cookie
        response = test_client.get("/download-pdf")

        assert response.status_code == 404
        assert b"No analysis found" in response.content
        # Verify response can be read without AttributeError
        assert isinstance(response.content, bytes)
        assert len(response.content) > 0

    @pytest.mark.asyncio
    async def test_download_pdf_without_result_returns_404(
        self, mock_session_id: str
    ):
        """
        Regression test: PDF download with session but no result should return 404.

        Bug: AttributeError: 'int' object has no attribute 'encode'
        Root Cause: StreamingResponse used with raw bytes instead of Response
        Fix: Use Response for error paths, not StreamingResponse

        This tests the second error path (session exists but result is None).
        This is the exact error scenario from the production bug.
        """
        from backend.site.router import download_pdf

        # Mock request with a session ID but no stored result
        mock_request = self._request_for_session(mock_session_id)

        # Clear result_store to simulate "analysis not complete"
        result_store.clear()

        # Call the endpoint handler directly
        response = await download_pdf(mock_request)

        # Should hit the "result is None" error path
        assert response.status_code == 404
        assert b"Analysis not complete" in response.body
        # Verify response can be read without AttributeError
        assert isinstance(response.body, bytes)
        assert len(response.body) > 0

    def test_download_pdf_error_paths_use_response_not_streaming(
        self, test_client: TestClient, mock_session_id: str
    ):
        """
        Regression test: Verify error paths return Response, not StreamingResponse.

        This prevents the AttributeError when iterating over bytes in StreamingResponse.
        Response handles bytes directly, StreamingResponse requires an iterator.
        """
        # Test path 1: No session ID
        response1 = test_client.get("/download-pdf")
        assert response1.status_code == 404
        # Response should be directly readable (not streamed)
        assert isinstance(response1.content, bytes)

        # Test path 2: Session ID but no result
        # NOTE(review): the direct-call tests read the ID from
        # ``request.session``; whether a raw "analysis_id" cookie populates
        # that session depends on the app's session handling. Both paths
        # return 404, so this test cannot tell them apart - confirm that
        # path 2 is really reached.
        with test_client:
            test_client.cookies.set("analysis_id", mock_session_id)
            response2 = test_client.get("/download-pdf")
            assert response2.status_code == 404
            # Response should be directly readable (not streamed)
            assert isinstance(response2.content, bytes)

    @pytest.mark.asyncio
    async def test_download_pdf_returns_pdf_file(
        self, mock_session_id: str, sample_swot_analysis: SwotAnalysis
    ):
        """
        Test PDF download returns valid PDF file.

        Regression: Ensure StreamingResponse works with BytesIO iterator.
        Bug: AttributeError: 'int' object has no attribute 'encode'
        Fix: Use iterfile() generator for BytesIO chunks
        """
        from backend.site.router import download_pdf

        mock_request = self._request_for_session(mock_session_id)

        # Populate result_store with completed analysis
        result_store[mock_session_id] = sample_swot_analysis
        try:
            # Call the endpoint handler directly
            response = await download_pdf(mock_request)

            assert response.status_code == 200
            assert response.headers["content-type"] == "application/pdf"

            disposition = response.headers["content-disposition"]
            assert "attachment" in disposition
            # New filename format: swot-{company}-{date}.pdf
            assert "swot-" in disposition
            assert ".pdf" in disposition

            # Verify it's a valid PDF by reading the stream
            body_content = await self._read_body(response)
            assert body_content.startswith(b"%PDF")
            assert len(body_content) > 1000  # PDF should have substantial content
        finally:
            # Do not leak the stored analysis into other tests: the
            # "no result" tests rely on this session having no entry.
            result_store.pop(mock_session_id, None)

    @pytest.mark.asyncio
    async def test_download_pdf_uses_cache(
        self, mock_session_id: str, sample_swot_analysis: SwotAnalysis
    ):
        """Test that two consecutive downloads return identical PDF content."""
        from backend.site.router import download_pdf

        mock_request = self._request_for_session(mock_session_id)

        # Populate result_store
        result_store[mock_session_id] = sample_swot_analysis
        try:
            # First request - expected to generate the PDF and cache it
            response1 = await download_pdf(mock_request)
            assert response1.status_code == 200
            body1 = await self._read_body(response1)

            # Second request - should use cache
            response2 = await download_pdf(mock_request)
            assert response2.status_code == 200
            body2 = await self._read_body(response2)

            # Both should return identical content
            assert body1 == body2
        finally:
            # Keep the shared result_store clean for other tests.
            result_store.pop(mock_session_id, None)

    @pytest.mark.asyncio
    async def test_download_pdf_filename_format(
        self, mock_session_id: str, sample_swot_analysis: SwotAnalysis
    ):
        """Test PDF filename follows expected format: swot-{company}-{date}.pdf"""
        from datetime import datetime

        from backend.site.router import download_pdf

        mock_request = self._request_for_session(mock_session_id)

        # Populate result_store
        result_store[mock_session_id] = sample_swot_analysis
        try:
            # Sample the date on both sides of the call: if the clock crosses
            # midnight while the handler runs, the filename may carry either
            # day, and comparing against a single later sample would be flaky.
            date_before = datetime.now().strftime("%Y-%m-%d")
            response = await download_pdf(mock_request)
            date_after = datetime.now().strftime("%Y-%m-%d")
        finally:
            # Keep the shared result_store clean for other tests.
            result_store.pop(mock_session_id, None)

        assert response.status_code == 200
        disposition = response.headers["content-disposition"]

        # New format: swot-{primary_entity}-vs-{comparison[0]}-{date}.pdf
        # Sample has primary_entity="Google", comparison_entities=["Microsoft", "Amazon"]
        assert "swot-Google-vs-Microsoft" in disposition
        assert "plus1" in disposition  # +1 more comparison (Amazon)
        assert date_before in disposition or date_after in disposition
        assert ".pdf" in disposition
|