nex_docus/backend/venv312/lib/python3.12/site-packages/weasyprint/pdf/pdfa.py

95 lines
3.5 KiB
Python

"""PDF/A generation."""
try:
    # ``files`` is only available in Python 3.9+.
    from importlib.resources import files
except ImportError:
    # Older interpreters: use the standard library helper directly (it is
    # deprecated in Python 3.11+, where the branch below is used instead).
    from importlib.resources import read_binary
else:
    def read_binary(package, resource):
        """Return the bytes of ``resource`` stored in ``package``."""
        resource_path = files(package) / resource
        return resource_path.read_bytes()
from functools import partial
import pydyf
from .metadata import add_metadata
def pdfa(pdf, metadata, document, page_streams, attachments, compress,
         version):
    """Apply the PDF/A-``version``b requirements to ``pdf``, in place.

    - The sRGB ICC profile shipped in this package is embedded and set as
      the output intent of the catalog.
    - Attachments are dropped for version 1, restricted to PDF files for
      version 2, and listed in the catalog ``AF`` array with their
      relationship for version 3 and above.
    - Annotations get their ``F`` flags set so that they are printed.
    - The metadata stream is written by ``add_metadata``.

    ``document`` and ``page_streams`` belong to the signature but are not
    read by this function.

    """
    # Embed the ICC profile and reference it from the output intent.
    icc_profile = pydyf.Stream(
        [read_binary(__package__, 'sRGB2014.icc')],
        pydyf.Dictionary({'N': 3, 'Alternate': '/DeviceRGB'}),
        compress=compress)
    pdf.add_object(icc_profile)
    output_intent = pydyf.Dictionary({
        'Type': '/OutputIntent',
        'S': '/GTS_PDFA1',
        'OutputConditionIdentifier': pydyf.String('sRGB IEC61966-2.1'),
        'DestOutputProfile': icc_profile.reference,
    })
    pdf.catalog['OutputIntents'] = pydyf.Array([output_intent])

    # Version 1: remove the embedded files entry of the names dictionary.
    if version == 1:
        if 'Names' in pdf.catalog and 'EmbeddedFiles' in pdf.catalog['Names']:
            del pdf.catalog['Names']['EmbeddedFiles']

    # Versions 1 and 2: detach embedded file streams from their file
    # specifications.
    if version <= 2:
        for filespec in _dictionaries_of_type(pdf, '/Filespec'):
            # 'EF'/'F' holds an indirect reference whose first token is the
            # index of the stream in ``pdf.objects``.
            stream_number = int(filespec['EF']['F'].split()[0])
            embedded_file = pdf.objects[stream_number]
            # Version 1 keeps no attachment at all (the subtype is not even
            # looked at), version 2 only keeps PDF attachments.
            # TODO: check that PDFs are actually PDF/A-2+ files.
            keep = (
                version != 1 and
                embedded_file.extra['Subtype'] == '/application#2fpdf')
            if not keep:
                del filespec['EF']

    # Version 3 and above: declare attachments in the catalog AF array.
    if version >= 3:
        # Relationship of each attachment, keyed by the checksum string as
        # it is stored in the embedded file parameters.
        relationships = {
            f'<{attachment.md5}>': attachment.relationship
            for attachment in attachments if attachment.md5}
        associated_files = []
        if 'Names' in pdf.catalog and 'EmbeddedFiles' in pdf.catalog['Names']:
            tree_number = int(
                pdf.catalog['Names']['EmbeddedFiles'].split()[0])
            name_tree = pdf.objects[tree_number]
            # 'Names' alternates keys and values: keep the values only.
            associated_files.extend(name_tree['Names'][1::2])
        # NOTE(review): the values collected above are presumably references
        # to file specifications that the scan below finds again, so they
        # may be listed twice in AF -- confirm this is intended.
        for filespec in _dictionaries_of_type(pdf, '/Filespec'):
            stream_number = int(filespec['EF']['F'].split()[0])
            checksum = pdf.objects[stream_number].extra['Params']['CheckSum']
            relationship = relationships.get(checksum, 'Unspecified')
            filespec['AFRelationship'] = f'/{relationship}'
            associated_files.append(filespec.reference)
        if associated_files:
            pdf.catalog.setdefault('AF', pydyf.Array()).extend(
                associated_files)

    # Print annotations: F only holds bit 3 (counting from 1), i.e. 4.
    for annotation in _dictionaries_of_type(pdf, '/Annot'):
        annotation['F'] = 2 ** (3 - 1)

    # Common PDF metadata stream.
    add_metadata(pdf, metadata, 'a', version, 'B', compress)


def _dictionaries_of_type(pdf, type_name):
    """Yield the dictionaries of ``pdf.objects`` whose Type is ``type_name``.

    Objects that are not dictionaries are skipped.

    """
    for candidate in pdf.objects:
        if isinstance(candidate, dict) and candidate.get('Type') == type_name:
            yield candidate
# Map each PDF/A variant name to a pair: the generator function bound to its
# PDF/A version, and the options giving the PDF version used for that variant.
VARIANTS = {
    f'pdf/a-{part}b': (partial(pdfa, version=part), {'version': pdf_version})
    for part, pdf_version in {1: '1.4', 2: '1.7', 3: '1.7', 4: '2.0'}.items()}