From 3d11ac1bb9c92cfcd941ae18c7fe8ac15542417f Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 30 Sep 2024 15:49:49 +0200 Subject: [PATCH] Minimum PDF/A support (#81) --- Cargo.lock | 9 ++-- Cargo.toml | 4 +- cli/src/main.rs | 1 + src/lib.rs | 23 ++++++++-- src/render/clip_path.rs | 6 ++- src/render/group.rs | 8 ++-- src/render/image.rs | 4 +- src/render/mask.rs | 4 +- src/render/mod.rs | 4 +- src/render/path.rs | 16 ++++--- src/render/text.rs | 97 +++++++++++++++++++++++++++++------------ src/util/context.rs | 6 +-- src/util/helper.rs | 16 +++++++ 13 files changed, 140 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c16249e2..0b8dcdd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1069,9 +1069,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pdf-writer" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af6a7882fda7808481d43c51cadfc3ec934c6af72612a1fe6985ce329a2f0469" +checksum = "be17f48d7fbbd22c6efedb58af5d409aa578e407f40b29a0bcb4e66ed84c5c98" dependencies = [ "bitflags 2.6.0", "itoa", @@ -1546,8 +1546,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "subsetter" -version = "0.11.0" -source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f98178f34057d4d4de93d68104007c6dea4dfac930204a69ab4622daefa648" [[package]] name = "svg2pdf" diff --git a/Cargo.toml b/Cargo.toml index e1102190..8c9b03e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,13 +22,13 @@ image = { version = "0.25", default-features = false, features = ["jpeg", "png", miniz_oxide = "0.8" once_cell = "1.18" oxipng = { version = "9", default-features = false, features = ["filetime", "parallel", "zopfli"] } -pdf-writer = "0.10" +pdf-writer = "0.12" pdfium-render = "=0.8.20" termcolor = "1.2" usvg = { version = "0.43", default-features = false } tiny-skia = "0.11.4" resvg = { version = "0.43", default-features = false } -subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" } +subsetter = "0.2" ttf-parser = { version = "0.24.1" } siphasher = { version = "1.0.1"} diff --git a/cli/src/main.rs b/cli/src/main.rs index 6e5b9b07..cbb0fed5 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -27,6 +27,7 @@ fn run() -> Result<(), String> { compress: true, embed_text: !args.text_to_paths, raster_scale: args.raster_scale, + pdfa: false, }; let page_options = PageOptions { dpi: args.dpi }; diff --git a/src/lib.rs b/src/lib.rs index 701e0715..99086cda 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,7 @@ use usvg::{Size, Transform, Tree}; use crate::render::{tree_to_stream, tree_to_xobject}; use crate::util::context::Context; -use crate::util::helper::{deflate, RectExt, TransformExt}; +use crate::util::helper::{deflate, ContentExt, RectExt, TransformExt}; use crate::util::resources::ResourceContainer; // The ICC profiles. @@ -96,6 +96,11 @@ impl Default for PageOptions { pub enum ConversionError { /// The SVG image contains an unrecognized type of image. InvalidImage, + /// Text shaping resulted in a .notdef glyph. Can only occur if PDF/A + /// processing is enabled. + MissingGlyphs, + /// Converting the SVG would require too much nesting depth. + TooMuchNesting, /// An unknown error occurred during the conversion. This could indicate a bug in the /// svg2pdf. UnknownError, @@ -111,6 +116,8 @@ impl Display for ConversionError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::InvalidImage => f.write_str("An unknown type of image appears in the SVG."), + Self::MissingGlyphs => f.write_str("A piece of text could not be displayed with any font."), + Self::TooMuchNesting => f.write_str("The SVG's nesting depth is too high."), Self::UnknownError => f.write_str("An unknown error occurred during the conversion. This could indicate a bug in svg2pdf"), #[cfg(feature = "text")] Self::SubsetError(_) => f.write_str("An error occurred while subsetting a font."), @@ -148,6 +155,13 @@ pub struct ConversionOptions { /// /// _Default:_ `true`. pub embed_text: bool, + + /// Whether to write chunks in PDF/A-2b compliant mode. + /// + /// **Note:** This currently only ensures that `to_chunk` does not generate + /// anything that is forbidden by PDF/A. It does _not_ turn the + /// free-standing PDF generated by `to_pdf` into a valid PDF/A. + pub pdfa: bool, } impl Default for ConversionOptions { @@ -156,6 +170,7 @@ impl Default for ConversionOptions { compress: true, raster_scale: 1.5, embed_text: true, + pdfa: false, } } } @@ -190,7 +205,7 @@ pub fn to_pdf( conversion_options: ConversionOptions, page_options: PageOptions, ) -> Result> { - let mut ctx = Context::new(tree, conversion_options); + let mut ctx = Context::new(tree, conversion_options)?; let mut pdf = Pdf::new(); let dpi_ratio = 72.0 / page_options.dpi; @@ -210,7 +225,7 @@ pub fn to_pdf( // Generate main content let mut rc = ResourceContainer::new(); let mut content = Content::new(); - content.save_state(); + content.save_state_checked()?; content.transform(dpi_transform.to_pdf_transform()); tree_to_stream(tree, &mut pdf, &mut content, &mut ctx, &mut rc)?; content.restore_state(); @@ -347,7 +362,7 @@ pub fn to_chunk( ) -> Result<(Chunk, Ref)> { let mut chunk = Chunk::new(); - let mut ctx = Context::new(tree, conversion_options); + let mut ctx = Context::new(tree, conversion_options)?; let x_ref = tree_to_xobject(tree, &mut chunk, &mut ctx)?; ctx.write_global_objects(&mut chunk)?; Ok((chunk, x_ref)) diff --git a/src/render/clip_path.rs b/src/render/clip_path.rs index bf3a36f7..58292577 100644 --- a/src/render/clip_path.rs +++ b/src/render/clip_path.rs @@ -6,7 +6,9 @@ use usvg::{ClipPath, FillRule, Group, Node, Transform}; use super::group; use super::path::draw_path; use crate::util::context::Context; -use crate::util::helper::{bbox_to_non_zero_rect, NameExt, RectExt, TransformExt}; +use crate::util::helper::{ + bbox_to_non_zero_rect, ContentExt, NameExt, RectExt, TransformExt, +}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -181,7 +183,7 @@ fn create_complex_clip_path( let x_ref = ctx.alloc_ref(); let mut content = Content::new(); - content.save_state(); + content.save_state_checked()?; if let Some(clip_path) = clip_path.clip_path() { render(parent, clip_path, chunk, &mut content, ctx, &mut rc)?; diff --git a/src/render/group.rs b/src/render/group.rs index e27a46d5..30ebc83e 100644 --- a/src/render/group.rs +++ b/src/render/group.rs @@ -7,7 +7,9 @@ use usvg::{Opacity, Transform}; use super::filter; use super::{clip_path, mask, Render}; use crate::util::context::Context; -use crate::util::helper::{BlendModeExt, GroupExt, NameExt, RectExt, TransformExt}; +use crate::util::helper::{ + BlendModeExt, ContentExt, GroupExt, NameExt, RectExt, TransformExt, +}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -36,7 +38,7 @@ pub fn render( let initial_opacity = initial_opacity.unwrap_or(Opacity::ONE); if group.is_isolated() || initial_opacity.get() != 1.0 { - content.save_state(); + content.save_state_checked()?; let gs_ref = ctx.alloc_ref(); let mut gs = chunk.ext_graphics(gs_ref); gs.non_stroking_alpha(group.opacity().mul(initial_opacity).get()) @@ -126,7 +128,7 @@ fn create_to_stream( accumulated_transform: Transform, rc: &mut ResourceContainer, ) -> Result<()> { - content.save_state(); + content.save_state_checked()?; content.transform(group.transform().to_pdf_transform()); let accumulated_transform = accumulated_transform.pre_concat(group.transform()); diff --git a/src/render/image.rs b/src/render/image.rs index 9a7ee46b..d539226c 100644 --- a/src/render/image.rs +++ b/src/render/image.rs @@ -8,7 +8,7 @@ use usvg::{ImageKind, Rect, Size, Transform, Tree}; use crate::render::tree_to_xobject; use crate::util::context::Context; -use crate::util::helper::{NameExt, TransformExt}; +use crate::util::helper::{ContentExt, NameExt, TransformExt}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -59,7 +59,7 @@ pub fn render( Rect::from_xywh(0.0, 0.0, image_size.width(), image_size.height()).unwrap(), ); - content.save_state(); + content.save_state_checked()?; // Account for the x/y of the viewbox. content.transform( diff --git a/src/render/mask.rs b/src/render/mask.rs index 058ac7c0..252b2b42 100644 --- a/src/render/mask.rs +++ b/src/render/mask.rs @@ -3,7 +3,7 @@ use usvg::{Group, Mask, Transform}; use super::group; use crate::util::context::Context; -use crate::util::helper::{clip_to_rect, MaskTypeExt, NameExt, RectExt}; +use crate::util::helper::{clip_to_rect, ContentExt, MaskTypeExt, NameExt, RectExt}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -34,7 +34,7 @@ pub fn create( let mut rc = ResourceContainer::new(); let mut content = Content::new(); - content.save_state(); + content.save_state_checked()?; if let Some(mask) = mask.mask() { render(parent, mask, chunk, &mut content, ctx, &mut rc)?; diff --git a/src/render/mod.rs b/src/render/mod.rs index 661e5c79..91ade5bd 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -2,7 +2,7 @@ use pdf_writer::{Chunk, Content, Filter, Finish, Ref}; use usvg::{Node, Transform, Tree}; use crate::util::context::Context; -use crate::util::helper::{RectExt, TransformExt}; +use crate::util::helper::{ContentExt, RectExt, TransformExt}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -28,7 +28,7 @@ pub fn tree_to_stream( ctx: &mut Context, rc: &mut ResourceContainer, ) -> Result<()> { - content.save_state(); + content.save_state_checked()?; // From PDF coordinate system to SVG coordinate system let initial_transform = diff --git a/src/render/path.rs b/src/render/path.rs index 6e6f39b7..ffec45e1 100644 --- a/src/render/path.rs +++ b/src/render/path.rs @@ -8,7 +8,7 @@ use usvg::{Stroke, Transform}; use super::{gradient, pattern}; use crate::util::context::Context; -use crate::util::helper::{ColorExt, LineCapExt, LineJoinExt, NameExt}; +use crate::util::helper::{ColorExt, ContentExt, LineCapExt, LineJoinExt, NameExt}; use crate::util::resources::ResourceContainer; use crate::Result; @@ -104,6 +104,7 @@ pub(crate) fn stroke_path( let operation = |content: &mut Content, stroke: &Stroke| { draw_path(path.data().segments(), content); finish_path(Some(stroke), None, content); + Ok(()) }; if let Some(path_stroke) = path.stroke() { @@ -131,13 +132,13 @@ pub(crate) fn stroke( content: &mut Content, ctx: &mut Context, rc: &mut ResourceContainer, - operation: impl Fn(&mut Content, &Stroke), + operation: impl Fn(&mut Content, &Stroke) -> Result<()>, accumulated_transform: Transform, bbox: Rect, ) -> Result<()> { let paint = &stroke.paint(); - content.save_state(); + content.save_state_checked()?; match paint { Paint::Color(c) => { @@ -206,7 +207,7 @@ pub(crate) fn stroke( content.set_dash_pattern(vec![], 0.0); } - operation(content, stroke); + operation(content, stroke)?; content.restore_state(); @@ -229,6 +230,7 @@ pub(crate) fn fill_path( let operation = |content: &mut Content, fill: &Fill| { draw_path(path.data().segments(), content); finish_path(None, Some(fill), content); + Ok(()) }; if let Some(path_fill) = path.fill() { @@ -256,13 +258,13 @@ pub(crate) fn fill( content: &mut Content, ctx: &mut Context, rc: &mut ResourceContainer, - operation: impl Fn(&mut Content, &Fill), + operation: impl Fn(&mut Content, &Fill) -> Result<()>, accumulated_transform: Transform, bbox: Rect, ) -> Result<()> { let paint = &fill.paint(); - content.save_state(); + content.save_state_checked()?; match paint { Paint::Color(c) => { @@ -307,7 +309,7 @@ pub(crate) fn fill( } } - operation(content, fill); + operation(content, fill)?; content.restore_state(); Ok(()) diff --git a/src/render/text.rs b/src/render/text.rs index 45c79263..d9fe6577 100644 --- a/src/render/text.rs +++ b/src/render/text.rs @@ -1,13 +1,14 @@ use crate::render::path; use crate::util::allocate::RefAllocator; use crate::util::context::Context; -use crate::util::helper::{deflate, TransformExt}; +use crate::util::helper::{deflate, ContentExt, TransformExt}; use crate::util::resources::ResourceContainer; -use crate::ConversionError::{InvalidFont, SubsetError, UnknownError}; +use crate::ConversionError::{self, InvalidFont, SubsetError}; use crate::Result; use pdf_writer::types::{ CidFontType, FontFlags, SystemInfo, TextRenderingMode, UnicodeCmap, }; +use pdf_writer::writers::WMode; use pdf_writer::{Chunk, Content, Filter, Finish, Name, Ref, Str}; use siphasher::sip128::{Hasher128, SipHasher13}; use std::collections::{BTreeMap, HashMap}; @@ -19,6 +20,10 @@ use usvg::{Fill, Group, ImageKind, Node, PaintOrder, Stroke, Transform}; const CFF: Tag = Tag::from_bytes(b"CFF "); const CFF2: Tag = Tag::from_bytes(b"CFF2"); + +const SUBSET_TAG_LEN: usize = 6; +const IDENTITY_H: &str = "Identity-H"; + const CMAP_NAME: Name = Name(b"Custom"); const SYSTEM_INFO: SystemInfo = SystemInfo { registry: Str(b"Adobe"), @@ -57,18 +62,14 @@ pub fn write_font( .or_else(|| ttf.raw_face().table(CFF2)) .is_some(); - let postscript_name = find_name(&ttf, name_id::POST_SCRIPT_NAME) - .unwrap_or_else(|| "unknown".to_string()); - - let subset_tag = subset_tag(glyph_set)?; - let base_font = format!("{subset_tag}+{postscript_name}"); + let base_font = base_font_name(&ttf, glyph_set); let base_font_type0 = - if is_cff { format!("{base_font}-Identity-H") } else { base_font.clone() }; + if is_cff { format!("{base_font}-{IDENTITY_H}") } else { base_font.clone() }; chunk .type0_font(type0_ref) .base_font(Name(base_font_type0.as_bytes())) - .encoding_predefined(Name(b"Identity-H")) + .encoding_predefined(Name(IDENTITY_H.as_bytes())) .descendant_font(cid_ref) .to_unicode(cmap_ref); @@ -106,7 +107,11 @@ pub fn write_font( cid.finish(); let mut flags = FontFlags::empty(); - flags.set(FontFlags::SERIF, postscript_name.contains("Serif")); + flags.set( + FontFlags::SERIF, + find_name(&ttf, name_id::POST_SCRIPT_NAME) + .is_some_and(|name| name.contains("Serif")), + ); flags.set(FontFlags::FIXED_PITCH, ttf.is_monospaced()); flags.set(FontFlags::ITALIC, ttf.is_italic()); flags.insert(FontFlags::SYMBOLIC); @@ -154,7 +159,7 @@ pub fn write_font( font_descriptor.finish(); let cmap = create_cmap(glyph_set, glyph_remapper).ok_or(SubsetError(font.id))?; - chunk.cmap(cmap_ref, &cmap.finish()); + chunk.cmap(cmap_ref, &cmap.finish()).writing_mode(WMode::Horizontal); // Subset and write the font's bytes. let data = subset_font(&font.face_data, font.face_index, glyph_remapper, font.id)?; @@ -234,7 +239,7 @@ pub fn render( continue; } - let operation = |content: &mut Content| { + let operation = |content: &mut Content| -> Result<()> { for glyph in &span.positioned_glyphs { let Some(font) = fonts.get(&glyph.font).and_then(|f| f.as_ref()) else { continue; @@ -243,6 +248,9 @@ pub fn render( let name = font_names.get(&font.reference).unwrap(); // TODO: Remove unwraps and switch to error-based handling. + // NOTE(laurmaedje): If it can't happen, I think a panic is + // better. There is no way to handle it as a consumer of + // svg2pdf. let cid = font.glyph_remapper.get(glyph.id.0).unwrap(); let ts = glyph .outline_transform() @@ -251,7 +259,7 @@ pub fn render( // we want to leverage the native PDF font size feature instead, so we downscale // it to a font size of 1. .pre_scale(1.0 / span.font_size.get(), 1.0 / span.font_size.get()); - content.save_state(); + content.save_state_checked()?; content.begin_text(); content.set_text_matrix(ts.to_pdf_transform()); content.set_font(Name(name.as_bytes()), span.font_size.get()); @@ -259,16 +267,18 @@ pub fn render( content.end_text(); content.restore_state(); } + + Ok(()) }; - let stroke_operation = |content: &mut Content, _: &Stroke| { + let stroke_operation = |content: &mut Content, _: &Stroke| -> Result<()> { content.set_text_rendering_mode(TextRenderingMode::Stroke); - operation(content); + operation(content) }; - let fill_operation = |content: &mut Content, _: &Fill| { + let fill_operation = |content: &mut Content, _: &Fill| -> Result<()> { content.set_text_rendering_mode(TextRenderingMode::Fill); - operation(content); + operation(content) }; if let Some(overline) = &span.overline { @@ -279,7 +289,7 @@ pub fn render( path::render(underline, chunk, content, ctx, rc, accumulated_transform)?; } - content.save_state(); + content.save_state_checked()?; match (span.fill.as_ref(), span.stroke.as_ref()) { (Some(fill), Some(stroke)) => match span.paint_order { PaintOrder::FillAndStroke => { @@ -353,7 +363,7 @@ pub fn render( } (None, None) => { content.set_text_rendering_mode(TextRenderingMode::Invisible); - operation(content); + operation(content)?; } }; @@ -367,17 +377,36 @@ pub fn render( Ok(()) } +/// Creates the base font name for a font with a specific glyph subset. +/// Consists of a subset tag and the PostScript name of the font. +/// +/// Returns a string of length maximum 116, so that even with `-Identity-H` +/// added it does not exceed the maximum PDF/A name length of 127. +fn base_font_name(ttf: &Face, glyphs: &T) -> String { + const MAX_LEN: usize = 127 - REST_LEN; + const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len(); + + let postscript_name = find_name(ttf, name_id::POST_SCRIPT_NAME); + let name = postscript_name.as_deref().unwrap_or("unknown"); + let trimmed = &name[..name.len().min(MAX_LEN)]; + + // Hash the full name (we might have trimmed) and the glyphs to produce + // a fairly unique subset tag. + let subset_tag = subset_tag(&(name, glyphs)); + + format!("{subset_tag}+{trimmed}") +} + /// Produce a unique 6 letter tag for a glyph set. -fn subset_tag(glyphs: &mut BTreeMap) -> Result { - const LEN: usize = 6; +fn subset_tag(glyphs: &T) -> String { const BASE: u128 = 26; let mut hash = hash128(&glyphs); - let mut letter = [b'A'; LEN]; + let mut letter = [b'A'; SUBSET_TAG_LEN]; for l in letter.iter_mut() { *l = b'A' + (hash % BASE) as u8; hash /= BASE; } - Ok(std::str::from_utf8(&letter).map_err(|_| UnknownError)?.to_string()) + std::str::from_utf8(&letter).unwrap().into() } /// Calculate a 128-bit siphash of a value. @@ -479,7 +508,11 @@ pub struct Font { pub face_index: u32, } -pub fn fill_fonts(group: &Group, ctx: &mut Context, fontdb: &fontdb::Database) { +pub fn fill_fonts( + group: &Group, + ctx: &mut Context, + fontdb: &fontdb::Database, +) -> Result<()> { for child in group.children() { match child { Node::Text(t) => { @@ -518,18 +551,28 @@ pub fn fill_fonts(group: &Group, ctx: &mut Context, fontdb: &fontdb::Database) { font.glyph_set.insert(g.id.0, g.text.clone()); font.glyph_remapper.remap(g.id.0); } + + if ctx.options.pdfa && g.id.0 == 0 { + return Err(ConversionError::MissingGlyphs); + } } } } - Node::Group(group) => fill_fonts(group, ctx, fontdb), + Node::Group(group) => fill_fonts(group, ctx, fontdb)?, Node::Image(image) => { if let ImageKind::SVG(svg) = image.kind() { - fill_fonts(svg.root(), ctx, fontdb); + fill_fonts(svg.root(), ctx, fontdb)?; } } _ => {} } - child.subroots(|subroot| fill_fonts(subroot, ctx, fontdb)); + let mut result = Ok(()); + child.subroots(|subroot| { + result = result.and(fill_fonts(subroot, ctx, fontdb)); + }); + result?; } + + Ok(()) } diff --git a/src/util/context.rs b/src/util/context.rs index db910384..2cf330f9 100644 --- a/src/util/context.rs +++ b/src/util/context.rs @@ -30,7 +30,7 @@ impl Context { pub fn new( #[allow(unused_variables)] tree: &Tree, options: ConversionOptions, - ) -> Self { + ) -> Result { #[allow(unused_mut)] let mut ctx = Self { ref_allocator: RefAllocator::new(), @@ -43,10 +43,10 @@ impl Context { #[cfg(feature = "text")] if options.embed_text { - text::fill_fonts(tree.root(), &mut ctx, tree.fontdb().as_ref()); + text::fill_fonts(tree.root(), &mut ctx, tree.fontdb().as_ref())?; } - ctx + Ok(ctx) } /// Allocate a new reference. diff --git a/src/util/helper.rs b/src/util/helper.rs index a5ffcf87..b28d053e 100644 --- a/src/util/helper.rs +++ b/src/util/helper.rs @@ -3,6 +3,7 @@ use pdf_writer::{Content, Name, Rect}; use usvg::{LineCap, LineJoin, NonZeroRect, Transform}; use crate::render::gradient::Stop; +use crate::{ConversionError, Result}; /// Extension trait to convert [Colors](usvg::Color) into PDF colors. pub trait ColorExt { @@ -48,6 +49,21 @@ impl RectExt for NonZeroRect { } } +/// Extension trait for [Content]. +pub trait ContentExt { + fn save_state_checked(&mut self) -> Result<()>; +} + +impl ContentExt for Content { + fn save_state_checked(&mut self) -> Result<()> { + self.save_state(); + if self.state_nesting_depth() > 28 { + return Err(ConversionError::TooMuchNesting); + } + Ok(()) + } +} + /// Extension trait to turn a [`usvg` BlendMode](usvg::BlendMode) into a [PDF Blendmode](BlendMode) pub trait BlendModeExt { fn to_pdf_blend_mode(&self) -> BlendMode;