"""Find anchors, links, bookmarks and inputs in documents.""" import math from .formatting_structure import boxes from .layout.percent import percentage from .matrix import Matrix def rectangle_aabb(matrix, pos_x, pos_y, width, height): """Apply a transformation matrix to an axis-aligned rectangle. Return its axis-aligned bounding box as ``(x1, y1, x2, y2)``. """ if not matrix: return pos_x, pos_y, pos_x + width, pos_y + height transform_point = matrix.transform_point x1, y1 = transform_point(pos_x, pos_y) x2, y2 = transform_point(pos_x + width, pos_y) x3, y3 = transform_point(pos_x, pos_y + height) x4, y4 = transform_point(pos_x + width, pos_y + height) box_x1 = min(x1, x2, x3, x4) box_y1 = min(y1, y2, y3, y4) box_x2 = max(x1, x2, x3, x4) box_y2 = max(y1, y2, y3, y4) return box_x1, box_y1, box_x2, box_y2 def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None): """Gather anchors and other data related to specific positions in PDF. Currently finds anchors, links, bookmarks and inputs. """ # Get box transformation matrix. # "Transforms apply to block-level and atomic inline-level elements, # but do not apply to elements which may be split into # multiple inline-level boxes." 
def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
    """Collect position-related PDF data from a box and its descendants.

    The box tree is walked recursively through ``box.all_children()`` and
    the given containers are filled in place:

    - ``anchors``: mapping of anchor name to its ``(x, y)`` point; when a
      name is already present, the existing entry is kept,
    - ``links``: list receiving ``(link_type, target, rectangle, box)``,
    - ``bookmarks``: list receiving ``(level, label, (x, y), state)``,
    - ``inputs``: list receiving ``(element, style, rectangle)``.

    ``parent_matrix`` is the transformation matrix inherited from the
    ancestors, or ``None``. When the box has its own transform, the
    resulting matrix is also stored on ``box.transformation_matrix``.

    """
    style = box.style
    transform = style['transform']

    # Work out the matrix that applies to this box.
    # "Transforms apply to block-level and atomic inline-level elements,
    # but do not apply to elements which may be split into
    # multiple inline-level boxes."
    # https://www.w3.org/TR/css-transforms-1/#introduction
    if transform and not isinstance(box, boxes.InlineBox):
        reference_width = box.border_width()
        reference_height = box.border_height()
        origin_x, origin_y = style['transform_origin']
        origin_x = box.border_box_x() + percentage(origin_x, reference_width)
        origin_y = box.border_box_y() + percentage(
            origin_y, reference_height)

        # The transform functions are chained between a translation by
        # +origin and a translation by -origin.
        matrix = Matrix(e=origin_x, f=origin_y)
        for name, args in transform:
            if name == 'scale':
                scale_x, scale_y = args
                coefficients = (scale_x, 0, 0, scale_y, 0, 0)
            elif name == 'rotate':
                cosine = math.cos(args)
                sine = math.sin(args)
                coefficients = (cosine, sine, -sine, cosine, 0, 0)
            elif name == 'translate':
                coefficients = (
                    1, 0, 0, 1,
                    percentage(args[0], reference_width),
                    percentage(args[1], reference_height))
            elif name == 'skew':
                coefficients = (
                    1, math.tan(args[1]), math.tan(args[0]), 1, 0, 0)
            else:
                assert name == 'matrix'
                a, b, c, d, e, f = args
                coefficients = (a, b, c, d, e, f)
            matrix = Matrix(*coefficients) @ matrix
        box.transformation_matrix = Matrix(e=-origin_x, f=-origin_y) @ matrix
        matrix = box.transformation_matrix
        if parent_matrix:
            matrix = matrix @ parent_matrix
    else:
        matrix = parent_matrix

    bookmark_label = box.bookmark_label
    level = style['bookmark_level']
    bookmark_level = None if level == 'none' else level
    state = style['bookmark_state']
    link = style['link']
    anchor_name = style['anchor']

    has_bookmark = bookmark_label and bookmark_level
    # The 'link' property is inherited, so recording it again on text and
    # line boxes would be redundant.
    has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
    # With duplicate IDs, only the first occurrence becomes an anchor.
    has_anchor = anchor_name and anchor_name not in anchors
    is_input = box.is_input()

    if has_bookmark or has_link or has_anchor or is_input:
        # Inputs use their content box, everything else the hit area.
        if is_input:
            area = (
                box.content_box_x(), box.content_box_y(),
                box.width, box.height)
        else:
            area = box.hit_area()
        pos_x, pos_y, width, height = area

        if has_link or is_input:
            rectangle = rectangle_aabb(matrix, pos_x, pos_y, width, height)

        if has_link:
            token_type, url = link
            assert token_type == 'url'
            link_type, target = url
            assert isinstance(target, str)
            if link_type == 'external' and box.is_attachment():
                link_type = 'attachment'
            links.append((link_type, target, rectangle, box))

        if is_input:
            inputs.append((box.element, box.style, rectangle))

        # Bookmarks and anchors are points: only the top-left corner is
        # transformed, and it is stored as a fresh 2-tuple.
        if matrix and (has_bookmark or has_anchor):
            pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
        if has_bookmark:
            bookmarks.append(
                (bookmark_level, bookmark_label, (pos_x, pos_y), state))
        if has_anchor:
            anchors[anchor_name] = pos_x, pos_y

    for child in box.all_children():
        gather_anchors(child, anchors, links, bookmarks, inputs, matrix)