#!chapter_005/src/snippet_006.py
import typing
from borb.pdf import Document
from borb.pdf import PDF
from borb.toolkit import RegularExpressionTextExtraction
def main():
    """Print every regex match found in ``output.pdf`` with its bounding boxes.

    Opens ``output.pdf`` from the current working directory, loads it with a
    ``RegularExpressionTextExtraction`` for the pattern
    ``[lL]orem .* [dD]olor`` attached, and then prints one line per match
    (index and matched text) followed by one tab-indented line per bounding
    box (x, y, width, height).

    Raises:
        AssertionError: if ``PDF.loads`` returns ``None``.
    """
    # read the Document
    # fmt: off
    doc: typing.Optional[Document] = None
    extractor: RegularExpressionTextExtraction = RegularExpressionTextExtraction("[lL]orem .* [dD]olor")
    # The extractor is passed to PDF.loads together with the file handle;
    # its matches are read back from it once loading has finished.
    with open("output.pdf", "rb") as in_file_handle:
        doc = PDF.loads(in_file_handle, [extractor])
    # fmt: on

    # check whether we have read a Document
    assert doc is not None

    # print matching groups
    # NOTE(review): index 0 presumably selects the first page -- confirm
    # against the borb version in use.
    for i, m in enumerate(extractor.get_matches()[0]):
        print("%d %s" % (i, m.group(0)))
        for r in m.get_bounding_boxes():
            # %-formatting is kept on purpose so the printed number format
            # stays exactly as before.
            print(
                "\t%f %f %f %f" % (r.get_x(), r.get_y(), r.get_width(), r.get_height())
            )
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
#!chapter_006/src/snippet_005.py
from decimal import Decimal
from borb.pdf.canvas.layout.annotation.rubber_stamp_annotation import (
RubberStampAnnotation,
RubberStampAnnotationIconType,
)
from borb.pdf.canvas.geometry.rectangle import Rectangle
from borb.pdf import SingleColumnLayout
from borb.pdf import PageLayout
from borb.pdf import Paragraph
from borb.pdf import Document
from borb.pdf import Page
from borb.pdf.page.page_size import PageSize
from borb.pdf import PDF
def main():
    """Create a one-page PDF with a paragraph and a rubber stamp, then save it.

    Builds a ``Document`` with a single ``Page``, lays out one paragraph of
    placeholder text in a single column, adds a ``CONFIDENTIAL`` rubber stamp
    annotation whose 100 x 100 rectangle is centred on the A4 portrait
    dimensions, and writes the result to ``output.pdf`` in the current
    working directory (overwriting any existing file).
    """
    doc: Document = Document()
    page: Page = Page()
    doc.add_page(page)

    layout: PageLayout = SingleColumnLayout(page)
    # The text literal is left flush-left so that its content is unchanged.
    layout.add(
        Paragraph(
            """
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""
        )
    )

    # This is where the stamp is added
    # NOTE(review): the centre is computed from A4 portrait dimensions, which
    # assumes the default Page() has that size -- confirm.
    page_width: Decimal = PageSize.A4_PORTRAIT.value[0]
    page_height: Decimal = PageSize.A4_PORTRAIT.value[1]
    stamp_size: Decimal = Decimal(100)
    half_stamp: Decimal = stamp_size / Decimal(2)
    page.add_annotation(
        RubberStampAnnotation(
            # Shift the rectangle's first two coordinates back by half the
            # stamp size so that the square is centred on the page midpoint.
            Rectangle(
                page_width / Decimal(2) - half_stamp,
                page_height / Decimal(2) - half_stamp,
                stamp_size,
                stamp_size,
            ),
            name=RubberStampAnnotationIconType.CONFIDENTIAL,
        )
    )

    # store
    with open("output.pdf", "wb") as out_file_handle:
        PDF.dumps(out_file_handle, doc)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
1条答案
按热度按时间uqxowvwt1#
**免责声明:** 我是 `borb` 的作者,本答案使用了该库。按照我对你的问题的理解,你想在页面上找到某个单词,然后在它上面加盖一个图章。
我们把它分成两部分:
查找单词在页面上的位置
在这个代码片段中,我们使用 `RegularExpressionTextExtraction` 来处理 `Page` 事件(例如渲染文本、图像等)。这个类充当 `EventListener`,并记录正在渲染的文本中哪些部分与给定的正则表达式匹配。然后我们就可以打印匹配到的文本及其位置。
在页面上的给定位置添加图章
在下一个片段中,我们将:创建一个包含一段文本的PDF,然后通过精确的坐标在页面中央添加一个图章。
当然,您可以修改此代码片段,使其只添加图章,并在现有的PDF上进行操作(而不是新建一个)。
结果应该是这样的:
为了更改图章的外观,我建议您查看文档。