我想直接将matplotlib图表嵌入到ReportLab生成的PDF中——即不先保存为PNG,再把PNG嵌入到PDF中(我认为这样可以得到质量更好的输出)。有人知道是否有适用于ReportLab的matplotlib Flowable吗?谢谢
pzfprimi1#
下面是一个使用pdfrw的解决方案:
#!/usr/bin/env python# encoding: utf-8"""matplotlib_example.py An simple example of how to insert matplotlib generated figures into a ReportLab platypus document."""import matplotlibmatplotlib.use('PDF')import matplotlib.pyplot as pltimport cStringIOfrom pdfrw import PdfReaderfrom pdfrw.buildxobj import pagexobjfrom pdfrw.toreportlab import makerlfrom reportlab.platypus import Flowablefrom reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHTfrom reportlab.platypus import SimpleDocTemplate, Paragraph, Spacerfrom reportlab.lib.styles import getSampleStyleSheetfrom reportlab.rl_config import defaultPageSizefrom reportlab.lib.units import inchPAGE_HEIGHT=defaultPageSize[1]; PAGE_WIDTH=defaultPageSize[0]styles = getSampleStyleSheet()class PdfImage(Flowable): """PdfImage wraps the first page from a PDF file as a Flowablewhich can be included into a ReportLab Platypus document.Based on the vectorpdf extension in rst2pdf (http://code.google.com/p/rst2pdf/)""" def __init__(self, filename_or_object, width=None, height=None, kind='direct'): from reportlab.lib.units import inch # If using StringIO buffer, set pointer to begining if hasattr(filename_or_object, 'read'): filename_or_object.seek(0) page = PdfReader(filename_or_object, decompress=False).pages[0] self.xobj = pagexobj(page) self.imageWidth = width self.imageHeight = height x1, y1, x2, y2 = self.xobj.BBox self._w, self._h = x2 - x1, y2 - y1 if not self.imageWidth: self.imageWidth = self._w if not self.imageHeight: self.imageHeight = self._h self.__ratio = float(self.imageWidth)/self.imageHeight if kind in ['direct','absolute'] or width==None or height==None: self.drawWidth = width or self.imageWidth self.drawHeight = height or self.imageHeight elif kind in ['bound','proportional']: factor = min(float(width)/self._w,float(height)/self._h) self.drawWidth = self._w*factor self.drawHeight = self._h*factor def wrap(self, aW, aH): return self.drawWidth, self.drawHeight def drawOn(self, canv, x, y, _sW=0): if _sW > 0 and 
hasattr(self, 'hAlign'): a = self.hAlign if a in ('CENTER', 'CENTRE', TA_CENTER): x += 0.5*_sW elif a in ('RIGHT', TA_RIGHT): x += _sW elif a not in ('LEFT', TA_LEFT): raise ValueError("Bad hAlign value " + str(a)) xobj = self.xobj xobj_name = makerl(canv._doc, xobj) xscale = self.drawWidth/self._w yscale = self.drawHeight/self._h x -= xobj.BBox[0] * xscale y -= xobj.BBox[1] * yscale canv.saveState() canv.translate(x, y) canv.scale(xscale, yscale) canv.doForm(xobj_name) canv.restoreState()Title = "Hello world"pageinfo = "platypus example"def myFirstPage(canvas, doc): canvas.saveState() canvas.setFont('Times-Bold',16) canvas.drawCentredString(PAGE_WIDTH/2.0, PAGE_HEIGHT-108, Title) canvas.setFont('Times-Roman',9) canvas.drawString(inch, 0.75 * inch, "First Page / %s" % pageinfo) canvas.restoreState()def myLaterPages(canvas, doc): canvas.saveState() canvas.setFont('Times-Roman',9) canvas.drawString(inch, 0.75 * inch, "Page %d %s" % (doc.page, pageinfo)) canvas.restoreState()def go(): fig = plt.figure(figsize=(4, 3)) plt.plot([1,2,3,4]) plt.ylabel('some numbers') imgdata = cStringIO.StringIO() fig.savefig(imgdata,format='PDF') doc = SimpleDocTemplate("document.pdf") Story = [Spacer(1,2*inch)] style = styles["Normal"] for i in range(5): bogustext = ("This is Paragraph number %s. " % i) *20 p = Paragraph(bogustext, style) Story.append(p) Story.append(Spacer(1,0.2*inch)) pi = PdfImage(imgdata) Story.append(pi) Story.append(Spacer(1,0.2*inch)) doc.build(Story, onFirstPage=myFirstPage, onLaterPages=myLaterPages)if __name__ == '__main__': go()
#!/usr/bin/env python
# encoding: utf-8
"""matplotlib_example.py
A simple example of how to insert matplotlib generated figures
into a ReportLab platypus document.
"""
import matplotlib
matplotlib.use('PDF')
import matplotlib.pyplot as plt
import cStringIO
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
from reportlab.platypus import Flowable
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.rl_config import defaultPageSize
from reportlab.lib.units import inch
# Default page dimensions taken from ReportLab's configuration.
PAGE_WIDTH = defaultPageSize[0]
PAGE_HEIGHT = defaultPageSize[1]
# Sample paragraph styles used when building the example story.
styles = getSampleStyleSheet()
class PdfImage(Flowable):
    """Wrap the first page of a PDF as a ReportLab Platypus flowable.

    The page is converted to a form XObject with pdfrw and replayed on the
    canvas when the flowable is drawn.

    Based on the vectorpdf extension in rst2pdf
    (http://code.google.com/p/rst2pdf/).
    """

    def __init__(self, filename_or_object, width=None, height=None,
                 kind='direct'):
        """Read the first PDF page and work out the size to draw it at.

        filename_or_object -- a PDF file name, or a file-like object holding
            PDF data (it is rewound before being read).
        width, height -- requested size; a missing value falls back to the
            size of the page's bounding box.
        kind -- 'direct' / 'absolute' draws at exactly the requested size;
            'bound' / 'proportional' scales the page uniformly so that it
            fits inside width x height.

        Raises ValueError when both width and height are given and kind is
        not one of the values listed above.
        """
        # If using a StringIO buffer, set the pointer to the beginning.
        if hasattr(filename_or_object, 'read'):
            filename_or_object.seek(0)
        page = PdfReader(filename_or_object, decompress=False).pages[0]
        self.xobj = pagexobj(page)
        self.imageWidth = width
        self.imageHeight = height
        x1, y1, x2, y2 = self.xobj.BBox
        # Natural size of the page, taken from its bounding box.
        self._w, self._h = x2 - x1, y2 - y1
        if not self.imageWidth:
            self.imageWidth = self._w
        if not self.imageHeight:
            self.imageHeight = self._h
        self.__ratio = float(self.imageWidth)/self.imageHeight
        if kind in ['direct', 'absolute'] or width is None or height is None:
            self.drawWidth = width or self.imageWidth
            self.drawHeight = height or self.imageHeight
        elif kind in ['bound', 'proportional']:
            factor = min(float(width)/self._w, float(height)/self._h)
            self.drawWidth = self._w*factor
            self.drawHeight = self._h*factor
        else:
            # Fail here rather than later in wrap() with a missing attribute.
            raise ValueError("Bad kind value " + str(kind))

    def wrap(self, aW, aH):
        """Return the (width, height) the flowable is drawn at."""
        return self.drawWidth, self.drawHeight

    def drawOn(self, canv, x, y, _sW=0):
        """Draw the wrapped page on canv at (x, y).

        _sW is the spare horizontal space; when it is positive and the
        flowable has an hAlign attribute, x is shifted accordingly.

        Raises ValueError for an unrecognised hAlign value.
        """
        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5*_sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        xobj = self.xobj
        xobj_name = makerl(canv._doc, xobj)
        # float() keeps this a true division on Python 2 as well.
        xscale = float(self.drawWidth)/self._w
        yscale = float(self.drawHeight)/self._h
        # Compensate for a bounding box that does not start at the origin.
        x -= xobj.BBox[0] * xscale
        y -= xobj.BBox[1] * yscale
        canv.saveState()
        canv.translate(x, y)
        canv.scale(xscale, yscale)
        canv.doForm(xobj_name)
        canv.restoreState()
# Title string that myFirstPage draws centred on the first page.
Title = "Hello world"
# Text included in the per-page line drawn by myFirstPage and myLaterPages.
pageinfo = "platypus example"
def myFirstPage(canvas, doc):
    """Draw the centred title and the page-info line on the first page."""
    title_x = PAGE_WIDTH/2.0
    title_y = PAGE_HEIGHT-108
    info_line = "First Page / %s" % pageinfo
    canvas.saveState()
    canvas.setFont('Times-Bold',16)
    canvas.drawCentredString(title_x, title_y, Title)
    canvas.setFont('Times-Roman',9)
    canvas.drawString(inch, 0.75 * inch, info_line)
    canvas.restoreState()
def myLaterPages(canvas, doc):
    """Draw the "Page N ..." line on every page after the first.

    The canvas state is saved and restored around the drawing, and the font
    is set explicitly, so the line is drawn the same way as the one in
    myFirstPage and the font change does not leak into the page content.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman',9)
    canvas.drawString(inch, 0.75 * inch, "Page %d %s" % (doc.page, pageinfo))
    canvas.restoreState()
def go():
    """Build document.pdf: five paragraphs, each followed by the same figure.

    The matplotlib figure is rendered once as PDF into an in-memory buffer
    and wrapped in a PdfImage flowable for every paragraph.
    """
    fig = plt.figure(figsize=(4, 3))
    plt.plot([1,2,3,4])
    plt.ylabel('some numbers')
    # NOTE(review): cStringIO is Python 2 only; io.BytesIO is the usual
    # replacement on Python 3 -- confirm the target interpreter.
    imgdata = cStringIO.StringIO()
    fig.savefig(imgdata,format='PDF')
    # The rendered PDF now lives in imgdata, so the figure can be released.
    plt.close(fig)
    doc = SimpleDocTemplate("document.pdf")
    Story = [Spacer(1,2*inch)]
    style = styles["Normal"]
    for i in range(5):
        bogustext = ("This is Paragraph number %s. " % i) *20
        p = Paragraph(bogustext, style)
        Story.append(p)
        Story.append(Spacer(1,0.2*inch))
        pi = PdfImage(imgdata)
        Story.append(pi)
        Story.append(Spacer(1,0.2*inch))
    doc.build(Story, onFirstPage=myFirstPage, onLaterPages=myLaterPages)


if __name__ == '__main__':
    go()
niknxzdl2#
pdfrw的作者Patrick Maupin在另一个question中提供了一个更简单、更不复杂的答案。(感谢他对我之前的回答的赞美之词。)他还提到,在使用pdfrw提取之前,将matplotlib数据保存到多页PDF中,可以通过减少重复资源来减少最终reportlab PDF的大小。因此,这里是他的代码示例的修改,它演示了如何通过首先写入多页matplotlib PDF来减小PDF文件大小。对于此示例,文件大小减少了约80%。
import osfrom matplotlib import pyplot as pltfrom matplotlib.backends.backend_pdf import PdfPagesfrom reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Flowablefrom reportlab.lib.units import inchfrom reportlab.lib.styles import getSampleStyleSheetfrom pdfrw import PdfReader, PdfDictfrom pdfrw.buildxobj import pagexobjfrom pdfrw.toreportlab import makerltry: from cStringIO import StringIO as BytesIOexcept ImportError: from io import BytesIOstyles = getSampleStyleSheet()style = styles['Normal']class PdfImage(Flowable): """ Generates a reportlab image flowable for matplotlib figures. It is initialized with either a matplotlib figure or a pointer to a list of pagexobj objects and an index for the pagexobj to be used. """ def __init__(self, fig=None, width=200, height=200, cache=None, cacheindex=0): self.img_width = width self.img_height = height if fig is None and cache is None: raise ValueError("Either 'fig' or 'cache' must be provided") if fig is not None: imgdata = BytesIO() fig.savefig(imgdata, format='pdf') imgdata.seek(0) page, = PdfReader(imgdata).pages image = pagexobj(page) self.img_data = image else: self.img_data = None self.cache = cache self.cacheindex = cacheindex def wrap(self, width, height): return self.img_width, self.img_height def drawOn(self, canv, x, y, _sW=0): if _sW > 0 and hasattr(self, 'hAlign'): a = self.hAlign if a in ('CENTER', 'CENTRE', TA_CENTER): x += 0.5*_sW elif a in ('RIGHT', TA_RIGHT): x += _sW elif a not in ('LEFT', TA_LEFT): raise ValueError("Bad hAlign value " + str(a)) canv.saveState() if self.img_data is not None: img = self.img_data else: img = self.cache[self.cacheindex] if isinstance(img, PdfDict): xscale = self.img_width / img.BBox[2] yscale = self.img_height / img.BBox[3] canv.translate(x, y) canv.scale(xscale, yscale) canv.doForm(makerl(canv, img)) else: canv.drawImage(img, x, y, self.img_width, self.img_height) canv.restoreState()class PdfImageCache(object): """ Saves matplotlib figures to a temporary 
multi-page PDF file using the 'savefig' method. When closed the images are extracted and saved to the attribute 'cache'. The temporary PDF file is then deleted. The 'savefig' returns a PdfImage object with a pointer to the 'cache' list and an index for the figure. Use of this cache reduces duplicated resources in the reportlab generated PDF file. Use is similar to matplotlib's PdfPages object. When not used as a context manager, the 'close()' method must be explictly called before the reportlab document is built. """ def __init__(self): self.pdftempfile = '_temporary_pdf_image_cache_.pdf' self.pdf = PdfPages(self.pdftempfile) self.cache = [] self.count = 0 def __enter__(self): return self def __exit__(self, *args): self.close() def close(self, *args): self.pdf.close() pages = PdfReader(self.pdftempfile).pages pages = [pagexobj(x) for x in pages] self.cache.extend(pages) os.remove(self.pdftempfile) def savefig(self, fig, width=200, height=200): self.pdf.savefig(fig) index = self.count self.count += 1 return PdfImage(width=width, height=height, cache=self.cache, cacheindex=index)def make_report(outfn, nfig=5): """ Makes a dummy report with nfig matplotlib plots. """ doc = SimpleDocTemplate(outfn) style = styles["Normal"] story = [Spacer(0, inch)] for j in range(nfig): fig = plt.figure(figsize=(4, 3)) plt.plot([1, 2, 3, 4], [1, 4, 9, 26]) plt.ylabel('some numbers') plt.title('My Figure %i' % (j+1)) img = PdfImage(fig, width=400, height=400) plt.close() for i in range(10): bogustext = ("Paragraph number %s. " % i) p = Paragraph(bogustext, style) story.append(p) story.append(Spacer(1, 0.2*inch)) story.append(img) for i in range(10): bogustext = ("Paragraph number %s. " % i) p = Paragraph(bogustext, style) story.append(p) story.append(Spacer(1, 0.2*inch)) doc.build(story)def make_report_cached_figs(outfn, nfig=5): """ Makes a dummy report with nfig matplotlib plots using PdfImageCache to reduce PDF file size. 
""" doc = SimpleDocTemplate(outfn) style = styles["Normal"] story = [Spacer(0, inch)] with PdfImageCache() as pdfcache: for j in range(nfig): fig = plt.figure(figsize=(4, 3)) plt.plot([1, 2, 3, 4], [1, 4, 9, 26]) plt.ylabel('some numbers') plt.title('My Figure %i' % (j+1)) img = pdfcache.savefig(fig, width=400, height=400) plt.close() for i in range(10): bogustext = ("Paragraph number %s. " % i) p = Paragraph(bogustext, style) story.append(p) story.append(Spacer(1, 0.2*inch)) story.append(img) for i in range(10): bogustext = ("Paragraph number %s. " % i) p = Paragraph(bogustext, style) story.append(p) story.append(Spacer(1, 0.2*inch)) doc.build(story)make_report("hello_pdf.pdf", 50)make_report_cached_figs("hello_pdf_cached_figs.pdf", 50)
import os
import tempfile
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Flowable
from pdfrw import PdfReader, PdfDict
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
try:
    from cStringIO import StringIO as BytesIO
except ImportError:
    from io import BytesIO
# Sample ReportLab style sheet; its 'Normal' style is used for the report text.
styles = getSampleStyleSheet()
style = styles['Normal']
class PdfImage(Flowable):
    """
    Generates a reportlab image flowable for matplotlib figures. It is initialized
    with either a matplotlib figure or a pointer to a list of pagexobj objects and
    an index for the pagexobj to be used.
    """
    def __init__(self, fig=None, width=200, height=200, cache=None, cacheindex=0):
        """Store the draw size and either render fig now or remember the cache slot.

        Raises ValueError when neither fig nor cache is given.
        """
        self.img_width = width
        self.img_height = height
        if fig is None and cache is None:
            raise ValueError("Either 'fig' or 'cache' must be provided")
        if fig is not None:
            # Render the figure to an in-memory PDF and keep its single page.
            imgdata = BytesIO()
            fig.savefig(imgdata, format='pdf')
            imgdata.seek(0)
            page, = PdfReader(imgdata).pages
            image = pagexobj(page)
            self.img_data = image
        else:
            # The image is looked up in the cache when the flowable is drawn.
            self.img_data = None
        self.cache = cache
        self.cacheindex = cacheindex

    def wrap(self, width, height):
        """Return the fixed (width, height) the image is drawn at."""
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """Draw the image on canv at (x, y), shifting x according to hAlign.

        Raises ValueError for an unrecognised hAlign value.
        """
        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5*_sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        if self.img_data is not None:
            img = self.img_data
        else:
            img = self.cache[self.cacheindex]
        if isinstance(img, PdfDict):
            # float() keeps this a true division on Python 2 as well.
            xscale = float(self.img_width) / img.BBox[2]
            yscale = float(self.img_height) / img.BBox[3]
            canv.translate(x, y)
            canv.scale(xscale, yscale)
            canv.doForm(makerl(canv, img))
        else:
            canv.drawImage(img, x, y, self.img_width, self.img_height)
        canv.restoreState()
class PdfImageCache(object):
    """
    Saves matplotlib figures to a temporary multi-page PDF file using the 'savefig'
    method. When closed the images are extracted and saved to the attribute 'cache'.
    The temporary PDF file is then deleted. The 'savefig' returns a PdfImage object
    with a pointer to the 'cache' list and an index for the figure. Use of this
    cache reduces duplicated resources in the reportlab generated PDF file.

    Use is similar to matplotlib's PdfPages object. When not used as a context
    manager, the 'close()' method must be explicitly called before the reportlab
    document is built.
    """
    def __init__(self):
        """Open a multi-page PDF in a uniquely named temporary file."""
        # A unique name avoids clashes between concurrent runs and does not
        # require the current working directory to be writable.
        handle, self.pdftempfile = tempfile.mkstemp(suffix='.pdf')
        os.close(handle)
        self.pdf = PdfPages(self.pdftempfile)
        self.cache = []
        self.count = 0
        self._closed = False

    def __enter__(self):
        """Return the cache itself for use in a 'with' statement."""
        return self

    def __exit__(self, *args):
        """Close the cache when the 'with' block ends."""
        self.close()

    def close(self, *args):
        """Finish the temporary PDF, load its pages into 'cache', delete the file.

        Calling close() more than once is harmless; later calls do nothing.
        """
        if self._closed:
            return
        self._closed = True
        self.pdf.close()
        try:
            pages = PdfReader(self.pdftempfile).pages
            self.cache.extend(pagexobj(x) for x in pages)
        finally:
            # Remove the temporary file even when reading it back failed.
            if os.path.exists(self.pdftempfile):
                os.remove(self.pdftempfile)

    def savefig(self, fig, width=200, height=200):
        """Append fig to the temporary PDF and return a PdfImage bound to its slot."""
        self.pdf.savefig(fig)
        index = self.count
        self.count += 1
        return PdfImage(width=width, height=height, cache=self.cache, cacheindex=index)
def make_report(outfn, nfig=5):
    """
    Makes a dummy report with nfig matplotlib plots.

    Each figure is wrapped directly in a PdfImage and placed between two
    runs of ten short paragraphs; the document is written to outfn.
    """
    doc = SimpleDocTemplate(outfn)
    style = styles["Normal"]
    story = [Spacer(0, inch)]
    for j in range(nfig):
        fig = plt.figure(figsize=(4, 3))
        plt.plot([1, 2, 3, 4], [1, 4, 9, 26])
        plt.ylabel('some numbers')
        plt.title('My Figure %i' % (j+1))
        img = PdfImage(fig, width=400, height=400)
        # PdfImage has already rendered the figure, so it can be released.
        plt.close()
        for i in range(10):
            bogustext = ("Paragraph number %s. " % i)
            p = Paragraph(bogustext, style)
            story.append(p)
            story.append(Spacer(1, 0.2*inch))
        story.append(img)
        for i in range(10):
            bogustext = ("Paragraph number %s. " % i)
            p = Paragraph(bogustext, style)
            story.append(p)
            story.append(Spacer(1, 0.2*inch))
    doc.build(story)
def make_report_cached_figs(outfn, nfig=5):
    """
    Makes a dummy report with nfig matplotlib plots using PdfImageCache
    to reduce PDF file size.

    The document is built only after the 'with' block has closed the cache,
    because the cached pages are loaded when the cache is closed.
    """
    doc = SimpleDocTemplate(outfn)
    style = styles["Normal"]
    story = [Spacer(0, inch)]
    with PdfImageCache() as pdfcache:
        for j in range(nfig):
            fig = plt.figure(figsize=(4, 3))
            plt.plot([1, 2, 3, 4], [1, 4, 9, 26])
            plt.ylabel('some numbers')
            plt.title('My Figure %i' % (j+1))
            img = pdfcache.savefig(fig, width=400, height=400)
            plt.close()
            for i in range(10):
                bogustext = ("Paragraph number %s. " % i)
                p = Paragraph(bogustext, style)
                story.append(p)
                story.append(Spacer(1, 0.2*inch))
            story.append(img)
            for i in range(10):
                bogustext = ("Paragraph number %s. " % i)
                p = Paragraph(bogustext, style)
                story.append(p)
                story.append(Spacer(1, 0.2*inch))
    doc.build(story)
# Build the same 50-figure report twice, without and with the figure cache,
# writing each to its own output file.
make_report("hello_pdf.pdf", 50)
make_report_cached_figs("hello_pdf_cached_figs.pdf", 50)
由于matplotlib的PdfPages只接受文件路径作为输入,因此PdfImageCache对象将多页PDF写入临时文件。要在内存中完成这一过程则需要更多的工作。
q3aa05253#
没有一个,但我在自己使用MatPlotLib和ReportLab时所做的是生成PNG,然后嵌入PNG,这样我就不需要使用PIL。但是,如果您使用PIL,我相信您应该能够使用MatPlotLib和ReportLab生成和嵌入EPS。
ckocjqey4#
Python 3的解决方案,并将matplotlib图形嵌入为矢量图像(无光栅化)
import matplotlib.pyplot as pltfrom io import BytesIOfrom reportlab.pdfgen import canvasfrom reportlab.graphics import renderPDFfrom svglib.svglib import svg2rlgfig = plt.figure(figsize=(4, 3))plt.plot([1,2,3,4])plt.ylabel('some numbers')imgdata = BytesIO()fig.savefig(imgdata, format='svg')imgdata.seek(0) # rewind the datadrawing=svg2rlg(imgdata)c = canvas.Canvas('test2.pdf')renderPDF.draw(drawing,c, 10, 40)c.drawString(10, 300, "So nice it works")c.showPage()c.save()
import matplotlib.pyplot as plt
from io import BytesIO
from reportlab.pdfgen import canvas
from reportlab.graphics import renderPDF
from svglib.svglib import svg2rlg

# Create the example figure.
fig = plt.figure(figsize=(4, 3))
plt.plot([1,2,3,4])
plt.ylabel('some numbers')

# Render the figure as SVG into an in-memory buffer.
imgdata = BytesIO()
fig.savefig(imgdata, format='svg')
imgdata.seek(0) # rewind the data

# Convert the SVG into a ReportLab drawing and place it on a PDF canvas.
drawing=svg2rlg(imgdata)
c = canvas.Canvas('test2.pdf')
renderPDF.draw(drawing,c, 10, 40)
c.drawString(10, 300, "So nice it works")
c.showPage()
c.save()
svglib可从Conda-Forge获得。
svglib
e1xvtsh35#
我创建了一个示例Flowable,用于ReportLab的商业RML模板语言。这里的示例非常有用,但需要稍微调整一下才能作为RML plugInFlow元素工作。GitHub上有一个工作示例。下面是Flowable本身:
class SvgFlowable(Flowable): """Convert byte stream containing SVG into a Reportlab Flowable.""" def __init__(self, svg: BytesIO) -> None: """Convert SVG to RML drawing on initializtion.""" svg.seek(0) self.drawing: Drawing = svg2rlg(svg) self.width: int = self.drawing.minWidth() self.height: int = self.drawing.height self.drawing.setProperties({"vAlign": "CENTER", "hAlign": "CENTER"}) def wrap(self, *_args): """Return diagram size.""" return (self.width, self.height) def draw(self) -> None: """Render the chart.""" renderPDF.draw(self.drawing, self.canv, 0, 0)
class SvgFlowable(Flowable):
    """Convert byte stream containing SVG into a Reportlab Flowable."""

    def __init__(self, svg: BytesIO) -> None:
        """Convert SVG to RML drawing on initialization.

        Raises:
            ValueError: if svg2rlg returns no drawing for the stream.
        """
        super().__init__()
        svg.seek(0)
        drawing = svg2rlg(svg)
        if drawing is None:
            # Report the failure here instead of an AttributeError on None below.
            raise ValueError("Could not convert SVG data to a drawing")
        self.drawing: Drawing = drawing
        self.width: float = self.drawing.minWidth()
        self.height: float = self.drawing.height
        self.drawing.setProperties({"vAlign": "CENTER", "hAlign": "CENTER"})

    def wrap(self, *_args):
        """Return diagram size."""
        return (self.width, self.height)

    def draw(self) -> None:
        """Render the chart."""
        renderPDF.draw(self.drawing, self.canv, 0, 0)
5条答案
按热度按时间pzfprimi1#
下面是一个使用pdfrw的解决方案:
niknxzdl2#
pdfrw的作者Patrick Maupin在另一个question中提供了一个更简单、更不复杂的答案。(感谢他对我之前的回答的赞美之词。)他还提到,在使用pdfrw提取之前,将matplotlib数据保存到多页PDF中,可以通过减少重复资源来减少最终reportlab PDF的大小。因此,这里是他的代码示例的修改,它演示了如何通过首先写入多页matplotlib PDF来减小PDF文件大小。对于此示例,文件大小减少了约80%。
由于matplotlib的PdfPages只接受文件路径作为输入,因此PdfImageCache对象将多页PDF写入临时文件。要在内存中完成这一过程则需要更多的工作。
q3aa05253#
没有一个,但我在自己使用MatPlotLib和ReportLab时所做的是生成PNG,然后嵌入PNG,这样我就不需要使用PIL。但是,如果您使用PIL,我相信您应该能够使用MatPlotLib和ReportLab生成和嵌入EPS。
ckocjqey4#
Python 3的解决方案,并将matplotlib图形嵌入为矢量图像(无光栅化)
svglib
可从Conda-Forge获得。
e1xvtsh35#
我创建了一个示例Flowable,用于ReportLab的商业RML模板语言。这里的示例非常有用,但需要稍微调整一下才能作为RML plugInFlow元素工作。
GitHub上有一个工作示例。
下面是Flowable本身: