最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

go - How to increase the quality of image extracted from a pdf using unipdf - Stack Overflow

programmeradmin5浏览0评论
// extractPDFPagesAsImages renders every page of the PDF at pdfPath and writes
// each one into outputDir as page_<n>.png, where n is the page number starting
// at 1. It stops at, and returns, the first error encountered.
func extractPDFPagesAsImages(pdfPath string, outputDir string) error {
    // Open the PDF file
    f, err := os.Open(pdfPath)
    if err != nil {
        return err
    }
    defer f.Close()

    // Read the PDF document
    doc, err := model.NewPdfReader(f)
    if err != nil {
        return err
    }

    numPages, err := doc.GetNumPages()
    if err != nil {
        return err
    }

    // Pages are addressed starting at 1.
    for pageNum := 1; pageNum <= numPages; pageNum++ {
        // Fail early if the page itself cannot be loaded.
        if _, err := doc.GetPage(pageNum); err != nil {
            return err
        }

        // Render the page to an image
        img, err := renderPDFPageToImage(doc, pageNum)
        if err != nil {
            return err
        }

        // Save the image as PNG
        outputFilePath := fmt.Sprintf("%s/page_%d.png", outputDir, pageNum)
        outFile, err := os.Create(outputFilePath)
        if err != nil {
            return err
        }

        // Close inside the loop instead of deferring: a defer here would keep
        // every output file open until the whole function returns.
        encodeErr := png.Encode(outFile, img)
        closeErr := outFile.Close()
        if encodeErr != nil {
            return encodeErr
        }
        // A failed Close can mean the data never reached disk, so report it.
        if closeErr != nil {
            return closeErr
        }
    }

    return nil
}

This is my function that uses unipdf to convert each page of a PDF file into an image. It works; the only issue is that the output images are very low resolution. How do I increase the quality so that the text appears crisp?

I have looked through the documentation and even tried ChatGPT, but nothing has given me the right answer.

// extractPDFPagesAsImages renders every page of the PDF at pdfPath and writes
// each one into outputDir as page_<n>.png, where n is the page number starting
// at 1. It stops at, and returns, the first error encountered.
func extractPDFPagesAsImages(pdfPath string, outputDir string) error {
    // Open the PDF file
    f, err := os.Open(pdfPath)
    if err != nil {
        return err
    }
    defer f.Close()

    // Read the PDF document
    doc, err := model.NewPdfReader(f)
    if err != nil {
        return err
    }

    numPages, err := doc.GetNumPages()
    if err != nil {
        return err
    }

    // Pages are addressed starting at 1.
    for pageNum := 1; pageNum <= numPages; pageNum++ {
        // Fail early if the page itself cannot be loaded.
        if _, err := doc.GetPage(pageNum); err != nil {
            return err
        }

        // Render the page to an image
        img, err := renderPDFPageToImage(doc, pageNum)
        if err != nil {
            return err
        }

        // Save the image as PNG
        outputFilePath := fmt.Sprintf("%s/page_%d.png", outputDir, pageNum)
        outFile, err := os.Create(outputFilePath)
        if err != nil {
            return err
        }

        // Close inside the loop instead of deferring: a defer here would keep
        // every output file open until the whole function returns.
        encodeErr := png.Encode(outFile, img)
        closeErr := outFile.Close()
        if encodeErr != nil {
            return encodeErr
        }
        // A failed Close can mean the data never reached disk, so report it.
        if closeErr != nil {
            return closeErr
        }
    }

    return nil
}

This is my function that uses unipdf to convert each page of a PDF file into an image. It works; the only issue is that the output images are very low resolution. How do I increase the quality so that the text appears crisp?

I have looked through the documentation and even tried ChatGPT, but nothing has given me the right answer.

Share Improve this question edited 11 hours ago Lakshya Agarwal asked 11 hours ago Lakshya AgarwalLakshya Agarwal 11 bronze badge New contributor Lakshya Agarwal is a new contributor to this site. Take care in asking for clarification, commenting, and answering. Check out our Code of Conduct.
Add a comment  | 

1 Answer 1

Reset to default 0

You need to increase the rendering resolution (DPI, dots per inch) and use appropriate image output settings.

// renderPDFPageToImage rasterizes page pageNum of doc (page numbers start at
// 1) and returns the resulting image.
//
// Resolution is controlled through the image device's OutputWidth field, in
// pixels: a larger width produces a sharper image.
//
// NOTE(review): written against the unipdf v3 render API (NewImageDevice,
// ImageDevice.OutputWidth, ImageDevice.Render); the height is presumably
// derived from the page's aspect ratio. Confirm against the unipdf version
// pinned in go.mod.
func renderPDFPageToImage(doc *model.PdfReader, pageNum int) (image.Image, error) {
    page, err := doc.GetPage(pageNum)
    if err != nil {
        return nil, err
    }

    device := render.NewImageDevice()
    // 2480 px is roughly the width of an A4 page at 300 DPI; raise it for
    // crisper text, lower it for smaller files.
    device.OutputWidth = 2480

    img, err := device.Render(page)
    if err != nil {
        return nil, err
    }

    return img, nil
}

// extractPDFPagesAsImages renders every page of the PDF at pdfPath and writes
// each one into outputDir as page_<n>.png, where n is the page number starting
// at 1. It stops at, and returns, the first error encountered.
func extractPDFPagesAsImages(pdfPath string, outputDir string) error {
    f, err := os.Open(pdfPath)
    if err != nil {
        return err
    }
    defer f.Close()

    doc, err := model.NewPdfReader(f)
    if err != nil {
        return err
    }

    numPages, err := doc.GetNumPages()
    if err != nil {
        return err
    }

    // The encoder settings are the same for every page, so build it once.
    // PNG is lossless: NoCompression trades larger files for faster encoding
    // and has no effect on image quality.
    encoder := png.Encoder{
        CompressionLevel: png.NoCompression,
    }

    // Pages are addressed starting at 1.
    for pageNum := 1; pageNum <= numPages; pageNum++ {
        img, err := renderPDFPageToImage(doc, pageNum)
        if err != nil {
            return err
        }

        outputFilePath := fmt.Sprintf("%s/page_%d.png", outputDir, pageNum)
        outFile, err := os.Create(outputFilePath)
        if err != nil {
            return err
        }

        // Always close before returning so the handle is released even when
        // encoding fails; the encode error takes precedence.
        encodeErr := encoder.Encode(outFile, img)
        closeErr := outFile.Close()
        if encodeErr != nil {
            return encodeErr
        }
        // A failed Close can mean the data never reached disk, so report it.
        if closeErr != nil {
            return closeErr
        }
    }

    return nil
}
发布评论

评论列表(0)

  1. 暂无评论