Skip to content

Commit

Permalink
feat: support image replace
Browse files Browse the repository at this point in the history
- Read word/media/* in `parseArchive`
- Save media file changes in `Write`
- Add test cases for image replacement
- Update README to include image replace usage

closes #15
  • Loading branch information
TeCHiScy committed Apr 20, 2024
1 parent ec3bf4a commit 18f3362
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 1 deletion.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ func main() {
panic(err)
}

// read the replacement image
bs, err := os.ReadFile("./test/cameraman.jpg")
if err != nil {
panic(err)
}

// replace the lenna image with the cameraman image
// image attributes (e.g. size and position) remain unchanged
err = doc.SetFile("word/media/image1.jpg", bs)
if err != nil {
panic(err)
}

// write out a new file
err = doc.WriteToFile("replaced.docx")
if err != nil {
Expand All @@ -98,6 +111,13 @@ Although I do not recommend to do that as the WordprocessingML spec is somewhat

But, for whatever reason there might be, you can do that.

#### Image replace
Replacing an image works slightly differently from replacing text: instead of using a placeholder, you need to know the image's path within the docx archive.

To find the path, extract the contents of the docx file with `unzip` or a similar tool, then locate the image to be replaced inside the `word/media/` folder. Assuming the path is `word/media/image1.jpg`, you can use the `SetFile` method to overwrite the old image with a new one. Note that:
- The image format (encoding) must stay the same across the replacement; for example, a JPEG image should be replaced by another JPEG image.
- Only the image file itself is replaced; the image metadata stored in the document is not changed. The new image therefore appears in the original location and at the original size. In other words, the image attributes remain unchanged.

### ➤ Terminology
To avoid confusion, here is a list of terms which you might come across.

Expand Down
13 changes: 12 additions & 1 deletion document.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ var (
HeaderPathRegex = regexp.MustCompile(`word/header[0-9]*.xml`)
// FooterPathRegex matches all footer files inside the docx-archive.
FooterPathRegex = regexp.MustCompile(`word/footer[0-9]*.xml`)
// MediaPathRegex matches all media files inside the docx-archive.
MediaPathRegex = regexp.MustCompile(`word/media/*`)
)

// Document exposes the main API of the library. It represents the actual docx document which is going to be modified.
Expand All @@ -42,6 +44,8 @@ type Document struct {
headerFiles []string
// paths to all footer files inside the zip archive
footerFiles []string
// paths to all media files inside the zip archive
mediaFiles []string
// The document contains multiple files which eventually need a parser each.
// The map key is the file path inside the document to which the parser belongs.
runParsers map[string]*RunParser
Expand Down Expand Up @@ -168,7 +172,7 @@ func (d *Document) GetPlaceHoldersList() ([]string, error) {
for file := range d.files {
if _, ok := d.runParsers[file]; !ok {
return nil, fmt.Errorf("no parser for file %s", file)
}
}
replacer := d.fileReplacers[file]
placeholders := replacer.placeholders
for _, placeholder := range placeholders {
Expand All @@ -178,6 +182,7 @@ func (d *Document) GetPlaceHoldersList() ([]string, error) {

return placeholdersTextList, nil
}

// replace will create a parser on the given bytes, execute it and replace every placeholders found with the data
// from the placeholderMap.
func (d *Document) replace(placeholderMap PlaceholderMap, file string) ([]byte, error) {
Expand Down Expand Up @@ -294,6 +299,7 @@ func (d *Document) SetFile(fileName string, fileBytes []byte) error {
// - word/document.xml
// - word/header*.xml
// - word/footer*.xml
// - word/media/*
func (d *Document) parseArchive() error {
readZipFile := func(file *zip.File) []byte {
readCloser, err := file.Open()
Expand All @@ -320,6 +326,10 @@ func (d *Document) parseArchive() error {
d.files[file.Name] = readZipFile(file)
d.footerFiles = append(d.footerFiles, file.Name)
}
if MediaPathRegex.MatchString(file.Name) {
d.files[file.Name] = readZipFile(file)
d.mediaFiles = append(d.mediaFiles, file.Name)
}
}
return nil
}
Expand Down Expand Up @@ -402,6 +412,7 @@ func (d *Document) Write(writer io.Writer) error {
// isModifiedFile will look through all modified files and check if the searchFileName exists
func (d *Document) isModifiedFile(searchFileName string) bool {
allFiles := append(d.headerFiles, d.footerFiles...)
allFiles = append(allFiles, d.mediaFiles...)
allFiles = append(allFiles, DocumentXml)

for _, file := range allFiles {
Expand Down
Binary file added examples/simple/cameraman.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions examples/simple/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"flag"
"log"
"os"
"time"

"github.com/lukasjarosch/go-docx"
Expand Down Expand Up @@ -48,6 +49,18 @@ func main() {

log.Printf("replace took: %s", time.Since(startTime))

bs, err := os.ReadFile("./cameraman.jpg")
if err != nil {
panic(err)
}

err = doc.SetFile("word/media/image1.jpg", bs)
if err != nil {
panic(err)
}

log.Printf("replace image took: %s", time.Since(startTime))

err = doc.WriteToFile(outputPath)
if err != nil {
panic(err)
Expand Down
Binary file modified examples/simple/template.docx
Binary file not shown.
12 changes: 12 additions & 0 deletions replace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ func TestReplacer_Replace(t *testing.T) {
return
}

bs, err := os.ReadFile("./test/cameraman.jpg")
if err != nil {
t.Error(err)
return
}

err = doc.SetFile("word/media/image1.jpg", bs)
if err != nil {
t.Error("replacing image failed", err)
return
}

err = doc.WriteToFile("./test/out.docx")
if err != nil {
t.Error("unable to write", err)
Expand Down
Binary file added test/cameraman.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified test/template.docx
Binary file not shown.

0 comments on commit 18f3362

Please sign in to comment.