From 7f1d15db4792d1fc6b36a6a571ce129f5c037632 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 10 Jun 2024 15:14:39 +0900 Subject: [PATCH] Avoid replacing DOIs with shorter ones --- .../src/main/java/org/grobid/core/document/Document.java | 2 +- .../src/main/java/org/grobid/core/engines/HeaderParser.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/document/Document.java b/grobid-core/src/main/java/org/grobid/core/document/Document.java index d7dc90b08b..3a4192da61 100755 --- a/grobid-core/src/main/java/org/grobid/core/document/Document.java +++ b/grobid-core/src/main/java/org/grobid/core/document/Document.java @@ -635,7 +635,7 @@ public String getAllBlocksClean(int toIgnore1, int toIgnore2) { } /* - * Try to match a DOI in the first page, independently from any preliminar + * Try to match a DOI in the first page, independently of any preliminary * segmentation. This can be useful for improving the chance to find a DOI * in headers or footnotes. */ diff --git a/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java b/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java index 185f3714d5..13fe07e8a7 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java @@ -268,7 +268,9 @@ public String processingHeaderSection(GrobidAnalysisConfig config, Document doc, // DOI pass List dois = doc.getDOIMatches(); if (isNotEmpty(dois) && dois.size() == 1) { - resHeader.setDOI(dois.get(0)); + if (dois.get(0).length() > resHeader.getDOI().length()) { + resHeader.setDOI(dois.get(0)); + } } // normalization of dates