Skip to content

Latest commit

 

History

History
73 lines (62 loc) · 3.29 KB

Format Locator that ignores line wrapping.md

File metadata and controls

73 lines (62 loc) · 3.29 KB

How to Run a Format Locator across all the text of a document.

A Format Locator can use Dictionaries to search for phrases. Sometimes these phrases wrap around to different lines.
For example, you are looking for Medicare Claims on this document.
image

The normal format locator will find Medicare as one result with 57% and Claims will be a second result with 43%.
image

The script below instead returns a single result: Medicare Claims with 100% confidence. The green box looks wrong, but it is simply the bounding box that encloses both words, Medicare and Claims.
image

Steps

  1. Add a second class to your project called Phrases. This class is not used by any other documents.
  2. Add a locator to this class FL_Phrase and configure it however you like with expressions or dictionaries.
  3. Add a script locator SL_Phrases to your document class, using the script below.
  4. Select a confidence threshold for your results.
  5. Run your locator
Option Explicit

' Class script: Document

Private Sub SL_Phrases_LocateAlternatives(ByVal pXDoc As CASCADELib.CscXDocument, ByVal pLocator As CASCADELib.CscXDocField)
   'Script locator event: fills this locator's alternatives with the hits that
   'locator FL_Phrase of class Phrases produces when line-wrapping is ignored.
   Dim PhraseClass As String, PhraseLocator As String, MinConfidence As Double
   PhraseClass="Phrases"
   PhraseLocator="FL_Phrase"
   MinConfidence=0.75 'only hits with a confidence above this value are returned
   Locator_ExtractFromAllText(pXDoc,PhraseClass,PhraseLocator,MinConfidence,pLocator.Alternatives)
End Sub


Sub Locator_ExtractFromAllText(pXDoc As CscXDocument, ClassName As String, LocatorName As String, Threshold As Double, Results As CscXDocFieldAlternatives)
   'Runs locator LocatorName of class ClassName over a flattened copy of pXDoc in which
   'every word of every page sits on one single text line, so that phrases broken by
   'line-wrapping can still match.
   'Each alternative whose confidence is greater than Threshold is added to Results,
   'built from the corresponding words of pXDoc.
   Dim Flat As New CscXDocument
   Dim FlatWord As CscXDocWord
   Dim SrcIdx As Long, NextLeft As Long, LocatorIdx As Long
   Dim AltIdx As Long, HitWordIdx As Long
   Dim Found As CscXDocFieldAlternatives
   Dim Hit As CscXDocFieldAlternative, Copied As CscXDocFieldAlternative

   'Start from a copy of the first page, then strip every word from it
   Flat.CopyPages(pXDoc,0,1)
   Do While Flat.Words.Count>0
      Flat.Words.Remove(0)
   Loop

   'Lay the words of the whole document out left-to-right on page 0 of the copy
   NextLeft=0
   For SrcIdx=0 To pXDoc.Words.Count-1
      Set FlatWord=New CscXDocWord
      With FlatWord
         .PageIndex=0
         .Top=0
         .Height=15
         .Left=NextLeft
         .Width=1
         .Text=pXDoc.Words(SrcIdx).Text
      End With
      Flat.Pages(0).AddWord(FlatWord)
      'advance by 15 units per character plus one extra character as a gap
      NextLeft=NextLeft+(Len(FlatWord.Text)+1)*15
   Next

   'Rebuild the text lines the locators work on; all words now share one line
   Flat.Representations(0).AnalyzeLines

   'Execute the requested locator against the flattened copy
   With Project.ClassByName(ClassName)
      LocatorIdx=.Locators.ItemByName(LocatorName).Index
      .Locate(Flat,LocatorIdx)
   End With
   Set Found=Flat.Locators.ItemByName(LocatorName).Alternatives

   'Hand every sufficiently confident hit back, mapped onto the words of the original document
   For AltIdx=0 To Found.Count-1
      Set Hit=Found(AltIdx)
      If Hit.Confidence > Threshold Then
         Set Copied=Results.Create
         Copied.Confidence=Hit.Confidence
         For HitWordIdx=0 To Hit.Words.Count-1
            'words were added to the copy in document order, so IndexOnDocument addresses the same word in pXDoc
            Copied.Words.Append(pXDoc.Words(Hit.Words(HitWordIdx).IndexOnDocument))
         Next
      End If
   Next

   Set Flat = Nothing 'release the temporary document
End Sub