'' TextMap.vb
''
'' This code is part of Document Solutions for PDF demos.
'' Copyright (c) MESCIUS inc. All rights reserved.
''
Imports System.IO
Imports System.Drawing
Imports System.Numerics
Imports System.Collections.Generic
Imports System.Linq
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Drawing
Imports GrapeCity.Documents.Pdf
Imports GrapeCity.Documents.Pdf.Annotations
Imports GrapeCity.Documents.Pdf.Graphics
Imports GrapeCity.Documents.Pdf.TextMap

'' This sample shows how to use the text map for a page in a PDF
'' to find geometric positions of text lines on the page,
'' and to locate the text at a specific position.
'' The PDF used in this sample was created by TimeSheet.
Public Class TextMap
    '' Builds the sample PDF and writes it to the passed stream.
    ''
    '' The method:
    ''  - creates a new document with an explanatory note on its first page;
    ''  - loads Resources/PDFs/TimeSheet.pdf into a temporary document and
    ''    gets the text map of that document's first page;
    ''  - uses the map's HitTest to fetch the text between two known points,
    ''    then lists every fragment in the map with its coordinates;
    ''  - renders the collected text, adding pages as needed;
    ''  - appends the loaded document and saves the result.
    ''
    '' Parameters:
    ''  stream - the stream that receives the generated PDF.
    ''
    '' Returns the number of pages in the generated document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc = New GcPdfDocument()
        Dim page = doc.NewPage()

        Dim rc = Util.AddNote(
            "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " +
            "gets the text map for the first page, and prints out the coordinates and texts of all " +
            "line fragments in the map. " +
            "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " +
            "and prints the result. " +
            "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.",
            page)

        '' Setup text formatting and layout:
        '' 'tf' is used for labels, 'tfFound' highlights the texts taken from the map.
        Dim tf = New TextFormat() With
        {
            .Font = StandardFonts.Times,
            .FontSize = 13
        }
        Dim tfFound = New TextFormat() With
        {
            .Font = StandardFonts.TimesBold,
            .FontSize = 14,
            .ForeColor = Color.DarkBlue
        }
        '' The layout covers the whole page; on the first page the top margin
        '' is set to start 36 units below the rectangle returned by AddNote:
        Dim tl = New TextLayout(72) With
        {
            .MaxWidth = doc.PageSize.Width,
            .MaxHeight = doc.PageSize.Height,
            .MarginAll = rc.Left,
            .MarginTop = rc.Bottom + 36,
            .TabStops = New List(Of TabStop)() From {New TabStop(72 * 2)}
        }
        '' Split options used when the text does not fit on one page;
        '' RestMarginTop sets the top margin for the overflow layout:
        Dim tso = New TextSplitOptions(tl) With
        {
            .MinLinesInFirstParagraph = 2,
            .MinLinesInLastParagraph = 2,
            .RestMarginTop = rc.Left
        }

        '' Open an arbitrary PDF, load it into a temp document and use the map to find some texts.
        '' The file stream stays open until the result is saved, as the loaded
        '' document is merged into the output below:
        Using fs = New FileStream(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"), FileMode.Open, FileAccess.Read)
            Dim doc1 = New GcPdfDocument()
            doc1.Load(fs)
            Dim tmap = doc1.Pages(0).GetTextMap()

            '' We retrieve the text at a specific (known to us) geometric location on the page.
            '' (tx0, ty0) and (tx1, ty1) are the two corners of the search rectangle;
            '' they are multiplied by 72 before being passed to HitTest:
            Dim tx0 = 2.1F, ty0 = 3.37F, tx1 = 3.1F, ty1 = 3.5F
            Dim htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72)
            '' The second corner must use tx1 for X (not ty0):
            Dim htiTo = tmap.HitTest(tx1 * 72, ty1 * 72)
            Dim range1 As TextMapFragment = Nothing, text1 As String = Nothing
            tmap.GetFragment(htiFrom.Pos, htiTo.Pos, range1, text1)
            '' All four values use the 'F2' format specifier; a specifier with a
            '' leading space (" f2") would be treated as a custom format and print literally:
            tl.AppendLine($"Looked for text inside rectangle x={tx0:F2}"", y={ty0:F2}"", width={tx1 - tx0:F2}"", height={ty1 - ty0:F2}"", found:", tf)
            tl.AppendLine(text1, tfFound)
            tl.AppendLine()

            '' Get all text fragments and their locations on the page:
            tl.AppendLine("List of all texts found on the page", tf)
            Dim range As TextMapFragment = Nothing, text As String = Nothing
            tmap.GetFragment(range, text)
            For Each tlf In range
                Dim coords = tmap.GetCoords(tlf)
                '' Coordinates are divided by 72 for printing, a tab separates them from the text:
                tl.Append($"Text at ({coords.B.X / 72:F2}"", {coords.B.Y / 72:F2}""):{vbTab}", tf)
                tl.AppendLine(tmap.GetText(tlf), tfFound)
            Next

            '' Print the results, adding pages while there is text that did not fit:
            tl.PerformLayout(True)
            While True
                '' 'rest' will accept the text that did not fit:
                Dim rest As TextLayout = Nothing
                Dim splitResult = tl.Split(tso, rest)
                doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty)
                If splitResult <> SplitResult.Split Then
                    Exit While
                End If
                '' Continue with the overflow on a fresh page:
                tl = rest
                doc.NewPage()
            End While

            '' Append the original document for reference:
            doc.MergeWithDocument(doc1, New MergeDocumentOptions())

            '' Done:
            doc.Save(stream)
        End Using
        Return doc.Pages.Count
    End Function
End Class