ReadTagsToOutlines.cs
//
// This code is part of Document Solutions for PDF demos.
// Copyright (c) MESCIUS inc. All rights reserved.
//
using System;
using System.IO;
using System.Drawing;
using System.Linq;
using System.Collections.Generic;
using GrapeCity.Documents.Pdf;
using GrapeCity.Documents.Text;
using GrapeCity.Documents.Pdf.TextMap;
using GrapeCity.Documents.Pdf.Structure;
using GrapeCity.Documents.Pdf.Recognition.Structure;

namespace DsPdfWeb.Demos
{
    // Builds PDF outline (bookmark) nodes from the heading structure tags
    // (H1, H2, H3 etc.) found in a tagged PDF, nesting the outline nodes
    // according to the heading levels.
    public class ReadTagsToOutlines
    {
        /// <summary>
        /// Loads the sample PDF, adds an outline node for each heading element found
        /// among the children of the first top-level structure element, and saves
        /// the resulting document to <paramref name="stream"/>.
        /// </summary>
        /// <param name="stream">The stream to which the resulting PDF is saved.</param>
        /// <returns>The number of pages in the document.</returns>
        public int CreatePDF(Stream stream)
        {
            var doc = new GcPdfDocument();
            // The using declaration keeps the source stream open until the method returns,
            // i.e. until after doc.Save() has been called.
            using var s = File.OpenRead(Path.Combine("Resources", "PDFs", "C1Olap-QuickStart.pdf"));
            doc.Load(s);

            // Get the LogicalStructure and top parent element.
            // NOTE(review): this assumes the document is tagged and has at least one
            // top-level element; an untagged PDF would fail here - confirm if other inputs are expected.
            LogicalStructure ls = doc.GetLogicalStructure();
            Element root = ls.Elements[0];

            // Chain of outline nodes that are currently "open" (the ancestors of the next
            // heading), each paired with the heading level it was created from. Tracking the
            // actual heading level (rather than a +1/-1 counter) keeps nesting correct when
            // heading levels are skipped (H1 -> H3) or drop by more than one (H3 -> H1).
            var openNodes = new Stack<(int Level, OutlineNode Node)>();

            // Iterate over elements and select all heading elements (H1, H2, H3 etc.):
            foreach (Element e in root.Children)
            {
                string type = e.StructElement.Type;
                if (string.IsNullOrEmpty(type) || !type.StartsWith("H", StringComparison.Ordinal))
                {
                    continue;
                }

                // The heading level is the number following 'H'; topmost level is 1.
                // Only plain digits are accepted, parsed culture-independently.
                if (!int.TryParse(
                        type.Substring(1),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out int headingLevel)
                    || headingLevel < 1)
                {
                    continue;
                }

                // Find the target page; headings that cannot be mapped to a page are skipped:
                var page = FindPage(e.StructElement);
                if (page == null)
                {
                    continue;
                }

                // Close all open nodes that are at the same or a deeper level than this heading,
                // so that the top of the stack (if any) is the heading's parent.
                while (openNodes.Count > 0 && openNodes.Peek().Level >= headingLevel)
                {
                    openNodes.Pop();
                }

                // With no open parent the heading goes to the document's top-level outlines
                // (this also covers a document whose first heading is not H1).
                OutlineNodeCollection siblings = openNodes.Count > 0
                    ? openNodes.Peek().Node.Children
                    : doc.Outlines;

                // Create the outline node from the element text, pointing at the target page:
                var node = new OutlineNode(e.GetText(), new DestinationFit(page));
                siblings.Add(node);
                openNodes.Push((headingLevel, node));
            }
            doc.Save(stream);
            return doc.Pages.Count;
        }

        /// <summary>
        /// Finds a page for a structure element: returns the element's own
        /// <c>DefaultPage</c> if it is set, otherwise the first page found by a
        /// depth-first search of the element's children.
        /// </summary>
        /// <param name="se">The structure element to search.</param>
        /// <returns>The page found, or null if neither the element nor any of its descendants has one.</returns>
        private static Page FindPage(StructElement se)
        {
            if (se.DefaultPage != null)
            {
                return se.DefaultPage;
            }

            if (se.HasChildren)
            {
                foreach (var child in se.Children)
                {
                    var p = FindPage(child);
                    if (p != null)
                    {
                        return p;
                    }
                }
            }

            return null;
        }
    }
}