C1TextParser

ExtractComments

ExtractComments

This view shows the basic features of the StartsAfterContinuesUntil extractor.

Features

  • Sample Applications

  • Starts After Continues Until Extractor

    From Amazon order emails, extract relevant information about the order itself. This sample is intended to demonstrate the repeated placeholder extraction capabilities of the C1TextParser HTML extractor. The sample consists of three fixed placeholders and one repeated block. The fixed placeholders are the customer name, the order delivery date, and the total amount of the order. The repeated block is used to extract each article that appears in the ordered article list. It contains three repeated placeholders: the name, the price, and the seller of the article. The Amazon email used as the extraction source is "amazonEmail2.html".

    Input file

    // A utility function to swap two elements 
    void swap(int* a, int* b) 
    { 
        int t = *a; 
        *a = *b; 
        *b = t; 
    } 
      
    // This function takes last element as pivot, places 
    // the pivot element at its correct position in sorted 
    // array, and places all smaller (smaller than pivot) 
    // to left of pivot and all greater elements to right 
    // of pivot
    int partition (int arr[], int low, int high) 
    { 
        int pivot = arr[high];    // pivot 
        int i = (low - 1);        // Index of smaller element 
      
        for (int j = low; j <= high- 1; j++) 
        { 
            // If current element is smaller than or 
            // equal to pivot 
            if (arr[j] <= pivot) 
            { 
                i++;    // increment index of smaller element 
                swap(&arr[i], &arr[j]); 
            } 
        } 
        swap(&arr[i + 1], &arr[high]); 
        return (i + 1); 
    } 
      
    // The main function that implements QuickSort 
    // arr[] --> Array to be sorted, 
    // low  --> Starting index, 
    // high  --> Ending index
    void quickSort(int arr[], int low, int high) 
    { 
        if (low < high) 
        { 
            // pi is partitioning index, arr[p] is now at right place
            int pi = partition(arr, low, high); 
      
            // Separately sort elements before 
            // partition and after partition 
            quickSort(arr, low, pi - 1); 
            quickSort(arr, pi + 1, high); 
        } 
    } 
      
    // Function to print an array
    void printArray(int arr[], int size) 
    { 
        int i; 
        for (i=0; i < size; i++) 
            printf("%d ", arr[i]); 
        printf("n"); 
    } 
      
    // Driver program to test above functions 
    int main() 
    { 
        int arr[] = {10, 7, 8, 9, 1, 5}; 
        int n = sizeof(arr)/sizeof(arr[0]); 
        quickSort(arr, 0, n-1); 
        printf("Sorted array: n"); 
        printArray(arr, n); 
        return 0; 
    }

    Extracted result

    {
      "Extractor": "StartsAfterContinuesUntil",
      "Result": [
      {
        "StartIndex": 2,
        "ExtractedText": " A utility function to swap two elements "
      },
      {
        "StartIndex": 134,
        "ExtractedText": " This function takes last element as pivot, places "
      },
      {
        "StartIndex": 189,
        "ExtractedText": " the pivot element at its correct position in sorted "
      },
      {
        "StartIndex": 246,
        "ExtractedText": " array, and places all smaller (smaller than pivot) "
      },
      {
        "StartIndex": 302,
        "ExtractedText": " to left of pivot and all greater elements to right "
      },
      {
        "StartIndex": 358,
        "ExtractedText": " of pivot"
      },
      {
        "StartIndex": 452,
        "ExtractedText": " pivot "
      },
      {
        "StartIndex": 493,
        "ExtractedText": " Index of smaller element "
      },
      {
        "StartIndex": 586,
        "ExtractedText": " If current element is smaller than or "
      },
      {
        "StartIndex": 637,
        "ExtractedText": " equal to pivot "
      },
      {
        "StartIndex": 720,
        "ExtractedText": " increment index of smaller element "
      },
      {
        "StartIndex": 885,
        "ExtractedText": " The main function that implements QuickSort "
      },
      {
        "StartIndex": 934,
        "ExtractedText": " arr[] --> Array to be sorted, "
      },
      {
        "StartIndex": 969,
        "ExtractedText": " low  --> Starting index, "
      },
      {
        "StartIndex": 999,
        "ExtractedText": " high  --> Ending index"
      },
      {
        "StartIndex": 1115,
        "ExtractedText": " pi is partitioning index, arr[p] is now at right place"
      },
      {
        "StartIndex": 1232,
        "ExtractedText": " Separately sort elements before "
      },
      {
        "StartIndex": 1277,
        "ExtractedText": " partition and after partition "
      },
      {
        "StartIndex": 1407,
        "ExtractedText": " Function to print an array"
      },
      {
        "StartIndex": 1585,
        "ExtractedText": " Driver program to test above functions "
      }
    ]
    }
    using System.Collections;
    using System.Globalization;
    using System.Linq;
    using System.Web.Mvc;
    using C1.Web.Mvc;
    using SamplesExplorer.Models;
    using System.Collections.Generic;
    using System;
    using C1.TextParser;
    using System.IO;
    using System.Text;
    using System.Runtime.Serialization;
    
    namespace SamplesExplorer.Controllers
    {
        public partial class C1TextParserController : Controller
        {
            /// <summary>
            /// Runs a <see cref="StartsAfterContinuesUntil"/> extractor over the bundled
            /// ExtractComments.txt sample file and hands the result, serialized as JSON,
            /// to the view through <c>ViewBag.ExtractionResult</c>.
            /// </summary>
            /// <param name="collection">Posted form values; not read by this action.</param>
            /// <returns>The default view for this action.</returns>
            public ActionResult ExtractComments(FormCollection collection)
            {
                // Text is captured after each "//" and continues until the second pattern matches.
                // NOTE(review): the verbatim string @"\r\n" is the four characters \ r \ n, so it
                // presumably is interpreted as a regular expression by C1.TextParser - confirm.
                StartsAfterContinuesUntil startsAfterContinuesUntil = new StartsAfterContinuesUntil(@"//", @"\r\n");
    
                // OpenRead requests read-only access and allows other readers. The previous
                // File.Open(path, FileMode.Open) asked for read/write access with no sharing,
                // which needs write permission on the content file and can make concurrent
                // requests for this page fail with an IOException.
                using (var inputStream = System.IO.File.OpenRead(Server.MapPath("~/Content/sampleFiles/ExtractComments.txt")))
                {
                    IExtractionResult result = startsAfterContinuesUntil.Extract(inputStream);
                    ViewBag.ExtractionResult = result.ToJsonString();
                }
                return View();
            }
        }
    }
    
    @* Summary section: renders the localized summary string for this sample page. *@
    @* NOTE(review): the summary and title below use StartsAfterExtractor_* resource keys while the
       description uses ExtractComments_Text1; confirm these are the intended keys for this view. *@
    @section Summary{
        <p>@Html.Raw(Resources.C1TextParser.StartsAfterExtractor_Text0)</p>
    }
    
    <div>
        <div>
            <h3>@Html.Raw(Resources.C1TextParser.StartsAfterExtractor_Title)</h3>
    
            <p>@Html.Raw(Resources.C1TextParser.ExtractComments_Text1)</p>
        </div>
        <div>
            <h3>Input file</h3>
            @* Shows the content of the sample input file.
               NOTE(review): Html.Raw writes the string without HTML encoding; confirm that
               GetSampleFileContent already encodes characters such as '<' found in the file. *@
            <pre class="scrollable-pre">@Html.Raw(ControlPages.GetSampleFileContent("ExtractComments.txt"))</pre>
        </div>
        <div>
    
            <h3>Extracted result</h3>
            @* Shows the value the ExtractComments action stored in ViewBag.ExtractionResult, written unencoded. *@
            <pre class="scrollable-pre">@Html.Raw(ViewBag.ExtractionResult)</pre>
        </div>
    </div>