ByteScout PDF Suite - C# - Human Assisted Invoice Parsing - ByteScout

ByteScout PDF Suite – C# – Human Assisted Invoice Parsing

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Suite – C# – Human Assisted Invoice Parsing

Human-assisted invoice parsing in C# with ByteScout PDF Suite

A simple tutorial on human-assisted invoice parsing in C#

We regularly create and update our sample code library so that you can quickly learn human-assisted invoice parsing and its step-by-step implementation in C#. ByteScout PDF Suite was made to help with human-assisted invoice parsing in C#. ByteScout PDF Suite is a bundle of six SDK libraries for working with PDF, from generating rich PDF reports to extracting data from PDF documents and converting them to HTML. The bundle includes PDF (Generator) SDK, PDF Renderer SDK, PDF Extractor SDK, PDF to HTML SDK, PDF Viewer SDK and PDF Generator SDK for JavaScript.

This C# sample source code for ByteScout PDF Suite shows the functions and options you should use when calling the API to implement human-assisted invoice parsing. To use human-assisted invoice parsing in your C# project or application, just copy and paste the code and then run your app. Further refinement of the code will make it more robust.

ByteScout PDF Suite is available as a free trial. You may get it from our website along with all other source code samples for C# applications.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

Form1.Designer.cs
      
namespace SampleApplication
{
    partial class Form1
    {
        /// <summary>
        /// Container for the components owned by this form (designer-managed).
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Releases the resources held by the form.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }

            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Builds the form's control tree. Required for Designer support -
        /// do not modify the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.pdfViewerControl1 = new Bytescout.PDFViewer.PDFViewerControl();
            this.toolStrip1 = new System.Windows.Forms.ToolStrip();
            this.tsbOpen = new System.Windows.Forms.ToolStripButton();
            this.panel1 = new System.Windows.Forms.Panel();
            this.btnClear = new System.Windows.Forms.Button();
            this.btnGetData = new System.Windows.Forms.Button();
            this.rbSelectTotal = new System.Windows.Forms.RadioButton();
            this.rbSelectCustomerInfo = new System.Windows.Forms.RadioButton();
            this.rbSelectInvoiceNumber = new System.Windows.Forms.RadioButton();
            this.toolStrip1.SuspendLayout();
            this.panel1.SuspendLayout();
            this.SuspendLayout();

            // pdfViewerControl1: docked to fill the form, in selection mouse mode
            // with multiple resizable selection rectangles enabled.
            this.pdfViewerControl1.AllowResizeSelectionRectangles = true;
            this.pdfViewerControl1.BackColor = System.Drawing.SystemColors.ButtonShadow;
            this.pdfViewerControl1.ClearSelectionOnClick = false;
            this.pdfViewerControl1.Dock = System.Windows.Forms.DockStyle.Fill;
            this.pdfViewerControl1.Location = new System.Drawing.Point(179, 25);
            this.pdfViewerControl1.MouseMode = Bytescout.PDFViewer.MouseMode.Selection;
            this.pdfViewerControl1.MultiSelectMode = true;
            this.pdfViewerControl1.Name = "pdfViewerControl1";
            this.pdfViewerControl1.RegistrationKey = null;
            this.pdfViewerControl1.RegistrationName = null;
            this.pdfViewerControl1.SelectionColor = System.Drawing.Color.Red;
            this.pdfViewerControl1.SelectionModifierKey = Bytescout.PDFViewer.SelectionModifierKeys.None;
            this.pdfViewerControl1.Size = new System.Drawing.Size(829, 708);
            this.pdfViewerControl1.TabIndex = 0;
            this.pdfViewerControl1.CurrentPageIndexChanged += this.pdfViewerControl1_CurrentPageIndexChanged;
            this.pdfViewerControl1.SelectionChanged += this.pdfViewerControl1_SelectionChanged;
            this.pdfViewerControl1.CustomPaint += this.pdfViewerControl1_CustomPaint;
            this.pdfViewerControl1.DocumentChanged += this.pdfViewerControl1_DocumentChanged;

            // toolStrip1: top tool strip holding the "Open PDF" button.
            this.toolStrip1.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
                this.tsbOpen});
            this.toolStrip1.Location = new System.Drawing.Point(0, 0);
            this.toolStrip1.Name = "toolStrip1";
            this.toolStrip1.Size = new System.Drawing.Size(1008, 25);
            this.toolStrip1.TabIndex = 1;
            this.toolStrip1.Text = "toolStrip1";

            // tsbOpen: tool strip button wired to tsbOpen_Click.
            this.tsbOpen.Image = global::SampleApplication.Properties.Resources.folder_page;
            this.tsbOpen.ImageTransparentColor = System.Drawing.Color.Magenta;
            this.tsbOpen.Name = "tsbOpen";
            this.tsbOpen.Size = new System.Drawing.Size(80, 22);
            this.tsbOpen.Text = "&Open PDF";
            this.tsbOpen.Click += this.tsbOpen_Click;

            // panel1: left-docked panel hosting the radio buttons and action buttons.
            this.panel1.Controls.Add(this.btnClear);
            this.panel1.Controls.Add(this.btnGetData);
            this.panel1.Controls.Add(this.rbSelectTotal);
            this.panel1.Controls.Add(this.rbSelectCustomerInfo);
            this.panel1.Controls.Add(this.rbSelectInvoiceNumber);
            this.panel1.Dock = System.Windows.Forms.DockStyle.Left;
            this.panel1.Location = new System.Drawing.Point(0, 25);
            this.panel1.Name = "panel1";
            this.panel1.Size = new System.Drawing.Size(179, 708);
            this.panel1.TabIndex = 2;

            // btnClear: initially disabled; wired to btnClear_Click.
            this.btnClear.Enabled = false;
            this.btnClear.Location = new System.Drawing.Point(93, 126);
            this.btnClear.Name = "btnClear";
            this.btnClear.Size = new System.Drawing.Size(75, 23);
            this.btnClear.TabIndex = 4;
            this.btnClear.Text = "Clear";
            this.btnClear.UseVisualStyleBackColor = true;
            this.btnClear.Click += this.btnClear_Click;

            // btnGetData: initially disabled, bold caption; wired to btnGetData_Click.
            this.btnGetData.Enabled = false;
            this.btnGetData.Font = new System.Drawing.Font("Tahoma", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(204)));
            this.btnGetData.Location = new System.Drawing.Point(12, 97);
            this.btnGetData.Name = "btnGetData";
            this.btnGetData.Size = new System.Drawing.Size(156, 23);
            this.btnGetData.TabIndex = 3;
            this.btnGetData.Text = "Get Data";
            this.btnGetData.UseVisualStyleBackColor = true;
            this.btnGetData.Click += this.btnGetData_Click;

            // rbSelectTotal: initially disabled radio button.
            this.rbSelectTotal.AutoSize = true;
            this.rbSelectTotal.Enabled = false;
            this.rbSelectTotal.Location = new System.Drawing.Point(12, 66);
            this.rbSelectTotal.Name = "rbSelectTotal";
            this.rbSelectTotal.Size = new System.Drawing.Size(93, 17);
            this.rbSelectTotal.TabIndex = 2;
            this.rbSelectTotal.Text = "Select TOTAL";
            this.rbSelectTotal.UseVisualStyleBackColor = true;

            // rbSelectCustomerInfo: initially disabled radio button.
            this.rbSelectCustomerInfo.AutoSize = true;
            this.rbSelectCustomerInfo.Enabled = false;
            this.rbSelectCustomerInfo.Location = new System.Drawing.Point(12, 43);
            this.rbSelectCustomerInfo.Name = "rbSelectCustomerInfo";
            this.rbSelectCustomerInfo.Size = new System.Drawing.Size(123, 17);
            this.rbSelectCustomerInfo.TabIndex = 1;
            this.rbSelectCustomerInfo.Text = "Select Customer Info";
            this.rbSelectCustomerInfo.UseVisualStyleBackColor = true;

            // rbSelectInvoiceNumber: initially disabled radio button, checked by default.
            this.rbSelectInvoiceNumber.AutoSize = true;
            this.rbSelectInvoiceNumber.Checked = true;
            this.rbSelectInvoiceNumber.Enabled = false;
            this.rbSelectInvoiceNumber.Location = new System.Drawing.Point(12, 20);
            this.rbSelectInvoiceNumber.Name = "rbSelectInvoiceNumber";
            this.rbSelectInvoiceNumber.Size = new System.Drawing.Size(133, 17);
            this.rbSelectInvoiceNumber.TabIndex = 0;
            this.rbSelectInvoiceNumber.TabStop = true;
            this.rbSelectInvoiceNumber.Text = "Select Invoice Number";
            this.rbSelectInvoiceNumber.UseVisualStyleBackColor = true;

            // Form1: the viewer is added first, then the side panel, then the tool strip.
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(1008, 733);
            this.Controls.Add(this.pdfViewerControl1);
            this.Controls.Add(this.panel1);
            this.Controls.Add(this.toolStrip1);
            this.Name = "Form1";
            this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen;
            this.Text = "Form1";
            this.toolStrip1.ResumeLayout(false);
            this.toolStrip1.PerformLayout();
            this.panel1.ResumeLayout(false);
            this.panel1.PerformLayout();
            this.ResumeLayout(false);
            this.PerformLayout();
        }

        #endregion

        // PDF viewer and tool strip
        private Bytescout.PDFViewer.PDFViewerControl pdfViewerControl1;
        private System.Windows.Forms.ToolStrip toolStrip1;
        private System.Windows.Forms.ToolStripButton tsbOpen;

        // Side panel and its children
        private System.Windows.Forms.Panel panel1;
        private System.Windows.Forms.Button btnGetData;
        private System.Windows.Forms.RadioButton rbSelectTotal;
        private System.Windows.Forms.RadioButton rbSelectCustomerInfo;
        private System.Windows.Forms.RadioButton rbSelectInvoiceNumber;
        private System.Windows.Forms.Button btnClear;
    }
}

ON-PREMISE OFFLINE SDK

60 Day Free Trial or Visit ByteScout PDF Suite Home Page

Explore ByteScout PDF Suite Documentation

Explore Samples

Sign Up for ByteScout PDF Suite Online Training

ON-DEMAND REST WEB API

Get Your API Key

Explore Web API Docs

Explore Web API Samples

Form1.cs
      
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using Bytescout.PDFExtractor;

namespace SampleApplication
{
    /// <summary>
    /// Main form of the sample. The user draws selection rectangles over a PDF page,
    /// each rectangle is tagged with the data label chosen via the radio buttons,
    /// and "Get Data" extracts the text found under every rectangle.
    /// </summary>
    public partial class Form1 : Form
    {
        // Registration values used for both the viewer control and the text extractor.
        private const string DemoRegistrationName = "demo";
        private const string DemoRegistrationKey = "demo";

        // Folder assigned to TextExtractor.OCRLanguageDataFolder.
        // NOTE(review): machine-specific install path - adjust to the local SDK installation.
        private const string OcrLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\net4.00\tessdata\";

        // Label text per selection rectangle: _dataLabels[i] is used for Selection[i].
        private readonly List<String> _dataLabels = new List<string>();

        /// <summary>
        /// Creates the form and applies the registration values to the viewer control.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            pdfViewerControl1.RegistrationName = DemoRegistrationName;
            pdfViewerControl1.RegistrationKey = DemoRegistrationKey;
        }

        /// <summary>
        /// Opens the bundled sample document when the form loads.
        /// </summary>
        /// <param name="e">Event data passed on to the base implementation.</param>
        protected override void OnLoad(EventArgs e)
        {
            OpenDocument(@"Invoice.pdf");

            base.OnLoad(e);
        }

        /// <summary>
        /// Lets the user pick a PDF file and opens it in the viewer.
        /// </summary>
        private void tsbOpen_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Title = @"Open PDF Document";
                openFileDialog.Filter = @"PDF Files (*.pdf)|*.pdf|All Files|*.*";

                if (openFileDialog.ShowDialog() == DialogResult.OK)
                {
                    OpenDocument(openFileDialog.FileName);
                }
            }
        }

        /// <summary>
        /// Loads the given file into the viewer control, showing a wait cursor while
        /// loading and a message box if loading throws.
        /// </summary>
        /// <param name="filePath">Path of the PDF document to open.</param>
        private void OpenDocument(string filePath)
        {
            Text = filePath;
            Cursor = Cursors.WaitCursor;

            try
            {
                pdfViewerControl1.InputFile = filePath;
            }
            catch (Exception exception)
            {
                MessageBox.Show(exception.Message);
            }
            finally
            {
                Cursor = Cursors.Default;
            }
        }

        /// <summary>
        /// Resets selections and labels when another document is shown.
        /// </summary>
        private void pdfViewerControl1_DocumentChanged(object sender, EventArgs e)
        {
            // ClearSelections() already refreshes the control states.
            ClearSelections();
        }

        /// <summary>
        /// Resets selections and labels when the current page changes.
        /// </summary>
        private void pdfViewerControl1_CurrentPageIndexChanged(object sender, EventArgs e)
        {
            ClearSelections();
        }

        /// <summary>
        /// Handles the "Clear" button: removes all selections and their labels.
        /// </summary>
        private void btnClear_Click(object sender, EventArgs e)
        {
            ClearSelections();
        }

        /// <summary>
        /// Removes all labels and selection rectangles, then refreshes the control states.
        /// </summary>
        private void ClearSelections()
        {
            _dataLabels.Clear();
            pdfViewerControl1.ClearSelection();
            UpdateControls();
        }

        /// <summary>
        /// Enables the radio buttons when a document is loaded, and the action buttons
        /// when a document is loaded and at least one selection exists.
        /// </summary>
        private void UpdateControls()
        {
            bool documentLoaded = pdfViewerControl1.IsDocumentLoaded;
            bool hasSelection = documentLoaded && pdfViewerControl1.Selection.Length > 0;

            rbSelectInvoiceNumber.Enabled = documentLoaded;
            rbSelectCustomerInfo.Enabled = documentLoaded;
            rbSelectTotal.Enabled = documentLoaded;

            btnGetData.Enabled = hasSelection;
            btnClear.Enabled = hasSelection;
        }

        /// <summary>
        /// Returns the label text matching the currently checked radio button.
        /// </summary>
        private string GetCurrentDataLabel()
        {
            if (rbSelectInvoiceNumber.Checked)
            {
                return "Invoice Number:";
            }

            if (rbSelectCustomerInfo.Checked)
            {
                return "Customer Info:";
            }

            return "TOTAL:";
        }

        /// <summary>
        /// Keeps the label list the same length as the selection array: new selections
        /// receive the label of the currently checked radio button.
        /// </summary>
        private void pdfViewerControl1_SelectionChanged(object sender, EventArgs e)
        {
            UpdateControls();

            int selectionCount = pdfViewerControl1.Selection.Length;

            // If the control reports fewer selections than we hold labels for, drop the
            // surplus so a stale label cannot be attached to a later selection.
            // NOTE(review): assumes removed selections are the trailing ones - confirm
            // against the viewer control's behavior.
            if (_dataLabels.Count > selectionCount)
            {
                _dataLabels.RemoveRange(selectionCount, _dataLabels.Count - selectionCount);
            }

            if (selectionCount > _dataLabels.Count)
            {
                string dataLabel = GetCurrentDataLabel();

                while (_dataLabels.Count < selectionCount)
                {
                    _dataLabels.Add(dataLabel);
                }
            }
        }

        /// <summary>
        /// Draws each selection's label in a dark blue box just above its rectangle.
        /// </summary>
        private void pdfViewerControl1_CustomPaint(object sender, PaintEventArgs e)
        {
            Rectangle[] selection = pdfViewerControl1.Selection;

            // Never index past the label list: painting may run while the selection
            // array and the labels are out of step.
            int labeledCount = Math.Min(selection.Length, _dataLabels.Count);

            // Paint labels
            for (int i = 0; i < labeledCount; i++)
            {
                Rectangle r = Rectangle.Round(selection[i]);
                r = pdfViewerControl1.TranslateRectangleToViewport(r);

                string label = _dataLabels[i];
                Size textSize = TextRenderer.MeasureText(label, Font);
                Rectangle textRectangle = new Rectangle(
                    r.Left,
                    r.Top - textSize.Height - 6,
                    textSize.Width + 2,
                    textSize.Height + 2);

                e.Graphics.FillRectangle(Brushes.DarkBlue, textRectangle);
                TextRenderer.DrawText(e.Graphics, label, Font, textRectangle, Color.White,
                    TextFormatFlags.HorizontalCenter | TextFormatFlags.VerticalCenter);
            }
        }

        /// <summary>
        /// Handles the "Get Data" button: extracts the text under every labeled selection
        /// on the current page and shows the labels with their text in a message box.
        /// Errors raised while loading or extracting are reported in a message box.
        /// </summary>
        private void btnGetData_Click(object sender, EventArgs e)
        {
            StringBuilder result = new StringBuilder();
            RectangleF[] selection = pdfViewerControl1.SelectionInPoints;
            int labeledCount = Math.Min(selection.Length, _dataLabels.Count);
            int pageIndex = pdfViewerControl1.CurrentPageIndex;

            Cursor = Cursors.WaitCursor;

            try
            {
                using (TextExtractor extractor = new TextExtractor())
                {
                    extractor.RegistrationName = DemoRegistrationName;
                    extractor.RegistrationKey = DemoRegistrationKey;

                    extractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                    extractor.OCRMode = OCRMode.Auto;
                    extractor.OCRLanguageDataFolder = OcrLanguageDataFolder;
                    extractor.OCRResolution = 300;

                    for (int i = 0; i < labeledCount; i++)
                    {
                        extractor.SetExtractionArea(selection[i]);

                        result.AppendLine(_dataLabels[i]);
                        // Same index for start and end page: only the current page is read.
                        result.AppendLine(extractor.GetText(pageIndex, pageIndex));
                        result.AppendLine();
                    }
                }
            }
            catch (Exception exception)
            {
                // Same reporting style as OpenDocument: show the message instead of crashing.
                MessageBox.Show(exception.Message);
                return;
            }
            finally
            {
                Cursor = Cursors.Default;
            }

            MessageBox.Show(result.ToString());
        }
    }
}

ON-PREMISE OFFLINE SDK

60 Day Free Trial or Visit ByteScout PDF Suite Home Page

Explore ByteScout PDF Suite Documentation

Explore Samples

Sign Up for ByteScout PDF Suite Online Training

ON-DEMAND REST WEB API

Get Your API Key

Explore Web API Docs

Explore Web API Samples

Program.cs
      
using System;
using System.Collections.Generic;
using System.Windows.Forms;

namespace SampleApplication
{
    /// <summary>
    /// Hosts the entry point of the sample application.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// The main entry point for the application: enables visual styles,
        /// sets the text rendering default, and runs the main form.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            Form1 mainForm = new Form1();
            Application.Run(mainForm);
        }
    }
}

ON-PREMISE OFFLINE SDK

60 Day Free Trial or Visit ByteScout PDF Suite Home Page

Explore ByteScout PDF Suite Documentation

Explore Samples

Sign Up for ByteScout PDF Suite Online Training

ON-DEMAND REST WEB API

Get Your API Key

Explore Web API Docs

Explore Web API Samples

VIDEO

ON-PREMISE OFFLINE SDK

60 Day Free Trial or Visit ByteScout PDF Suite Home Page

Explore ByteScout PDF Suite Documentation

Explore Samples

Sign Up for ByteScout PDF Suite Online Training

ON-DEMAND REST WEB API

Get Your API Key

Explore Web API Docs

Explore Web API Samples

Tutorials:

prev
next