RPA Robotic Process Automation - PDF Data Remover Tool - C# - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

RPA Robotic Process Automation – PDF Data Remover Tool – C#

  • Home
  • /
  • Articles
  • /
  • RPA Robotic Process Automation – PDF Data Remover Tool – C#

PDF data remover tool in C# with ByteScout Robotic Process Automation

ByteScout Robotic Process Automation is a set of tools and an API for rapidly automating manual data entry. It is built on ByteScout data extraction tools.

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

Form1.Designer.cs

      
namespace Sample_UI_Application
{
    // Designer half of Form1: declares the controls and builds the form layout.
    // The layout places a "Find" group box, two option check boxes and the
    // "Perform Removal" button on the left, and the PDF viewer on the right.
    partial class Form1
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.components = new System.ComponentModel.Container();
            System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(Form1));
            this.pdfViewerControl1 = new Bytescout.PDFViewer.PDFViewerControl();
            this.toolStrip1 = new System.Windows.Forms.ToolStrip();
            this.tsbOpen = new System.Windows.Forms.ToolStripButton();
            this.btnProceed = new System.Windows.Forms.Button();
            this.tbSearchExpression = new System.Windows.Forms.TextBox();
            this.btnFind = new System.Windows.Forms.Button();
            this.groupBox1 = new System.Windows.Forms.GroupBox();
            this.cbRegex = new System.Windows.Forms.CheckBox();
            this.label1 = new System.Windows.Forms.Label();
            this.cbMaskRemovedText = new System.Windows.Forms.CheckBox();
            this.cbMakeUnsearchable = new System.Windows.Forms.CheckBox();
            this.toolTip1 = new System.Windows.Forms.ToolTip(this.components);
            this.toolStrip1.SuspendLayout();
            this.groupBox1.SuspendLayout();
            this.SuspendLayout();
            // 
            // pdfViewerControl1
            // 
            this.pdfViewerControl1.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 
            | System.Windows.Forms.AnchorStyles.Left) 
            | System.Windows.Forms.AnchorStyles.Right)));
            this.pdfViewerControl1.BackColor = System.Drawing.SystemColors.ButtonShadow;
            this.pdfViewerControl1.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle;
            this.pdfViewerControl1.CacheVisitedPages = true;
            this.pdfViewerControl1.Location = new System.Drawing.Point(275, 28);
            this.pdfViewerControl1.MouseMode = Bytescout.PDFViewer.MouseMode.Selection;
            this.pdfViewerControl1.Name = "pdfViewerControl1";
            this.pdfViewerControl1.RegistrationKey = null;
            this.pdfViewerControl1.RegistrationName = null;
            this.pdfViewerControl1.ShowToolbarFind = false;
            this.pdfViewerControl1.Size = new System.Drawing.Size(866, 662);
            this.pdfViewerControl1.TabIndex = 0;
            this.pdfViewerControl1.PreProcessKey += new Bytescout.PDFViewer.PreProcessKeyEventHandler(this.PdfViewerControl1_PreProcessKey);
            this.pdfViewerControl1.CurrentPageIndexChanged += new System.EventHandler(this.PdfViewerControl1_CurrentPageIndexChanged);
            this.pdfViewerControl1.SelectionChanged += new Bytescout.PDFViewer.SelectionChangedEventHandler(this.PdfViewerControl1_SelectionChanged);
            this.pdfViewerControl1.ValidateContextMenu += new Bytescout.PDFViewer.ValidateContextMenuEventHandler(this.PdfViewerControl1_ValidateContextMenu);
            // 
            // toolStrip1
            // 
            this.toolStrip1.ImageScalingSize = new System.Drawing.Size(20, 20);
            this.toolStrip1.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
            this.tsbOpen});
            this.toolStrip1.Location = new System.Drawing.Point(0, 0);
            this.toolStrip1.Name = "toolStrip1";
            this.toolStrip1.Size = new System.Drawing.Size(1153, 25);
            this.toolStrip1.TabIndex = 1;
            this.toolStrip1.Text = "toolStrip1";
            // 
            // tsbOpen
            // 
            this.tsbOpen.ImageTransparentColor = System.Drawing.Color.Magenta;
            this.tsbOpen.Name = "tsbOpen";
            this.tsbOpen.Size = new System.Drawing.Size(64, 22);
            this.tsbOpen.Text = "&Open PDF";
            this.tsbOpen.Click += new System.EventHandler(this.tsbOpen_Click);
            // 
            // btnProceed
            // 
            this.btnProceed.Location = new System.Drawing.Point(140, 207);
            this.btnProceed.Margin = new System.Windows.Forms.Padding(2);
            this.btnProceed.Name = "btnProceed";
            this.btnProceed.Size = new System.Drawing.Size(129, 23);
            this.btnProceed.TabIndex = 4;
            this.btnProceed.Text = "Perform Removal";
            this.btnProceed.UseVisualStyleBackColor = true;
            this.btnProceed.Click += new System.EventHandler(this.BtnProceed_Click);
            // 
            // tbSearchExpression
            // 
            this.tbSearchExpression.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
            | System.Windows.Forms.AnchorStyles.Right)));
            this.tbSearchExpression.Location = new System.Drawing.Point(68, 19);
            this.tbSearchExpression.Name = "tbSearchExpression";
            this.tbSearchExpression.Size = new System.Drawing.Size(183, 20);
            this.tbSearchExpression.TabIndex = 0;
            // 
            // btnFind
            // 
            this.btnFind.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right)));
            this.btnFind.Location = new System.Drawing.Point(176, 69);
            this.btnFind.Name = "btnFind";
            this.btnFind.Size = new System.Drawing.Size(75, 23);
            this.btnFind.TabIndex = 2;
            this.btnFind.Text = "Find All";
            this.btnFind.UseVisualStyleBackColor = true;
            this.btnFind.Click += new System.EventHandler(this.BtnFindAll_Click);
            // 
            // groupBox1
            // 
            this.groupBox1.Controls.Add(this.cbRegex);
            this.groupBox1.Controls.Add(this.label1);
            this.groupBox1.Controls.Add(this.tbSearchExpression);
            this.groupBox1.Controls.Add(this.btnFind);
            this.groupBox1.Location = new System.Drawing.Point(12, 28);
            this.groupBox1.Name = "groupBox1";
            this.groupBox1.Size = new System.Drawing.Size(257, 98);
            this.groupBox1.TabIndex = 1;
            this.groupBox1.TabStop = false;
            this.groupBox1.Text = "Find";
            // 
            // cbRegex
            // 
            this.cbRegex.AutoSize = true;
            this.cbRegex.Location = new System.Drawing.Point(9, 45);
            this.cbRegex.Name = "cbRegex";
            this.cbRegex.Size = new System.Drawing.Size(144, 17);
            this.cbRegex.TabIndex = 1;
            this.cbRegex.Text = "Use Regular Expressions";
            this.cbRegex.UseVisualStyleBackColor = true;
            // 
            // label1
            // 
            this.label1.AutoSize = true;
            this.label1.Location = new System.Drawing.Point(6, 22);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(56, 13);
            this.label1.TabIndex = 6;
            this.label1.Text = "Find what:";
            // 
            // cbMaskRemovedText
            // 
            this.cbMaskRemovedText.AutoSize = true;
            this.cbMaskRemovedText.Location = new System.Drawing.Point(12, 151);
            this.cbMaskRemovedText.Name = "cbMaskRemovedText";
            this.cbMaskRemovedText.Size = new System.Drawing.Size(238, 17);
            this.cbMaskRemovedText.TabIndex = 2;
            this.cbMaskRemovedText.Text = "Draw black rectangles over the removed text";
            this.toolTip1.SetToolTip(this.cbMaskRemovedText, "Mask removed text fragments with black rectangles to make the output document loo" +
        "k like \"censored\".");
            this.cbMaskRemovedText.UseVisualStyleBackColor = true;
            // 
            // cbMakeUnsearchable
            // 
            this.cbMakeUnsearchable.AutoSize = true;
            this.cbMakeUnsearchable.Location = new System.Drawing.Point(12, 174);
            this.cbMakeUnsearchable.Name = "cbMakeUnsearchable";
            this.cbMakeUnsearchable.Size = new System.Drawing.Size(221, 17);
            this.cbMakeUnsearchable.TabIndex = 3;
            this.cbMakeUnsearchable.Text = "Make the output document unsearchable";
            // The first literal must stay on a single physical line: a regular
            // C# string literal cannot contain a raw line break.
            this.toolTip1.SetToolTip(this.cbMakeUnsearchable, "Make the output document unsearchable. If checked, all PDF pages will be replaced" +
        " with rendered images.");
            this.cbMakeUnsearchable.UseVisualStyleBackColor = true;
            // 
            // Form1
            // 
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(1153, 702);
            this.Controls.Add(this.cbMakeUnsearchable);
            this.Controls.Add(this.cbMaskRemovedText);
            this.Controls.Add(this.groupBox1);
            this.Controls.Add(this.btnProceed);
            this.Controls.Add(this.pdfViewerControl1);
            this.Controls.Add(this.toolStrip1);
            this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
            this.Name = "Form1";
            this.Text = "PDF Data Remover Tool";
            this.toolStrip1.ResumeLayout(false);
            this.toolStrip1.PerformLayout();
            this.groupBox1.ResumeLayout(false);
            this.groupBox1.PerformLayout();
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        private Bytescout.PDFViewer.PDFViewerControl pdfViewerControl1;
        private System.Windows.Forms.ToolStrip toolStrip1;
        private System.Windows.Forms.ToolStripButton tsbOpen;
        private System.Windows.Forms.Button btnProceed;
        private System.Windows.Forms.TextBox tbSearchExpression;
        private System.Windows.Forms.Button btnFind;
        private System.Windows.Forms.GroupBox groupBox1;
        private System.Windows.Forms.CheckBox cbRegex;
        private System.Windows.Forms.Label label1;
        private System.Windows.Forms.CheckBox cbMaskRemovedText;
        private System.Windows.Forms.CheckBox cbMakeUnsearchable;
        private System.Windows.Forms.ToolTip toolTip1;
    }
}

Form1.cs

      
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Windows.Forms;
using Bytescout.PDFExtractor;
using Bytescout.PDFViewer;

namespace Sample_UI_Application
{
    /// <summary>
    /// Main window of the PDF Data Remover sample. The user marks text fragments
    /// in the PDF viewer, either by selecting them or with "Find All", and the
    /// "Perform Removal" button removes the marked fragments into "output.pdf".
    /// </summary>
    public partial class Form1 : Form
    {
        // Keep selected rectangles for all document pages (key = page index).
        private readonly Dictionary<int, RectangleF[]> _foundTextRectangles = new Dictionary<int, RectangleF[]>();

        /// <summary>
        /// Builds the form, configures the viewer for multi-rectangle selection
        /// and loads "sample.pdf".
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            // Tune PDF Viewer control
            pdfViewerControl1.MouseMode = MouseMode.Selection;
            pdfViewerControl1.MultiSelectMode = true;
            pdfViewerControl1.AllowResizeSelectionRectangles = true;
            pdfViewerControl1.ShowResizeHandlesForActiveSelectionOnly = true;
            pdfViewerControl1.ClearSelectionOnClick = false;

            // Load document into PDF viewer
            pdfViewerControl1.InputFile = "sample.pdf";
        }

        /// <summary>
        /// Lets the user pick a PDF file and opens it in the viewer.
        /// Load errors are reported in a message box.
        /// </summary>
        private void tsbOpen_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Title = @"Open PDF Document";
                openFileDialog.Filter = @"PDF Files (*.pdf)|*.pdf|All Files|*.*";

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                this.Text = openFileDialog.FileName;

                Cursor = Cursors.WaitCursor;

                try
                {
                    // Rectangles are keyed by page index only, so ones recorded for the
                    // previous document must not be shown on, or removed from, the new one.
                    // Clear before loading so that a page-index event raised during the
                    // load cannot restore a stale selection.
                    _foundTextRectangles.Clear();

                    // Open file in PDF Viewer control
                    pdfViewerControl1.InputFile = openFileDialog.FileName;
                }
                catch (Exception exception)
                {
                    MessageBox.Show(exception.Message);
                }
                finally
                {
                    Cursor = Cursors.Default;
                }
            }
        }

        /// <summary>
        /// Records the viewer's current selection rectangles for the current page.
        /// </summary>
        private void PdfViewerControl1_SelectionChanged(object sender, SelectionChange selectionChange, int selectionIndex)
        {
            // Store selection changes. "Cleared" notifications are skipped so the
            // rectangles stored for the page are kept.
            // NOTE(review): presumably the viewer reports Cleared on page switches - confirm.
            if (selectionChange != SelectionChange.Cleared)
            {
                _foundTextRectangles[pdfViewerControl1.CurrentPageIndex] = pdfViewerControl1.SelectionInPoints;
            }
        }

        /// <summary>
        /// Restores the stored selection, if any, when the viewer switches page.
        /// </summary>
        private void PdfViewerControl1_CurrentPageIndexChanged(object sender, EventArgs e)
        {
            // Show stored selection on page switching
            ShowStoredSelectionForCurrentPage();
        }

        /// <summary>
        /// Prepends a "Delete active selection" item and a separator to the viewer's context menu.
        /// </summary>
        private void PdfViewerControl1_ValidateContextMenu(object source, ContextMenuStrip menu)
        {
            // Add context menu item to delete active selection
            menu.Items.Insert(0, new ToolStripMenuItem("Delete active selection", null, (sender, args) =>
            {
                int activeSelectionIndex = pdfViewerControl1.ActiveSelectionIndex;
                if (activeSelectionIndex != -1)
                {
                    pdfViewerControl1.RemoveSelectionAt(activeSelectionIndex);
                }
            })
            {
                // The item is only usable while some selection is active.
                Enabled = pdfViewerControl1.ActiveSelectionIndex != -1
            });

            menu.Items.Insert(1, new ToolStripSeparator());
        }

        /// <summary>
        /// Handles the Delete key by removing the active selection rectangle.
        /// </summary>
        private void PdfViewerControl1_PreProcessKey(object source, Keys keyData, ref bool handled)
        {
            // `Delete` key will delete active selection
            if (keyData == Keys.Delete)
            {
                int activeSelectionIndex = pdfViewerControl1.ActiveSelectionIndex;
                if (activeSelectionIndex != -1)
                {
                    pdfViewerControl1.RemoveSelectionAt(activeSelectionIndex);
                    handled = true;
                }
            }
        }

        /// <summary>
        /// Searches every page of the loaded document for the text in the search box
        /// and stores the bounds of each match as rectangles to remove.
        /// </summary>
        private void BtnFindAll_Click(object sender, EventArgs e)
        {
            // Search strings shorter than two characters are rejected.
            if (tbSearchExpression.Text.Length <= 1)
            {
                MessageBox.Show(@"Try larger search string");
                return;
            }

            // Prepare TextExtractor
            using (TextExtractor textExtractor = new TextExtractor("demo", "demo"))
            {
                // Load document into TextExtractor
                textExtractor.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // Set options from UI
                textExtractor.RegexSearch = cbRegex.Checked;
                textExtractor.WordMatchingMode = WordMatchingMode.None;

                // Search for text in all pages and store rectangles of found pieces.
                // Pages without matches keep whatever rectangles they already had.
                int pageCount = textExtractor.GetPageCount();
                for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                {
                    ISearchResult[] searchResults = textExtractor.FindAll(pageIndex, tbSearchExpression.Text, caseSensitive: true);

                    if (searchResults.Length > 0)
                    {
                        _foundTextRectangles[pageIndex] = searchResults.Select(searchResult => searchResult.Bounds).ToArray();
                    }
                }
            }

            // Select found rectangles in PDF Viewer
            ShowStoredSelectionForCurrentPage();
        }

        /// <summary>
        /// Removes all stored rectangles from the loaded document, writes the result
        /// to "output.pdf" and opens it.
        /// </summary>
        private void BtnProceed_Click(object sender, EventArgs e)
        {
            string outputFile = "output.pdf";

            // Create `Bytescout.PDFExtractor.Remover2` instance
            using (Remover2 remover = new Remover2("demo", "demo"))
            {
                // Load document into remover
                remover.LoadDocumentFromFile(pdfViewerControl1.InputFile);

                // Set options from UI
                remover.MaskRemovedText = cbMaskRemovedText.Checked;
                remover.MakePDFUnsearchable = cbMakeUnsearchable.Checked;

                // Add fragments to remove
                foreach (KeyValuePair<int, RectangleF[]> keyValuePair in _foundTextRectangles)
                {
                    remover.AddTextToRemove(keyValuePair.Key, keyValuePair.Value);
                }

                // Perform removal and save result document to file
                remover.PerformRemoval(outputFile);
            }

            // Open output PDF file in default associated application
            Process.Start(outputFile);
        }

        // Applies the rectangles stored for the viewer's current page, if any, as its selection.
        private void ShowStoredSelectionForCurrentPage()
        {
            RectangleF[] storedRectangles;
            if (_foundTextRectangles.TryGetValue(pdfViewerControl1.CurrentPageIndex, out storedRectangles))
            {
                pdfViewerControl1.SelectionInPoints = storedRectangles;
            }
        }
    }
}

PdfDataRemoverTool.csproj

      
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Settings common to all configurations. -->
  <PropertyGroup>
    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
    <SchemaVersion>2.0</SchemaVersion>
    <ProjectGuid>{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}</ProjectGuid>
    <OutputType>WinExe</OutputType>
    <RootNamespace>PdfDataRemoverTool</RootNamespace>
    <AssemblyName>PdfDataRemoverTool</AssemblyName>
    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
  </PropertyGroup>
  <!-- Debug build: full symbols, no optimization. -->
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
    <DebugSymbols>true</DebugSymbols>
    <DebugType>full</DebugType>
    <Optimize>false</Optimize>
    <OutputPath>bin\Debug\</OutputPath>
    <DefineConstants>DEBUG;TRACE</DefineConstants>
    <ErrorReport>prompt</ErrorReport>
    <WarningLevel>4</WarningLevel>
  </PropertyGroup>
  <!-- Release build: pdb-only symbols, optimized. -->
  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
    <DebugType>pdbonly</DebugType>
    <Optimize>true</Optimize>
    <OutputPath>bin\Release\</OutputPath>
    <DefineConstants>TRACE</DefineConstants>
    <ErrorReport>prompt</ErrorReport>
    <WarningLevel>4</WarningLevel>
  </PropertyGroup>
  <!-- Assembly references. Only Bytescout.PDF carries a HintPath. -->
  <!-- NOTE(review): the other two Bytescout assemblies have no HintPath, so they must resolve some other way - confirm. -->
  <ItemGroup>
    <Reference Include="Bytescout.PDF, Version=1.9.4.317, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
      <SpecificVersion>False</SpecificVersion>
      <HintPath>..\..\..\Program Files\Bytescout PDF SDK\net4.5\Bytescout.PDF.dll</HintPath>
    </Reference>
    <Reference Include="Bytescout.PDFExtractor, Version=10.3.0.3566, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
      <SpecificVersion>False</SpecificVersion>
    </Reference>
    <Reference Include="Bytescout.PDFViewer, Version=2.20.0.543, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
      <SpecificVersion>False</SpecificVersion>
    </Reference>
    <Reference Include="System" />
    <Reference Include="System.Data" />
    <Reference Include="System.Deployment" />
    <Reference Include="System.Drawing" />
    <Reference Include="System.Windows.Forms" />
    <Reference Include="System.Xml" />
  </ItemGroup>
  <!-- Form resources, nested under Form1.cs. -->
  <ItemGroup>
    <EmbeddedResource Include="Form1.resx">
      <DependentUpon>Form1.cs</DependentUpon>
    </EmbeddedResource>
  </ItemGroup>
  <!-- C# sources. -->
  <ItemGroup>
    <Compile Include="Form1.cs">
      <SubType>Form</SubType>
    </Compile>
    <Compile Include="Form1.Designer.cs">
      <DependentUpon>Form1.cs</DependentUpon>
    </Compile>
    <Compile Include="Program.cs" />
  </ItemGroup>
  <!-- The sample document is copied to the output directory on every build. -->
  <ItemGroup>
    <None Include="sample.pdf">
      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    </None>
  </ItemGroup>
  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
       Other similar extension points exist, see Microsoft.Common.targets.
  <Target Name="BeforeBuild">
  </Target>
  <Target Name="AfterBuild">
  </Target>
  -->
</Project>

PdfDataRemoverTool.sln

      
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.29025.244
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PdfDataRemoverTool", "PdfDataRemoverTool.csproj", "{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{0DB74CC3-1DD8-4A58-94FC-CA9A60E2F8A2}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {6A423F77-480F-401A-9016-BED7CAE557A3}
	EndGlobalSection
EndGlobal

Program.cs

      
using System;
using System.Collections.Generic;
using System.Windows.Forms;

namespace Sample_UI_Application
{
    /// <summary>
    /// Start-up class of the sample application.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Process entry point: enables visual styles, selects the default
        /// text rendering mode and runs the message loop on a new <see cref="Form1"/>.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            Form1 mainForm = new Form1();
            Application.Run(mainForm);
        }
    }
}

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Tutorials:

prev
next