Here is what I do:
''' <summary>
''' Gets the PDF text from a file by running pdftotext.exe and capturing
''' what it writes to standard output.
''' Requires pdftotext.exe from http://www.foolabs.com/xpdf
''' </summary>
''' <param name="filename">The filename. May contain spaces; it is quoted
''' before being passed on the command line.</param>
''' <returns>PDF Text, or an empty string if the extraction failed (the
''' error is reported to the user in a message box).</returns>
Public Function getPDFtext(ByVal filename As String) As String
    Dim txtStdout As String = ""
    Try
        ' Drop any quotes the caller already added, then quote the path
        ' ourselves so a path containing spaces stays a single argument.
        Dim rawPath As String = filename
        If rawPath Is Nothing Then
            rawPath = ""
        End If
        Dim quotedPath As String = """" & rawPath.Trim(""""c) & """"

        ' Using guarantees the process handle is released even on failure.
        Using p As New System.Diagnostics.Process
            p.StartInfo.FileName = "Asset Search\pdftotext.exe"
            ' The trailing "-" is passed as the output-file argument so the
            ' text arrives on standard output instead of in a file.
            p.StartInfo.Arguments = quotedPath & " -"
            p.StartInfo.UseShellExecute = False
            p.StartInfo.CreateNoWindow = True
            p.StartInfo.RedirectStandardOutput = True
            p.Start()

            'Get the text from standard output
            Using std_out As IO.StreamReader = p.StandardOutput
                txtStdout = std_out.ReadToEnd()
            End Using

            ' Read first, then wait: waiting before draining a redirected
            ' stream can deadlock once the pipe buffer fills.
            p.WaitForExit()
        End Using
    Catch ex As Exception
        MsgBox("Error in while extracting PDF text, the error is: " & ex.Message)
    End Try
    Return txtStdout
End Function
I wouldn't use it for anything serious, business-critical, or real-time. For
that you should probably go with a commercial control like
http://www.pdfonline.com/. But for quick-and-dirty text extraction it works
fine for me.
Best Regards,
Chris