Analyse PDF Text Contents by PDF Parser & Modify SDK from VC++

PDF Parser & Modify SDK can be downloaded from the following web page:

 

https://www.verydoc.com/pdfparsersdk.html

 

You can use the following VC++ source code to extract text contents from a PDF file. The same source code also shows how to replace text contents in a PDF file.

 

#include <windows.h>

#include <assert.h>

#include <io.h>

#include <stdio.h>

#include <string.h>

#include <vector>

using namespace std;

/*

"lpOptions" parameter supports following options:

 

-f <int>           : first page to convert

-l <int>           : last page to convert

-r <int>           : resolution for both X and Y, in DPI (default is 150)

-opw <string>      : owner password (for encrypted files)

-upw <string>      : user password (for encrypted files)

-html              : output text information in HTML format instead of CSV format

*/

 

/*
 * Function-pointer types for the PDF Parser SDK entry points. LoadPDFLibrary()
 * looks each one up by name ("VeryPDF_PDFParserSDK...") with GetProcAddress.
 * The lpOptions strings accept the switches listed in the comment above; the
 * sample code in this file additionally passes the license key after "-$ ".
 * NOTE(review): the meaning of the int return values is not visible in this
 * file - confirm against the SDK documentation.
 */
typedef int (WINAPI *PDFParserSDKFunc)(char *lpPDFFile, char *lpOutFile, char *lpOptions);

typedef int (WINAPI *PDFParserSDKFromMemoryFunc)(LPBYTE lpPDFData, int nDataLen, char *lpOutFile, char *lpOptions);

typedef HANDLE (WINAPI *PDFParserSDK_GetHandleFunc)(char *lpPDFFile, char *lpOptions);

typedef int (WINAPI *PDFParserSDK_ParseFunc)(HANDLE hPDFParserData, char *lpOptions);

typedef int (WINAPI *PDFParserSDK_GetCountFunc)(HANDLE hPDFParserData);

typedef int (WINAPI *PDFParserSDK_GetImageLengthFunc)(HANDLE hPDFParserData, int nIndex);

typedef int (WINAPI *PDFParserSDK_GetImageDataFunc)(HANDLE hPDFParserData, int nIndex, LPBYTE lpData, int nBufLen);

typedef int (WINAPI *PDFParserSDK_GetTextInfoLengthFunc)(HANDLE hPDFParserData, int nIndex);

typedef int (WINAPI *PDFParserSDK_GetTextInfoDataFunc)(HANDLE hPDFParserData, int nIndex, LPBYTE lpData, int nBufLen);

typedef int (WINAPI *PDFParserSDK_FreeFunc)(HANDLE hPDFParserData);

typedef int (WINAPI *PDFParserSDK_GetPageCountFunc)(char *lpPDFFile);

typedef int (WINAPI *PDFParserSDK_GetAllPagesCountFunc)(HANDLE hPDFParserData);

 

/*
 * Global entry-point pointers, one per type above. They start out NULL and are
 * assigned by LoadPDFLibrary(); a pointer stays NULL when GetProcAddress does
 * not find the export, so it must not be called before a successful load.
 */
PDFParserSDKFunc PDFParserSDK = NULL;

PDFParserSDKFromMemoryFunc PDFParserSDKFromMemory = NULL;

PDFParserSDK_GetHandleFunc PDFParserSDK_GetHandle = NULL;

PDFParserSDK_ParseFunc PDFParserSDK_Parse = NULL;

PDFParserSDK_GetCountFunc PDFParserSDK_GetCount = NULL;

PDFParserSDK_GetImageLengthFunc PDFParserSDK_GetImageLength = NULL;

PDFParserSDK_GetImageDataFunc PDFParserSDK_GetImageData = NULL;

PDFParserSDK_GetTextInfoLengthFunc PDFParserSDK_GetTextInfoLength = NULL;

PDFParserSDK_GetTextInfoDataFunc PDFParserSDK_GetTextInfoData = NULL;

PDFParserSDK_FreeFunc PDFParserSDK_Free = NULL;

PDFParserSDK_GetPageCountFunc PDFParserSDK_GetPageCount = NULL;

PDFParserSDK_GetAllPagesCountFunc PDFParserSDK_GetAllPagesCount = NULL;

 

/*
 * Function-pointer types for the PDF modification entry points. LoadPDFLibrary()
 * resolves them by name from the same DLL handle as the parser entry points.
 * Test_PDFParserSDK_ModifyPDF() shows the call order used by this sample:
 * SetCode, OpenFile, ModifyText, CloseFile.
 * NOTE(review): the units and origin of x / y / nWidth / nHeight are not
 * defined in this file (the sample only mentions a 72 DPI default) - confirm
 * against the SDK documentation.
 */
typedef HANDLE (WINAPI *VeryPDF_ModifyPDF_OpenFileFunc)(char *lpInPDFFile, char *lpOutPDFFile);

typedef BOOL (WINAPI *VeryPDF_ModifyPDF_CloseFileFunc)(HANDLE hPDF);

typedef BOOL (WINAPI *VeryPDF_ModifyPDF_ModifyTextFunc)(HANDLE hPDF, int nPage, int x, int y, int nWidth, int nHeight, char *lpOldText, char *lpNewText);

typedef void (WINAPI *VeryPDF_ModifyPDF_SetCodeFunc)(char *lpLicenseKey);

 

/*
 * Global entry-point pointers for the modification API. NULL until
 * LoadPDFLibrary() assigns them; a pointer remains NULL when the export is
 * not found in the loaded DLL.
 */
VeryPDF_ModifyPDF_OpenFileFunc VeryPDF_ModifyPDF_OpenFile = NULL;

VeryPDF_ModifyPDF_CloseFileFunc VeryPDF_ModifyPDF_CloseFile = NULL;

VeryPDF_ModifyPDF_ModifyTextFunc VeryPDF_ModifyPDF_ModifyText = NULL;

VeryPDF_ModifyPDF_SetCodeFunc VeryPDF_ModifyPDF_SetCode = NULL;

 

/* Placeholder license key for the parser API; the sample code appends it
   after "-$ " in the option strings it passes to the parser entry points. */
#define LICENSE_KEY_PDFPARSERSDK "XXXXXXXXXXXXXXXXXXX"

/* Placeholder license key for the modification API; passed to
   VeryPDF_ModifyPDF_SetCode() in Test_PDFParserSDK_ModifyPDF(). */
#define LICENSE_KEY_PDFMODIFYSDK "Your License Key for PDF Modify SDK"

 

void GetModulePath(char *out_path,char *in_name)

{

       char *p;

       GetModuleFileName(NULL,out_path,256);

       p =strrchr(out_path,'\\');

       p[1]=0;

       strcat(out_path,in_name);

}

 

int TestPDFParserSDKInMemory(char *lpInFile, char *lpOutFile)

{

       LPBYTE lpData = NULL;

       int nLength = 0;

       FILE *file = fopen(lpInFile,"rb");

       if(!file)

              return -1001;

       nLength = _filelength(fileno(file));

       if(nLength <= 0)

       {

              fclose(file);

              return -1002;

       }

       lpData = new BYTE[nLength];

       if(lpData == NULL)

              return -1003;

       fread(lpData,1,nLength,file);

       fclose(file);

       int nRet = PDFParserSDKFromMemory(lpData, nLength, lpOutFile, "-$ "LICENSE_KEY_PDFPARSERSDK);

       delete []lpData;

      

       return nRet;

}

HMODULE LoadPDFLibrary()

{

       int nRet = 0;

       char szLibPath[_MAX_PATH];

       GetModulePath(szLibPath, "pdfparsersdk2.dll");

       HMODULE dll_handle = LoadLibrary(szLibPath);

       if(dll_handle == NULL)

              return FALSE;

       PDFParserSDK = (PDFParserSDKFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK");

       PDFParserSDKFromMemory = (PDFParserSDKFromMemoryFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDKFromMemory");

       PDFParserSDK_GetHandle = (PDFParserSDK_GetHandleFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetHandle");

       PDFParserSDK_Parse = (PDFParserSDK_ParseFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_Parse");

       PDFParserSDK_GetCount = (PDFParserSDK_GetCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetCount");

       PDFParserSDK_GetImageLength = (PDFParserSDK_GetImageLengthFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetImageLength");

       PDFParserSDK_GetImageData = (PDFParserSDK_GetImageDataFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetImageData");

       PDFParserSDK_GetTextInfoLength = (PDFParserSDK_GetTextInfoLengthFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetTextInfoLength");

       PDFParserSDK_GetTextInfoData = (PDFParserSDK_GetTextInfoDataFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetTextInfoData");

       PDFParserSDK_Free = (PDFParserSDK_FreeFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_Free");

       PDFParserSDK_GetPageCount = (PDFParserSDK_GetPageCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetPageCount");

       PDFParserSDK_GetAllPagesCount = (PDFParserSDK_GetAllPagesCountFunc)GetProcAddress(dll_handle, "VeryPDF_PDFParserSDK_GetAllPagesCount");

 

       VeryPDF_ModifyPDF_OpenFile = (VeryPDF_ModifyPDF_OpenFileFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_OpenFile");

       VeryPDF_ModifyPDF_CloseFile = (VeryPDF_ModifyPDF_CloseFileFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_CloseFile");

       VeryPDF_ModifyPDF_ModifyText = (VeryPDF_ModifyPDF_ModifyTextFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_ModifyText");

       VeryPDF_ModifyPDF_SetCode = (VeryPDF_ModifyPDF_SetCodeFunc)GetProcAddress(dll_handle, "VeryPDF_ModifyPDF_SetCode");

       return dll_handle;

}

int Test_PDFParserSDK_1(char *pdf_filename, char *out_filename)

{

       int nRet = 0;

       nRet = PDFParserSDK(pdf_filename, out_filename, "-html -$ "LICENSE_KEY_PDFPARSERSDK);

       return nRet;

}

 

int Test_PDFParserSDK_2(char *pdf_filename, char *out_filename)

{

       int nRet = 0;

       nRet = TestPDFParserSDKInMemory(pdf_filename, out_filename);

       return nRet;

}

int Test_PDFParserSDK_3(char *pdf_filename, char *out_filename)

{

       int nRet = 0;

       HANDLE hPDFSDK = PDFParserSDK_GetHandle(pdf_filename, "-$ "LICENSE_KEY_PDFPARSERSDK);

       if(hPDFSDK == NULL)

              return nRet;

       int nCount = PDFParserSDK_GetCount(hPDFSDK);

       for(int i = 0; i < nCount; i++)

       {

              int nImageDataLen = PDFParserSDK_GetImageLength(hPDFSDK, i);

              int nTextInfoLen = PDFParserSDK_GetTextInfoLength(hPDFSDK, i);

              vector<BYTE> vecImgData;

              vector<BYTE> vecTxtData;

              vecImgData.resize(nImageDataLen);

              vecTxtData.resize(nTextInfoLen);

              PDFParserSDK_GetImageData(hPDFSDK, i, vecImgData.begin(), vecImgData.size());

              PDFParserSDK_GetTextInfoData(hPDFSDK, i, vecTxtData.begin(), vecTxtData.size());

       }

       PDFParserSDK_Free(hPDFSDK);

       hPDFSDK = NULL;

       return nRet;

}

int Test_PDFParserSDK_4(char *pdf_filename, char *out_filename, char *lpOptions)

{

       int nRet = 0;

       int nPageCount = PDFParserSDK_GetPageCount(pdf_filename);

       for(int page = 1; page <= nPageCount; page++)

       {

              printf("Parser page %d ...\n", page);

              char szOptions[300];

              sprintf(szOptions, "-$ "LICENSE_KEY_PDFPARSERSDK" -f %d -l %d", page, page);

              if(lpOptions)

              {

                     strcat(szOptions, " ");

                     strcat(szOptions, lpOptions);

              }

              HANDLE hPDFSDK = PDFParserSDK_GetHandle(pdf_filename, szOptions);

              if(hPDFSDK == NULL)

                     break;

              int nCount = PDFParserSDK_GetCount(hPDFSDK);

              for(int i = 0; i < nCount; i++)

              {

                     int nImageDataLen = PDFParserSDK_GetImageLength(hPDFSDK, i);

                     int nTextInfoLen = PDFParserSDK_GetTextInfoLength(hPDFSDK, i);

                     vector<BYTE> vecImgData;

                     vector<BYTE> vecTxtData;

                     vecImgData.resize(nImageDataLen);

                     vecTxtData.resize(nTextInfoLen);

                     PDFParserSDK_GetImageData(hPDFSDK, i, vecImgData.begin(), vecImgData.size());

                     PDFParserSDK_GetTextInfoData(hPDFSDK, i, vecTxtData.begin(), vecTxtData.size());

              }

              PDFParserSDK_Free(hPDFSDK);

              hPDFSDK = NULL;     

       }

       return nRet;

}

 

int Test_PDFParserSDK_ModifyPDF()

{

       char szPDFFile[_MAX_PATH];

       GetModulePath(szPDFFile, "example1.pdf");

       char szOutPDFFile[_MAX_PATH];

       GetModulePath(szOutPDFFile, "_modified.pdf");

      

       VeryPDF_ModifyPDF_SetCode(LICENSE_KEY_PDFMODIFYSDK);

 

       HANDLE hPDF = VeryPDF_ModifyPDF_OpenFile(szPDFFile, szOutPDFFile);

       if(hPDF == NULL)

              return 0;

 

       int nPage = 1;

       //The default DPI for these values is 72DPI

       int dpi = 72;

       int x         = 319*72/dpi;

       int y         = 56*72/dpi;

       int nWidth    = 37*72/dpi;

       int nHeight = 7*72/dpi;

       char *lpOldText = "September";

       char *lpNewText = "VeryPDF";

       BOOL bRet = VeryPDF_ModifyPDF_ModifyText(hPDF, nPage, x, y, nWidth, nHeight, lpOldText, lpNewText);

       VeryPDF_ModifyPDF_CloseFile(hPDF);

       return 1;

}

 

int main(int argc, char* argv[])

{

       if(argc != 3)

       {

              printf("%s test.pdf out.png\n", argv[0]);

              return 0;

       }

       int nRet = 0;

       char *pdf_filename = argv[1];

       char *out_filename = argv[2];

       HMODULE hPDFLib = LoadPDFLibrary();

       if(hPDFLib == NULL)

              return 0;

       int nTime1 = GetTickCount();

       /////////////////////////////////

       nRet = Test_PDFParserSDK_1(pdf_filename, out_filename);

       //nRet = Test_PDFParserSDK_2(pdf_filename, out_filename);

       //nRet = Test_PDFParserSDK_3(pdf_filename, out_filename);

       //nRet = Test_PDFParserSDK_4(pdf_filename, out_filename, NULL);

       //nRet = Test_PDFParserSDK_4(pdf_filename, out_filename, "-noimg");

       nRet = Test_PDFParserSDK_ModifyPDF();

       /////////////////////////////////

       int nTime2 = GetTickCount();

       printf("Spend Time: %dms (%.2fs)\n", nTime2-nTime1, (nTime2-nTime1)/1000.0);

       FreeLibrary(hPDFLib);

       return nRet;

}

 

 

VN:F [1.9.20_1166]
Rating: 0.0/10 (0 votes cast)
VN:F [1.9.20_1166]
Rating: 0 (from 0 votes)

Related Posts

Leave a Reply

Your email address will not be published. Required fields are marked *


Verify Code   If you cannot see the CheckCode image, please refresh the page.