使用 Solid Framework 可以很方便地以编程方式将 PDF 转换成 Word 文件格式
首先准备一套Solid Framework
在Visual Studio中建立一个项目并引用SolidFramework.dll
添加命名空间 SolidFramework
using SolidFramework;using SolidFramework.Configuration;using SolidFramework.Converters;using SolidFramework.Converters.Plumbing;
前期准备：设置路径和许可证
// Resolve the directory that contains the currently executing assembly.
string directoryName = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

// Build "<assembly dir>/SolidFramework" with Path.Combine instead of manual
// concatenation: it inserts the separator only when needed, so a directory that
// already ends with one (e.g. a drive root) no longer yields a doubled separator.
string str = Path.Combine(directoryName, "SolidFramework");

// Point the Solid Framework installer at that directory and leave forced unpacking off.
Installer.NativePlatformDirectory = str;
Installer.ForceUnpack = false;

// Import the license. The argument values below are placeholders from the article
// and must be replaced with real license data.
License.Import("Solid Framework", "xxxx", "xxx", "xxxxxxxxxxxxxxx", "NOCALL");
初始化 Solid Framework 的 PDF 转 Word 转换器（PdfToWordConverter）
// Create the converter that the settings below configure.
// NOTE(review): the original snippet used pdfToWordConverter without ever showing its
// creation — confirm it is not already declared elsewhere before keeping this line.
PdfToWordConverter pdfToWordConverter = new PdfToWordConverter();

// Add the PDF file to convert.
// (In the original, this comment and all following statements shared one physical
// line, so the "//" turned the entire configuration into a comment.)
pdfToWordConverter.AddSourceFile(path);

// Settings taken from the caller-supplied option object.
pdfToWordConverter.ReconstructionMode = option.C_ReconstructionMode;
pdfToWordConverter.DetectTables = option.Table_Detection;
pdfToWordConverter.OutputType = WordDocumentType.DocX;
pdfToWordConverter.HeaderAndFooterMode = option.C_HeaderAndFooterMode;
pdfToWordConverter.ImageAnchoringMode = option.C_ImageAnchoringMode;

// Always overwrite an existing output file.
pdfToWordConverter.OverwriteMode = SolidFramework.Plumbing.OverwriteMode.ForceOverwrite;
pdfToWordConverter.KeepCharacterSpacing = false;

// Write the converted document into the same directory as the source PDF.
FileInfo fileInfo = new FileInfo(path);
pdfToWordConverter.TextRecoveryType = option.Recognize_Text;
pdfToWordConverter.OutputDirectory = fileInfo.DirectoryName;

// Fixed feature switches.
pdfToWordConverter.SupportRightToLeftWritingDirection = true;
pdfToWordConverter.DetectLists = true;
pdfToWordConverter.DetectStyles = true;
pdfToWordConverter.DetectToc = true;
pdfToWordConverter.MarkupAnnotConversionType = MarkupAnnotConversionType.Never;
pdfToWordConverter.TextRecoveryNseType = TextRecoveryNSE.Never;
OCR识别引擎, 这里使用内置引擎
// Select the OCR engine for text recovery; the article describes SolidOCR as the built-in engine.
pdfToWordConverter.TextRecoveryEngine = TextRecoveryEngine.SolidOCR;
一切就绪，开始转换
// Run the conversion for the source file(s) that were added to the converter.
pdfToWordConverter.Convert();

// Read the outcome of the first entry in the converter's result list.
var firstResult = pdfToWordConverter.Results[0];
ConversionStatus status = firstResult.Status;
关于ConversionStatus
ConversionStatus 定义了多种转换状态，例如 IO 错误、密码错误等
/// <summary>
/// Status codes reported for a conversion. Members are listed in ascending
/// numeric order; every value is assigned explicitly, so declaration order
/// has no effect on the underlying values.
/// </summary>
public enum ConversionStatus
{
    Success = 0,
    Canceled = 1,
    InternalError = 2,
    Fail = 3,
    UnavailableAction = 4,
    BadData = 5,
    IOError = 6,
    IOFileLocked = 7,
    InvalidPagesRange = 8,
    NotEnoughMemory = 9,
    FileHasCopyProtection = 10,
    UnsupportedEncryptionHandler = 11,
    MissingCertificate = 12,
    OCRCanceled = 13,
    CanceledExists = 14,
    NoTablesToExtract = 15,
    NoImagesToExtract = 16,
    PdfAError = 20,
    PdfAFatalError = 21,
    AlreadyLoaded = 30,
    WrongPassword = 31,
    NoUserNoOwner = 32,
    NoUserOwner = 33,
    UserNoOwner = 34,
    UserOwner = 35,
    InvalidLicense = 36,
    NoBppConversion = 150,
    NoGrayscale = 151,
    PSDUnsupportedMode = 152,
    Unknown = 200
}
关于输出格式
// Select the Word output format; DocX is used here (the article notes Doc is the other option).
pdfToWordConverter.OutputType = WordDocumentType.DocX;
可以是Doc或Docx
执行后
就可以将PDF转换成Word了