1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > C#实现将HTML转换成纯文本的方法

C#实现将HTML转换成纯文本的方法

时间:2023-08-07 03:30:58

相关推荐

C#实现将HTML转换成纯文本的方法

后端开发|C#.Net教程

C# HTML 纯文本

后端开发-C#.Net教程

本文实例讲述了C#实现将HTML转换成纯文本的方法。分享给大家供大家参考。具体如下:

equalsbuilder 源码,ubuntu.vhd引导,tomcat需要密码登录么,爬虫软件 用途,php 近7天,电商运营工作流程图seolzw

使用方法:

vb ftp winsock 源码,整合vscode,ubuntu iso源,重置tomcat,sqlite非空,远程服务器登陆设置工具,shopex分页插件,前端框架+饼图,java微博爬虫,php可逆加密函数,seo培训西安,国外优秀购物网站,网页屏幕上雪花飘落动画特效,bs网站模板,登录页面气泡特效,java 进存管理系统,模拟退火算法matlab程序lzw

// Convert the HTML held in the first text box and show the plain-text result in the second.
HtmlToText converter = new HtmlToText();
textBox2.Text = converter.Convert(textBox1.Text);

C#代码如下:

thinkphp 进销存源码,ubuntu解压egg文件,电商直播爬虫,.ashx .php,seo薪资架构lzw

// Requires: using System; using System.Collections.Generic; using System.Text; using System.Web;

/// <summary>
/// Converts HTML markup to plain text. Block-level tags are mapped to line
/// breaks, the content of non-textual elements (script, style, ...) is
/// dropped, runs of whitespace are collapsed, and HTML entities are decoded.
/// </summary>
/// <remarks>
/// Instances keep parsing state in fields, so a single instance must not be
/// used from several threads at the same time.
/// </remarks>
class HtmlToText
{
    // Static data tables
    protected static Dictionary<string, string> _tags;
    protected static HashSet<string> _ignoreTags;

    // Instance variables
    protected TextBuilder _text;
    protected string _html;
    protected int _pos;

    // Static constructor (one time only)
    static HtmlToText()
    {
        // Tags (lower-case, closing tags prefixed with '/') that emit text
        // when encountered, and the text they emit.
        _tags = new Dictionary<string, string>
        {
            { "address", "\n" },
            { "blockquote", "\n" },
            { "div", "\n" },
            { "dl", "\n" },
            { "fieldset", "\n" },
            { "form", "\n" },
            { "h1", "\n" },
            { "/h1", "\n" },
            { "h2", "\n" },
            { "/h2", "\n" },
            { "h3", "\n" },
            { "/h3", "\n" },
            { "h4", "\n" },
            { "/h4", "\n" },
            { "h5", "\n" },
            { "/h5", "\n" },
            { "h6", "\n" },
            { "/h6", "\n" },
            { "p", "\n" },
            { "/p", "\n" },
            { "table", "\n" },
            { "/table", "\n" },
            { "ul", "\n" },
            { "/ul", "\n" },
            { "ol", "\n" },
            { "/ol", "\n" },
            { "/li", "\n" },
            { "br", "\n" },
            { "/td", "\t" },
            { "/tr", "\n" },
            { "/pre", "\n" }
        };

        // Tags whose inner content is discarded entirely.
        _ignoreTags = new HashSet<string>
        {
            "script",
            "noscript",
            "style",
            "object"
        };
    }

    /// <summary>
    /// Converts the given HTML to plain text and returns the result.
    /// </summary>
    /// <param name="html">HTML to be converted. <c>null</c> is treated as empty input.</param>
    /// <returns>Resulting plain text</returns>
    public string Convert(string html)
    {
        // Initialize state variables
        _text = new TextBuilder();
        _html = html ?? String.Empty;
        _pos = 0;

        // Process input
        while (!EndOfText)
        {
            if (Peek() == '<')
            {
                // HTML tag
                bool selfClosing;
                string tag = ParseTag(out selfClosing);

                // Handle special tag cases
                if (tag == "body")
                {
                    // Discard content before <body>
                    _text.Clear();
                }
                else if (tag == "/body")
                {
                    // Discard content after </body>
                    _pos = _html.Length;
                }
                else if (tag == "pre")
                {
                    // Enter preformatted mode
                    _text.Preformatted = true;
                    EatWhitespaceToNextLine();
                }
                else if (tag == "/pre")
                {
                    // Exit preformatted mode
                    _text.Preformatted = false;
                }

                string value;
                if (_tags.TryGetValue(tag, out value))
                {
                    _text.Write(value);
                }

                if (_ignoreTags.Contains(tag))
                {
                    EatInnerContent(tag);
                }
            }
            else if (Char.IsWhiteSpace(Peek()))
            {
                // Whitespace (treat all as space unless preformatted)
                _text.Write(_text.Preformatted ? Peek() : ' ');
                MoveAhead();
            }
            else
            {
                // Other text
                _text.Write(Peek());
                MoveAhead();
            }
        }

        // Entities are decoded last, so "&lt;" in the source text is never
        // mistaken for the start of a tag while parsing.
        return HttpUtility.HtmlDecode(_text.ToString());
    }

    // Eats all characters that are part of the current tag
    // and returns the lower-cased tag name (closing tags keep
    // their leading '/'). selfClosing is set when a '/' appears
    // after the tag name, as in <br />.
    protected string ParseTag(out bool selfClosing)
    {
        string tag = String.Empty;
        selfClosing = false;

        if (Peek() == '<')
        {
            MoveAhead();

            // Parse tag name
            EatWhitespace();
            int start = _pos;
            if (Peek() == '/')
            {
                MoveAhead();
            }
            while (!EndOfText && !Char.IsWhiteSpace(Peek()) &&
                Peek() != '/' && Peek() != '>')
            {
                MoveAhead();
            }
            // Invariant casing: tag names are identifiers, not linguistic
            // text (culture-sensitive lowering breaks e.g. "DIV" under tr-TR).
            tag = _html.Substring(start, _pos - start).ToLowerInvariant();

            // Parse rest of tag
            while (!EndOfText && Peek() != '>')
            {
                if (Peek() == '"' || Peek() == '\'')
                {
                    // Skip attribute values so a '>' inside quotes
                    // does not terminate the tag
                    EatQuotedValue();
                }
                else
                {
                    if (Peek() == '/')
                    {
                        selfClosing = true;
                    }
                    MoveAhead();
                }
            }
            MoveAhead();
        }
        return tag;
    }

    // Consumes inner content from the current tag, up to and including
    // the first matching end tag (or the end of input if there is none).
    // Nested elements of the same name are not tracked.
    protected void EatInnerContent(string tag)
    {
        string endTag = "/" + tag;

        while (!EndOfText)
        {
            if (Peek() == '<')
            {
                // Consume a tag; stop once the element is closed
                bool selfClosing;
                if (ParseTag(out selfClosing) == endTag)
                {
                    return;
                }
            }
            else
            {
                MoveAhead();
            }
        }
    }

    // Returns true if the current position is at the end of
    // the string
    protected bool EndOfText
    {
        get { return (_pos >= _html.Length); }
    }

    // Safely returns the character at the current position
    // ('\0' when positioned at the end of the input)
    protected char Peek()
    {
        return (_pos < _html.Length) ? _html[_pos] : (char)0;
    }

    // Safely advances the current position to the next character
    protected void MoveAhead()
    {
        _pos = Math.Min(_pos + 1, _html.Length);
    }

    // Moves the current position to the next non-whitespace
    // character.
    protected void EatWhitespace()
    {
        while (Char.IsWhiteSpace(Peek()))
        {
            MoveAhead();
        }
    }

    // Moves the current position to the next non-whitespace
    // character or the start of the next line, whichever
    // comes first
    protected void EatWhitespaceToNextLine()
    {
        while (Char.IsWhiteSpace(Peek()))
        {
            char c = Peek();
            MoveAhead();
            if (c == '\n')
            {
                break;
            }
        }
    }

    // Moves the current position past a quoted value
    protected void EatQuotedValue()
    {
        char c = Peek();
        if (c == '"' || c == '\'')
        {
            // Opening quote
            MoveAhead();

            // Find end of value; a line break also ends the value so an
            // unterminated quote cannot swallow the rest of the document
            _pos = _html.IndexOfAny(new char[] { c, '\r', '\n' }, _pos);
            if (_pos < 0)
            {
                _pos = _html.Length;
            }
            else
            {
                MoveAhead();    // Closing quote
            }
        }
    }

    /// <summary>
    /// A StringBuilder wrapper that helps eliminate excess whitespace:
    /// it collapses runs of spaces, trims each line, and limits
    /// consecutive blank lines.
    /// </summary>
    protected class TextBuilder
    {
        private StringBuilder _text;
        private StringBuilder _currLine;
        private int _emptyLines;
        private bool _preformatted;

        // Construction
        public TextBuilder()
        {
            _text = new StringBuilder();
            _currLine = new StringBuilder();
            _emptyLines = 0;
            _preformatted = false;
        }

        /// <summary>
        /// Normally, extra whitespace characters are discarded.
        /// If this property is set to true, they are passed
        /// through unchanged.
        /// </summary>
        public bool Preformatted
        {
            get
            {
                return _preformatted;
            }
            set
            {
                if (value)
                {
                    // Clear line buffer if changing to
                    // preformatted mode
                    if (_currLine.Length > 0)
                    {
                        FlushCurrLine();
                    }
                    _emptyLines = 0;
                }
                _preformatted = value;
            }
        }

        /// <summary>
        /// Clears all current text.
        /// </summary>
        public void Clear()
        {
            _text.Length = 0;
            _currLine.Length = 0;
            _emptyLines = 0;
        }

        /// <summary>
        /// Writes the given string to the output buffer.
        /// </summary>
        /// <param name="s">String to write</param>
        public void Write(string s)
        {
            foreach (char c in s)
            {
                Write(c);
            }
        }

        /// <summary>
        /// Writes the given character to the output buffer.
        /// </summary>
        /// <param name="c">Character to write</param>
        public void Write(char c)
        {
            if (_preformatted)
            {
                // Write preformatted character
                _text.Append(c);
            }
            else
            {
                if (c == '\r')
                {
                    // Ignore carriage returns. We'll process
                    // '\n' if it comes next
                }
                else if (c == '\n')
                {
                    // Flush current line
                    FlushCurrLine();
                }
                else if (Char.IsWhiteSpace(c))
                {
                    // Write single space character
                    int len = _currLine.Length;
                    if (len == 0 || !Char.IsWhiteSpace(_currLine[len - 1]))
                    {
                        _currLine.Append(' ');
                    }
                }
                else
                {
                    // Add character to current line
                    _currLine.Append(c);
                }
            }
        }

        // Appends the current line to output buffer
        protected void FlushCurrLine()
        {
            // Get current line
            string line = _currLine.ToString().Trim();

            // Determine if line contains non-space characters. Entities are
            // still encoded at this point, so a line made only of "&nbsp;"
            // counts as empty.
            string tmp = line.Replace("&nbsp;", String.Empty);
            if (tmp.Length == 0)
            {
                // An empty line: keep at most one in a row, and none
                // at the very start of the output
                _emptyLines++;
                if (_emptyLines < 2 && _text.Length > 0)
                {
                    _text.AppendLine(line);
                }
            }
            else
            {
                // A non-empty line
                _emptyLines = 0;
                _text.AppendLine(line);
            }

            // Reset current line
            _currLine.Length = 0;
        }

        /// <summary>
        /// Returns the current output as a string.
        /// </summary>
        public override string ToString()
        {
            if (_currLine.Length > 0)
            {
                FlushCurrLine();
            }
            return _text.ToString();
        }
    }
}

希望本文所述对大家的C#程序设计有所帮助。

更多C#实现将HTML转换成纯文本的方法相关文章请关注PHP中文网!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。