惯性聚合 高效追踪和阅读你感兴趣的博客、新闻、科技资讯
阅读原文 在惯性聚合中打开

推荐订阅源

L
LINUX DO - 热门话题
Stack Overflow Blog
Stack Overflow Blog
B
Blog
WordPress大学
WordPress大学
Project Zero
Project Zero
P
Palo Alto Networks Blog
阮一峰的网络日志
阮一峰的网络日志
博客园 - 司徒正美
有赞技术团队
有赞技术团队
S
SegmentFault 最新的问题
freeCodeCamp Programming Tutorials: Python, JavaScript, Git & More
小众软件
小众软件
T
Tailwind CSS Blog
Forbes - Security
Forbes - Security
F
Full Disclosure
SecWiki News
SecWiki News
钛媒体:引领未来商业与生活新知
钛媒体:引领未来商业与生活新知
Hacker News: Ask HN
Hacker News: Ask HN
C
Check Point Blog
Microsoft Security Blog
Microsoft Security Blog
Threat Intelligence Blog | Flashpoint
Threat Intelligence Blog | Flashpoint
F
Fortinet All Blogs
Cisco Talos Blog
Cisco Talos Blog
G
Google Developers Blog
J
Java Code Geeks
Google DeepMind News
Google DeepMind News
人人都是产品经理
人人都是产品经理
CTFtime.org: upcoming CTF events
CTFtime.org: upcoming CTF events
Recorded Future
Recorded Future
O
OpenAI News
Spread Privacy
Spread Privacy
MongoDB | Blog
MongoDB | Blog
H
Hackread – Cybersecurity News, Data Breaches, AI and More
C
Cybersecurity and Infrastructure Security Agency CISA
S
Securelist
V
Vulnerabilities – Threatpost
Y
Y Combinator Blog
IT之家
IT之家
U
Unit 42
腾讯CDC
S
Security Affairs
C
Cisco Blogs
Schneier on Security
Schneier on Security
The Last Watchdog
The Last Watchdog
B
Blog RSS Feed
宝玉的分享
宝玉的分享
cs.AI updates on arXiv.org
cs.AI updates on arXiv.org
S
Security @ Cisco Blogs
Cyberwarzone
Cyberwarzone
T
The Blog of Author Tim Ferriss

博客园 - litsword

[转]IDENT_CURRENT、SCOPE_IDENTITY、@@IDENTITY 差異對照表 [转贴]TFS Power Tools–September 2010 Release SQL 中使用正则表达式过滤字母或数字 [转载]记不住ASP.NET页面生命周期的苦恼 Split Full Name as First and Last HTML 合并单元格示例 动态加载配置文件 [转载]实现PadLeft的SQL脚本 Useful SQL script 在Update 和 Delete语句中使用 Inner Join SQL游标遍历时的变量赋值 SQL Date Time format SCOPE_IDENTITY、IDENT_CURRENT 和 @@IDENTITY 的区别 Union合并数组(去掉重复的项目) Winform 版本信息 Sql语句 生日提醒 事件的定义 SQL数据类型nchar,char,varchar与nvarchar区别 JavaScript里面三个等号和两个等号的区别
【原创】RSS开发心得小结
litsword · 2010-10-14 · via 博客园 - litsword

几经面试和简历更新,发现自己做了这么久的开发,却很少做总结;一个个项目过去了,知识的积累和沉淀却不多……

借着这次机会,把以前的skill整理一下,浓缩的才是精华。为自己也为其他初学的朋友做个参考。

RSS(全称Really Simple Syndication) 目前广泛用于网上新闻频道,blog和wiki,主要的版本有0.91, 1.0, 2.0。

另外还有 Atom 格式(已由 IETF 标准化为 RFC 4287,并非 Google 制定,但被 Google 等厂商广泛采用),以及作为 Feed 集合的 OPML 文件。

最常见的 Feed 格式是 RSS 1.0、RSS 2.0 和 Atom,解析时通过不同的命名空间来处理不同的版本,下面是解析的主要代码:

代码

/// <summary>
/// Parses feed XML into a Feed object, choosing the format from the root element name.
/// </summary>
/// <remarks>
/// A root named "opml" only sets the feed type. Roots named "feed" (Atom), "rdf:rdf" (RSS 1.0)
/// and "rss" (RSS 2.0) additionally populate the channel information and the item list.
/// Root names are compared case-insensitively. Any exception raised while inspecting the
/// document is handed to Log.Write and the feed populated so far is returned.
/// </remarks>
/// <param name="url">Source address of the feed; stored on the result and passed to ReadGlobals.LoadXml.</param>
/// <param name="xmlContent">The feed XML text.</param>
/// <returns>The Feed instance created at the start of the call, filled with whatever could be parsed.</returns>
public static Feed AnalyseFeedContent(string url, string xmlContent)
{
    Feed feed = new Feed();
    feed.Url = url;
    feed.ChannelInfo = new FeedChannel();

    XmlDocument doc = new XmlDocument();
    doc = ReadGlobals.LoadXml(doc, xmlContent, url);

    // Register the namespaces used by the XPath queries of the channel and item readers.
    // Note: the "rdf" prefix is bound to the RSS 1.0 element namespace, so queries such as
    // "rdf:item" select RSS 1.0 elements.
    XmlNamespaceManager mgr = new XmlNamespaceManager(doc.NameTable);
    mgr.AddNamespace("rdf", "http://purl.org/rss/1.0/");
    mgr.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");
    mgr.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

    XmlNode nodeRoot = doc.DocumentElement;
    try
    {
        if (nodeRoot == null)
        {
            return feed;
        }

        // Enclosure namespace: prefer the URI declared on the root, else a well-known default.
        XmlAttribute encNamespace = nodeRoot.Attributes["xmlns:enc"];
        mgr.AddNamespace(
            "enc",
            encNamespace != null ? encNamespace.Value : "http://crocodile.org/ns/rss/2.0/enclosures");

        // Trackback namespace: same strategy.
        XmlAttribute trackbackNamespace = nodeRoot.Attributes["xmlns:trackback"];
        mgr.AddNamespace(
            "trackback",
            trackbackNamespace != null
                ? trackbackNamespace.Value
                : "http://madskills.com/public/xml/rss/module/trackback/");

        // Ordinal, case-insensitive comparison: element names are identifiers, not
        // linguistic text, so the current culture must not influence the match.
        string rootName = nodeRoot.Name;
        if (string.Equals(rootName, "opml", StringComparison.OrdinalIgnoreCase))
        {
            // OPML document: only the type is recorded here.
            feed.Type = FeedType.OPML;
        }
        else if (string.Equals(rootName, "feed", StringComparison.OrdinalIgnoreCase))
        {
            // Atom document. Every Atom feed is tagged ATOM_0_3 here, whatever namespace it declares.
            feed.Type = FeedType.ATOM_0_3;

            // The namespace declared on the root element wins over the default one.
            string atomNamespace = "http://www.w3.org/2005/Atom";
            XmlAttribute declaredNamespace = nodeRoot.Attributes["xmlns"];
            if (declaredNamespace != null)
            {
                atomNamespace = declaredNamespace.Value;
            }
            mgr.AddNamespace("atom", atomNamespace);

            feed.ChannelInfo = GetChannel(doc, mgr, "atom");
            feed.Items = GetItems(doc.SelectNodes("//atom:entry", mgr), feed.Type, mgr);
        }
        else if (string.Equals(rootName, "rdf:rdf", StringComparison.OrdinalIgnoreCase))
        {
            // RSS 1.0 document.
            feed.Type = FeedType.RSS_1_0;
            feed.ChannelInfo = GetChannel(doc, mgr, "rdf");
            feed.Items = GetItems(doc.SelectNodes("//rdf:item", mgr), feed.Type, mgr);
        }
        else if (string.Equals(rootName, "rss", StringComparison.OrdinalIgnoreCase))
        {
            // RSS 2.0 document.
            feed.Type = FeedType.RSS_2_0;

            // Select relative to the root element: XPath names are case-sensitive, so an
            // absolute "rss/channel" would miss a root that only matched case-insensitively.
            XmlNode nodeChannel = nodeRoot.SelectSingleNode("channel");
            feed.ChannelInfo = GetChannelForRss20(nodeChannel);

            if (nodeChannel != null)
            {
                feed.Items = GetItems(nodeChannel.SelectNodes("item", mgr), feed.Type, mgr);
            }
            else
            {
                // Report the malformed feed explicitly instead of through a NullReferenceException.
                Log.Write(new XmlException("RSS 2.0 feed has no <channel> element: " + url));
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: a broken feed must not take the caller down; return what was parsed.
        Log.Write(ex);
    }
    return feed;
}

 Feed主要分为两部分,Channel和ItemList部分,分别用如下方法解析:

FeedChannel解析

/// <summary>
/// Reads channel-level data (logo, title, link, description, language) from a feed document.
/// </summary>
/// <remarks>
/// All queries are document-wide ("//prefix:name"), so the first matching element in
/// document order is used. The description is taken from the "tagline" element and the
/// language from the "xml:lang" attribute of a "feed" element.
/// </remarks>
/// <param name="xdtDoc">The feed document; when null an empty FeedChannel is returned.</param>
/// <param name="mgr">Namespace manager in which <paramref name="prefix"/> is registered.</param>
/// <param name="prefix">Namespace prefix used to build the XPath queries.</param>
/// <returns>A new FeedChannel holding the values that were found.</returns>
private static FeedChannel GetChannel(XmlDocument xdtDoc, XmlNamespaceManager mgr, string prefix)
{
    FeedChannel channel = new FeedChannel();
    if (xdtDoc == null)
    {
        return channel;
    }

    string anywhere = "//" + prefix + ":";

    XmlNode logoNode = xdtDoc.SelectSingleNode(anywhere + "logo", mgr);
    if (logoNode != null)
    {
        channel.Logo.Src = logoNode.InnerText;
    }

    XmlNode titleNode = xdtDoc.SelectSingleNode(anywhere + "title", mgr);
    if (titleNode != null)
    {
        channel.Title = titleNode.InnerText;
    }

    // Prefer the href of an explicit rel="alternate" link; otherwise fall back to the
    // text of the first link element, or an empty string when there is none.
    XmlNode linkNode = xdtDoc.SelectSingleNode(anywhere + "link[@rel='alternate']/@href", mgr);
    if (linkNode == null)
    {
        linkNode = xdtDoc.SelectSingleNode(anywhere + "link", mgr);
    }
    channel.Link = linkNode != null ? linkNode.InnerText : string.Empty;

    XmlNode descriptionNode = xdtDoc.SelectSingleNode(anywhere + "tagline", mgr);
    if (descriptionNode != null)
    {
        channel.Description = descriptionNode.InnerText;
    }

    XmlNode languageNode = xdtDoc.SelectSingleNode(anywhere + "feed/@xml:lang", mgr);
    if (languageNode != null)
    {
        string language = languageNode.InnerText;
        try
        {
            // NOTE(review): this changes the calling thread's UI culture as a side effect of
            // parsing a feed. Kept because callers may rely on it — confirm it is intended.
            Thread.CurrentThread.CurrentUICulture = new CultureInfo(language);

            // Use the language declared by the feed. The previous code passed an
            // always-empty string here, which always produced the invariant culture's LCID.
            channel.Language = CultureInfo.CreateSpecificCulture(language).LCID;
        }
        catch (Exception)
        {
            // Best effort: an unrecognised language tag must not abort channel parsing.
            channel.Language = 0;
        }
    }

    return channel;
}

FeedItem解析

/// <summary>
/// Converts the item (RSS) or entry (Atom) nodes of a feed into FeedItem objects.
/// </summary>
/// <param name="xnlItems">The item/entry nodes; null yields an empty list.</param>
/// <param name="type">Feed format, which decides which element names are read.</param>
/// <param name="mgr">
/// Namespace manager that resolves the prefixes used in the queries
/// ("rdf", "atom", "dc", "content", "trackback").
/// </param>
/// <returns>
/// One FeedItem per node, in node order. Returns null when <paramref name="xnlItems"/> is
/// non-null and <paramref name="type"/> is not RSS_1_0, RSS_2_0 or ATOM_0_3; that result is
/// kept unchanged for existing callers.
/// </returns>
public static List<FeedItem> GetItems(XmlNodeList xnlItems, FeedType type, XmlNamespaceManager mgr)
{
    List<FeedItem> lstItems = new List<FeedItem>();
    if (xnlItems == null)
    {
        return lstItems;
    }

    switch (type)
    {
        case FeedType.RSS_1_0:
            foreach (XmlNode xne in xnlItems)
            {
                // RSS 1.0 core elements live in the namespace bound to the "rdf" prefix.
                lstItems.Add(ParseRssItem(xne, mgr, "rdf:"));
            }
            break;

        case FeedType.RSS_2_0:
            foreach (XmlNode xne in xnlItems)
            {
                // RSS 2.0 core elements are unqualified; guid and category exist only here.
                FeedItem fim = ParseRssItem(xne, mgr, string.Empty);
                fim.Guid = ReadNodeText(xne, "guid", mgr);

                XmlNodeList categories = xne.SelectNodes("category", mgr);
                if (categories != null)
                {
                    foreach (XmlNode category in categories)
                    {
                        // Comma-terminated list, trailing comma included.
                        fim.Category += category.InnerText + ",";
                    }
                }
                lstItems.Add(fim);
            }
            break;

        case FeedType.ATOM_0_3:
            foreach (XmlNode xne in xnlItems)
            {
                lstItems.Add(ParseAtomEntry(xne, mgr));
            }
            break;

        default:
            return null;
    }

    return lstItems;
}

/// <summary>Returns the InnerText of the first node matching an XPath relative to <paramref name="node"/>, or an empty string.</summary>
private static string ReadNodeText(XmlNode node, string xpath, XmlNamespaceManager mgr)
{
    XmlNode match = node.SelectSingleNode(xpath, mgr);
    return match != null ? match.InnerText : string.Empty;
}

/// <summary>Returns the value of the named attribute, or an empty string when it is absent.</summary>
private static string ReadAttributeValue(XmlNode node, string name)
{
    XmlAttribute attribute = node.Attributes != null ? node.Attributes[name] : null;
    return attribute != null ? attribute.Value : string.Empty;
}

/// <summary>Removes literal CDATA start and end markers from a text.</summary>
private static string StripCDataMarkers(string text)
{
    return text.Replace("<![CDATA[", string.Empty).Replace("]]>", string.Empty);
}

/// <summary>Appends the InnerText of every node, each followed by a comma, to <paramref name="current"/>.</summary>
private static string AppendNodeTexts(string current, XmlNodeList nodes)
{
    if (nodes != null)
    {
        foreach (XmlNode node in nodes)
        {
            current += node.InnerText + ",";
        }
    }
    return current;
}

/// <summary>Builds a FeedEnclosure from the type, length and URL attributes of an enclosure node.</summary>
private static FeedEnclosure ReadEnclosure(XmlNode node, string urlAttribute)
{
    FeedEnclosure enc = new FeedEnclosure();
    enc.Type = ReadAttributeValue(node, "type");

    // A missing or non-numeric length leaves Length at its default instead of throwing.
    int length;
    if (int.TryParse(ReadAttributeValue(node, "length"), out length))
    {
        enc.Length = length;
    }

    enc.Url = ReadAttributeValue(node, urlAttribute);
    return enc;
}

/// <summary>Parses one RSS item; <paramref name="prefix"/> is prepended to the core element names ("rdf:" or empty).</summary>
private static FeedItem ParseRssItem(XmlNode item, XmlNamespaceManager mgr, string prefix)
{
    FeedItem fim = new FeedItem();
    fim.Title = ReadNodeText(item, prefix + "title", mgr);
    fim.Link = ReadNodeText(item, prefix + "link", mgr);

    // The content:encoded body wins over the description when it is present.
    string body = ReadNodeText(item, "content:encoded", mgr);
    if (body == string.Empty)
    {
        body = ReadNodeText(item, prefix + "description", mgr);
    }
    fim.Description = StripCDataMarkers(body);

    // Author of this item (item-relative query); fall back to dc:creator only when the
    // author element is missing or empty.
    string authorName = ReadNodeText(item, prefix + "author", mgr);
    if (authorName == string.Empty)
    {
        authorName = ReadNodeText(item, "dc:creator", mgr);
    }
    if (authorName != string.Empty)
    {
        fim.Author = new FeedPerson();
        fim.Author.Name = authorName;
    }

    // Publication date, falling back to dc:date when pubDate is absent or yields MinValue.
    XmlNode pubDateNode = item.SelectSingleNode(prefix + "pubDate", mgr);
    fim.PubDate = pubDateNode != null ? GetDateTimeByUrl(fim.Link, pubDateNode.InnerText) : DateTime.MinValue;
    if (fim.PubDate == DateTime.MinValue)
    {
        XmlNode dcDateNode = item.SelectSingleNode("dc:date", mgr);
        fim.PubDate = dcDateNode != null ? GetDateTimeByUrl(fim.Link, dcDateNode.InnerText) : DateTime.MinValue;
    }

    fim.Subject = AppendNodeTexts(fim.Subject, item.SelectNodes("dc:subject", mgr));

    // Only the first enclosure element is read; Enclosures stays untouched when there is none.
    XmlNode enclosureNode = item.SelectSingleNode("enclosure", mgr);
    if (enclosureNode != null)
    {
        fim.Enclosures = new List<FeedEnclosure>();
        fim.Enclosures.Add(ReadEnclosure(enclosureNode, "url"));
    }
    fim.Description += GetHtmlByByEnclosure(fim.Enclosures);

    fim.TrackbackPing = ReadNodeText(item, "trackback:ping", mgr);
    return fim;
}

/// <summary>Parses one Atom entry.</summary>
private static FeedItem ParseAtomEntry(XmlNode entry, XmlNamespaceManager mgr)
{
    FeedItem fim = new FeedItem();
    fim.Title = ReadNodeText(entry, "atom:title", mgr);

    // Prefer an explicit rel="alternate" link. A link without a rel attribute is to be
    // interpreted as "alternate" (RFC 4287, section 4.2.7.2), so use it as a fallback.
    // A link without href yields an empty string instead of a NullReferenceException.
    XmlNode linkNode = entry.SelectSingleNode("atom:link[@rel='alternate']", mgr);
    if (linkNode == null)
    {
        linkNode = entry.SelectSingleNode("atom:link[not(@rel)]", mgr);
    }
    fim.Link = linkNode != null ? ReadAttributeValue(linkNode, "href") : string.Empty;

    fim.Summary = ReadNodeText(entry, "atom:summary", mgr);
    fim.Description = StripCDataMarkers(ReadNodeText(entry, "atom:content", mgr));
    fim.Guid = ReadNodeText(entry, "atom:id", mgr);
    fim.Contributor = ReadNodeText(entry, "atom:contributor", mgr);

    XmlNode authorNode = entry.SelectSingleNode("atom:author", mgr);
    if (authorNode != null)
    {
        fim.Author = new FeedPerson();
        fim.Author.Name = ReadNodeText(authorNode, "atom:name", mgr);
        fim.Author.Url = ReadNodeText(authorNode, "atom:uri", mgr);
        fim.Author.Email = ReadNodeText(authorNode, "atom:email", mgr);
    }

    // Update date: atom:updated, then atom:modified.
    XmlNode updatedNode = entry.SelectSingleNode("atom:updated", mgr);
    fim.UpdateDate = updatedNode != null ? GetDateTimeByUrl(fim.Link, updatedNode.InnerText) : DateTime.MinValue;
    if (fim.UpdateDate == DateTime.MinValue)
    {
        XmlNode modifiedNode = entry.SelectSingleNode("atom:modified", mgr);
        fim.UpdateDate = modifiedNode != null ? GetDateTimeByUrl(fim.Link, modifiedNode.InnerText) : DateTime.MinValue;
    }

    // Publication date: atom:published, then atom:issued, then atom:created.
    XmlNode publishedNode = entry.SelectSingleNode("atom:published", mgr);
    fim.PubDate = publishedNode != null ? GetDateTimeByUrl(fim.Link, publishedNode.InnerText) : DateTime.MinValue;
    if (fim.PubDate == DateTime.MinValue)
    {
        XmlNode issuedNode = entry.SelectSingleNode("atom:issued", mgr);
        fim.PubDate = issuedNode != null ? GetDateTimeByUrl(fim.Link, issuedNode.InnerText) : DateTime.MinValue;
    }
    if (fim.PubDate == DateTime.MinValue)
    {
        XmlNode createdNode = entry.SelectSingleNode("atom:created", mgr);
        fim.PubDate = createdNode != null ? GetDateTimeByUrl(fim.Link, createdNode.InnerText) : DateTime.MinValue;
    }

    fim.Subject = AppendNodeTexts(fim.Subject, entry.SelectNodes("dc:subject", mgr));

    XmlNodeList categories = entry.SelectNodes("atom:category", mgr);
    if (categories != null)
    {
        foreach (XmlNode category in categories)
        {
            // Comma-terminated list of the "term" attributes, trailing comma included.
            fim.Category += ReadAttributeValue(category, "term") + ",";
        }
    }

    // Atom enclosures are link elements with rel="enclosure"; all of them are collected.
    XmlNodeList enclosureNodes = entry.SelectNodes("atom:link[@rel='enclosure']", mgr);
    if (enclosureNodes != null)
    {
        fim.Enclosures = new List<FeedEnclosure>();
        foreach (XmlNode enclosureNode in enclosureNodes)
        {
            FeedEnclosure enc = ReadEnclosure(enclosureNode, "href");
            enc.Title = ReadAttributeValue(enclosureNode, "title");
            fim.Enclosures.Add(enc);
        }
    }
    fim.Description += GetHtmlByByEnclosure(fim.Enclosures);

    fim.TrackbackPing = ReadNodeText(entry, "trackback:ping", mgr);
    fim.Rights = ReadNodeText(entry, "atom:rights", mgr);
    return fim;
}