15 lines
509 B
C#
15 lines
509 B
C#
using System.Text.RegularExpressions;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace BlueLaminate.Scraper.Wiki;
|
|
|
|
/// <summary>
/// Utility routines for cleaning up text handled by the wiki scrapers.
/// </summary>
public static class WikiText
{
    // One or more consecutive whitespace characters; built once and reused by every call.
    private static readonly Regex WhitespaceRun = new Regex(@"\s+", RegexOptions.Compiled);

    /// <summary>
    /// Resolves HTML entities in <paramref name="raw"/>, replaces every run of whitespace
    /// with a single space, and trims leading and trailing whitespace.
    /// </summary>
    /// <param name="raw">The text to clean up.</param>
    /// <returns>
    /// The cleaned text; an empty string when entity decoding yields <c>null</c>.
    /// </returns>
    public static string Normalize(string raw)
    {
        // A null decode result is treated as empty so the regex always receives a string.
        string decoded = HtmlEntity.DeEntitize(raw) ?? string.Empty;
        string collapsed = WhitespaceRun.Replace(decoded, " ");
        return collapsed.Trim();
    }
}
|