SensitiveWordUtility.cs 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. /// <summary>
  2. /// Copyright (c) 2025 MirzkisD1Ex0 All rights reserved.
  3. /// Code Version 1.5.2
  4. /// </summary>
  5. using System.Collections;
  6. using System.Collections.Generic;
  7. using System.Text;
  8. using Newtonsoft.Json;
  9. using System.IO;
  10. using UnityEngine;
  11. namespace ToneTuneToolkit.Data
  12. {
  /// <summary>
  /// Sensitive-word filter backed by a character trie.
  /// On <c>Awake</c> the component loads a JSON array of words from
  /// <c>StreamingAssets/configs/sensitivewords.json</c> and builds the trie.
  /// The static query methods then operate on that shared trie.
  /// Characters classified by <see cref="IsSymbol"/> are ignored both when the
  /// lexicon is built and when text is scanned, so "a*b" matches the word "ab".
  /// </summary>
  public class SensitiveWordUtility : MonoBehaviour
  {
    /// <summary>Lexicon location relative to <c>Application.streamingAssetsPath</c>.</summary>
    private const string ConfigRelativePath = "/configs/sensitivewords.json";

    /// <summary>Root of the trie shared by all static query methods.</summary>
    private static TrieNode root = new TrieNode();

    /// <summary>Words most recently loaded from the config file by this component.</summary>
    private List<string> lexicon = new List<string>();

    // Resolved in Init(): Unity restricts engine API calls (such as
    // Application.streamingAssetsPath) inside MonoBehaviour field initializers.
    private string sensitiveWordsConfigPath;

    // ==================================================

    private void Awake() => Init();

    // ==================================================

    /// <summary>
    /// Reads the lexicon file and rebuilds the trie from it.
    /// If the file does not exist an error is logged and the current trie is left untouched.
    /// </summary>
    private void Init()
    {
      sensitiveWordsConfigPath = Application.streamingAssetsPath + ConfigRelativePath;

      // NOTE(review): on platforms where StreamingAssets is not a plain directory
      // (e.g. inside an Android package) System.IO cannot read it - confirm target platforms.
      if (!File.Exists(sensitiveWordsConfigPath))
      {
        Debug.LogError("[SensitiveWordUtility] Lexicon file not found: " + sensitiveWordsConfigPath);
        return;
      }

      string json = File.ReadAllText(sensitiveWordsConfigPath, Encoding.UTF8);

      // DeserializeObject yields null for an empty file or a literal "null".
      lexicon = JsonConvert.DeserializeObject<List<string>>(json) ?? new List<string>();
      InitLexicon(lexicon);
    }

    // ==================================================

    /// <summary>
    /// Rebuilds the shared trie from the given words, replacing any previous lexicon.
    /// </summary>
    /// <param name="lexicon">
    /// Words to register. A null list clears the lexicon. Null or empty entries and
    /// entries consisting only of symbols are skipped.
    /// </param>
    public static void InitLexicon(List<string> lexicon)
    {
      TrieNode newRoot = new TrieNode();

      if (lexicon != null)
      {
        foreach (string word in lexicon)
        {
          if (string.IsNullOrEmpty(word))
          {
            continue;
          }

          TrieNode node = newRoot;
          foreach (char c in word)
          {
            if (IsSymbol(c))
            {
              continue;
            }

            TrieNode child;
            if (!node.Children.TryGetValue(c, out child))
            {
              child = new TrieNode();
              node.Children.Add(c, child);
            }
            node = child;
          }

          // Mark the end after the loop so a word whose last character is a symbol
          // (e.g. "abc!") is still terminated; never mark the root itself.
          if (node != newRoot)
          {
            node.IsEnd = true;
          }
        }
      }

      // Publish the finished trie in a single assignment.
      root = newRoot;
    }

    /// <summary>
    /// Tells whether the text contains at least one sensitive word.
    /// </summary>
    /// <param name="text">Text to inspect; null or empty yields false.</param>
    /// <returns>True as soon as any position in the text starts a sensitive word.</returns>
    public static bool CheckSensitiveWords(string text)
    {
      if (string.IsNullOrEmpty(text))
      {
        return false;
      }

      for (int i = 0; i < text.Length; i++)
      {
        if (SearchSensitiveWord(text, i) > 0)
        {
          return true;
        }
      }
      return false;
    }

    /// <summary>
    /// Looks for a sensitive word that starts exactly at <paramref name="startIndex"/>.
    /// The longest registered word is preferred. Symbols between matched characters are
    /// skipped and counted in the returned length; symbols after the last matched
    /// character are not.
    /// </summary>
    /// <param name="text">Text to scan.</param>
    /// <param name="startIndex">Index at which the word must begin.</param>
    /// <returns>
    /// Number of characters of <paramref name="text"/> covered by the match, or 0 when
    /// no word starts here (including null text, an out-of-range index, or a symbol at
    /// <paramref name="startIndex"/>).
    /// </returns>
    public static int SearchSensitiveWord(string text, int startIndex)
    {
      if (string.IsNullOrEmpty(text) || startIndex < 0 || startIndex >= text.Length)
      {
        return 0;
      }

      // A match has to begin on a real character, otherwise leading punctuation
      // would be reported (and masked) as part of the word.
      if (IsSymbol(text[startIndex]))
      {
        return 0;
      }

      TrieNode node = root;
      int matchedLength = 0;

      for (int i = startIndex; i < text.Length; i++)
      {
        char c = text[i];
        if (IsSymbol(c))
        {
          continue;
        }

        // Always descend, even past a complete word, so that longer words sharing
        // this prefix can still be found.
        if (!node.Children.TryGetValue(c, out node))
        {
          break;
        }

        if (node.IsEnd)
        {
          // Only remember the span up to a confirmed end-of-word.
          matchedLength = i - startIndex + 1;
        }
      }

      return matchedLength;
    }

    /// <summary>
    /// Collects every sensitive word occurrence in the text, scanning left to right
    /// without overlaps.
    /// </summary>
    /// <param name="text">Text to scan; null or empty yields an empty list.</param>
    /// <returns>The matched substrings in order of appearance.</returns>
    public static List<string> GetAllSensitiveWords(string text)
    {
      List<string> result = new List<string>();
      if (string.IsNullOrEmpty(text))
      {
        return result;
      }

      int i = 0;
      while (i < text.Length)
      {
        int length = SearchSensitiveWord(text, i);
        if (length > 0)
        {
          result.Add(text.Substring(i, length));
          i += length;
        }
        else
        {
          i++;
        }
      }
      return result;
    }

    /// <summary>
    /// Masks every sensitive word occurrence with '*' characters, one per covered character.
    /// </summary>
    /// <param name="text">Text to process; null or empty is returned unchanged.</param>
    /// <returns>A copy of the text with each matched span replaced by asterisks.</returns>
    public static string ReplaceSensitiveWords(string text)
    {
      if (string.IsNullOrEmpty(text))
      {
        return text;
      }

      StringBuilder builder = new StringBuilder(text);
      int i = 0;
      while (i < text.Length)
      {
        int len = SearchSensitiveWord(text, i);
        if (len > 0)
        {
          for (int j = 0; j < len; j++)
          {
            builder[i + j] = '*';
          }
          i += len;
        }
        else
        {
          i++;
        }
      }
      return builder.ToString();
    }

    /// <summary>
    /// Decides whether a character is ignored by the matcher.
    /// </summary>
    /// <param name="c">Character to classify.</param>
    /// <returns>
    /// False for ASCII digits, ASCII letters and code units in 0x2E80-0x9FFF
    /// (the East Asian script range); true for everything else.
    /// </returns>
    private static bool IsSymbol(char c)
    {
      bool isAsciiAlphanumeric =
        (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
      bool isEastAsian = c >= 0x2E80 && c <= 0x9FFF;
      return !isAsciiAlphanumeric && !isEastAsian;
    }

    /// <summary>One node of the lexicon trie.</summary>
    private sealed class TrieNode
    {
      /// <summary>Child nodes keyed by the next character of a word.</summary>
      public readonly Dictionary<char, TrieNode> Children = new Dictionary<char, TrieNode>();

      /// <summary>True when the path from the root to this node spells a complete word.</summary>
      public bool IsEnd;
    }
  }
  171. }