src/encode/Unicode.js

/**
 * The script is part of Mojix.
 *
 * AUTHOR:
 *  natade (http://twitter.com/natadea)
 *
 * LICENSE:
 *  The MIT license https://opensource.org/licenses/MIT
 */

/**
 * Map from a code point to the short name of a control / format character.
 * Starts as null and is filled in by Unicode.init().
 * @type {Record<number, string>}
 * @ignore
 */
let control_charcter_map = null;

/**
 * Converts a code point to the name of its Unicode block.
 * Starts as null and is assigned by Unicode.init().
 * @type {(codepoint: number) => (string)}
 * @ignore
 */
let toBlockNameFromUnicode = null;

/**
 * Returns the variation selector label for a code point, or null when the
 * code point is not a variation selector.
 * Starts as null and is assigned by Unicode.init().
 * @type {(codepoint: number, annotate?: boolean) => (string|null)}
 * @ignore
 */
let getVariationSelectorsnumberFromCodePoint = null;

/**
 * Returns the tag character label for a code point, or null when the
 * code point is not a tag character.
 * Starts as null and is assigned by Unicode.init().
 * @type {(codepoint: number) => (string|null)}
 * @ignore
 */
let getTagCharacterFromCodePoint = null;

/**
 * Unicode を扱うクラス
 * @ignore
 */
export default class Unicode {
	/**
	 * Builds the lookup tables and helper closures used by
	 * toBlockNameFromUnicode() and toControlCharcterName().
	 *
	 * The work is done only once: Unicode.is_initmap guards repeated calls.
	 */
	static init() {
		if (Unicode.is_initmap) {
			return;
		}
		Unicode.is_initmap = true;

		/**
		 * Names of control / format characters.
		 * Variation selectors and tag characters are numerous, so they are not
		 * listed here; they are resolved by the helper closures below.
		 */
		// prettier-ignore
		control_charcter_map = {
			// --- C0 control characters (ASCII 0x00–0x1F) ---
			0: "NUL", // Null
			1: "SOH", // Start of Heading
			2: "STX", // Start of Text
			3: "ETX", // End of Text
			4: "EOT", // End of Transmission
			5: "ENQ", // Enquiry
			6: "ACK", // Acknowledge
			7: "BEL", // Bell (beep)

			8: "BS",  // Backspace
			9: "HT",  // Horizontal Tab
			10: "LF",  // Line Feed
			11: "VT",  // Vertical Tab
			12: "FF",  // Form Feed
			13: "CR",  // Carriage Return
			14: "SO",  // Shift Out
			15: "SI",  // Shift In

			16: "DLE", // Data Link Escape
			17: "DC1", // Device Control 1 (XON)
			18: "DC2", // Device Control 2
			19: "DC3", // Device Control 3 (XOFF)
			20: "DC4", // Device Control 4
			21: "NAK", // Negative Acknowledge
			22: "SYN", // Synchronous Idle
			23: "ETB", // End of Transmission Block

			24: "CAN", // Cancel
			25: "EM",  // End of Medium
			26: "SUB", // Substitute
			27: "ESC", // Escape
			28: "FS",  // File Separator
			29: "GS",  // Group Separator
			30: "RS",  // Record Separator
			31: "US",  // Unit Separator

			// --- DEL ---
			127: "DEL", // Delete

			// --- C1 control characters (ISO/IEC 6429, 0x80–0x9F) ---
			128: "PAD", // Padding Character
			129: "HOP", // High Octet Preset
			130: "BPH", // Break Permitted Here
			131: "NBH", // No Break Here
			132: "IND", // Index
			133: "NEL", // Next Line
			134: "SSA", // Start of Selected Area
			135: "ESA", // End of Selected Area
			136: "HTS", // Horizontal Tab Set
			137: "HTJ", // Horizontal Tab with Justification
			138: "VTS", // Vertical Tab Set
			139: "PLD", // Partial Line Down
			140: "PLU", // Partial Line Up
			141: "RI",  // Reverse Index
			142: "SS2", // Single Shift 2
			143: "SS3", // Single Shift 3
			144: "DCS", // Device Control String
			145: "PU1", // Private Use 1
			146: "PU2", // Private Use 2
			147: "STS", // Set Transmit State
			148: "CCH", // Cancel Character
			149: "MW",  // Message Waiting
			150: "SPA", // Start of Protected Area
			151: "EPA", // End of Protected Area
			152: "SOS", // Start of String
			153: "SGCI", // Single Graphic Character Introducer
			154: "SCI", // Single Character Introducer
			155: "CSI", // Control Sequence Introducer
			156: "ST",  // String Terminator
			157: "OSC", // Operating System Command
			158: "PM",  // Privacy Message
			159: "APC", // Application Program Command

			// --- Unicode characters that are treated like controls ---
			160: "NBSP", // No-Break Space (renders as a space but forbids a line break)
			173: "SHY",  // Soft Hyphen (normally not displayed)

			// --- Unicode Interlinear Annotation ---
			65529: "IAA", // Interlinear Annotation Anchor
			65530: "IAS", // Interlinear Annotation Separator
			65531: "IAT", // Interlinear Annotation Terminator

			// Zero-width / joiner characters (Cf)
			0x200B: "ZWSP",   // ZERO WIDTH SPACE
			0x200C: "ZWNJ",   // ZERO WIDTH NON-JOINER
			0x200D: "ZWJ",    // ZERO WIDTH JOINER
			0x2060: "WJ",     // WORD JOINER
			0xFEFF: "BOM",    // BYTE ORDER MARK / ZERO WIDTH NO-BREAK SPACE

			// Bidirectional (BiDi) control characters
			0x202A: "LRE",    // LEFT-TO-RIGHT EMBEDDING
			0x202B: "RLE",    // RIGHT-TO-LEFT EMBEDDING
			0x202C: "PDF",    // POP DIRECTIONAL FORMATTING
			0x202D: "LRO",    // LEFT-TO-RIGHT OVERRIDE
			0x202E: "RLO",    // RIGHT-TO-LEFT OVERRIDE

			0x2066: "LRI",    // LEFT-TO-RIGHT ISOLATE
			0x2067: "RLI",    // RIGHT-TO-LEFT ISOLATE
			0x2068: "FSI",    // FIRST STRONG ISOLATE
			0x2069: "PDI",    // POP DIRECTIONAL ISOLATE

			// Unicode noncharacters (for validation / defensive use)
			0xFFFE: "NONCHAR_FFFE",
			0xFFFF: "NONCHAR_FFFF"
		};

		// Block names, in ascending code point order.
		// Entry i pairs with unicode_blockaddress_array[i] below.
		// prettier-ignore
		const unicode_blockname_array = [
			"Basic Latin", "Latin-1 Supplement", "Latin Extended-A", "Latin Extended-B", "IPA Extensions", "Spacing Modifier Letters", "Combining Diacritical Marks", "Greek and Coptic",
			"Cyrillic", "Cyrillic Supplement", "Armenian", "Hebrew", "Arabic", "Syriac", "Arabic Supplement", "Thaana",
			"NKo", "Samaritan", "Mandaic", "Syriac Supplement", "Arabic Extended-B", "Arabic Extended-A", "Devanagari", "Bengali",
			"Gurmukhi", "Gujarati", "Oriya", "Tamil", "Telugu", "Kannada", "Malayalam", "Sinhala",
			"Thai", "Lao", "Tibetan", "Myanmar", "Georgian", "Hangul Jamo", "Ethiopic", "Ethiopic Supplement",
			"Cherokee", "Unified Canadian Aboriginal Syllabics", "Ogham", "Runic", "Tagalog", "Hanunoo", "Buhid", "Tagbanwa",
			"Khmer", "Mongolian", "Unified Canadian Aboriginal Syllabics Extended", "Limbu", "Tai Le", "New Tai Lue", "Khmer Symbols", "Buginese",
			"Tai Tham", "Combining Diacritical Marks Extended", "Balinese", "Sundanese", "Batak", "Lepcha", "Ol Chiki", "Cyrillic Extended-C",
			"Georgian Extended", "Sundanese Supplement", "Vedic Extensions", "Phonetic Extensions", "Phonetic Extensions Supplement", "Combining Diacritical Marks Supplement", "Latin Extended Additional", "Greek Extended",
			"General Punctuation", "Superscripts and Subscripts", "Currency Symbols", "Combining Diacritical Marks for Symbols", "Letterlike Symbols", "Number Forms", "Arrows", "Mathematical Operators",
			"Miscellaneous Technical", "Control Pictures", "Optical Character Recognition", "Enclosed Alphanumerics", "Box Drawing", "Block Elements", "Geometric Shapes", "Miscellaneous Symbols",
			"Dingbats", "Miscellaneous Mathematical Symbols-A", "Supplemental Arrows-A", "Braille Patterns", "Supplemental Arrows-B", "Miscellaneous Mathematical Symbols-B", "Supplemental Mathematical Operators", "Miscellaneous Symbols and Arrows",
			"Glagolitic", "Latin Extended-C", "Coptic", "Georgian Supplement", "Tifinagh", "Ethiopic Extended", "Cyrillic Extended-A", "Supplemental Punctuation",
			"CJK Radicals Supplement", "Kangxi Radicals", "Ideographic Description Characters", "CJK Symbols and Punctuation", "Hiragana", "Katakana", "Bopomofo", "Hangul Compatibility Jamo",
			"Kanbun", "Bopomofo Extended", "CJK Strokes", "Katakana Phonetic Extensions", "Enclosed CJK Letters and Months", "CJK Compatibility", "CJK Unified Ideographs Extension A", "Yijing Hexagram Symbols",
			"CJK Unified Ideographs", "Yi Syllables", "Yi Radicals", "Lisu", "Vai", "Cyrillic Extended-B", "Bamum", "Modifier Tone Letters",
			"Latin Extended-D", "Syloti Nagri", "Common Indic Number Forms", "Phags-pa", "Saurashtra", "Devanagari Extended", "Kayah Li", "Rejang",
			"Hangul Jamo Extended-A", "Javanese", "Myanmar Extended-B", "Cham", "Myanmar Extended-A", "Tai Viet", "Meetei Mayek Extensions", "Ethiopic Extended-A",
			"Latin Extended-E", "Cherokee Supplement", "Meetei Mayek", "Hangul Syllables", "Hangul Jamo Extended-B", "High Surrogates", "High Private Use Surrogates", "Low Surrogates",
			"Private Use Area", "CJK Compatibility Ideographs", "Alphabetic Presentation Forms", "Arabic Presentation Forms-A", "Variation Selectors", "Vertical Forms", "Combining Half Marks", "CJK Compatibility Forms",
			"Small Form Variants", "Arabic Presentation Forms-B", "Halfwidth and Fullwidth Forms", "Specials", "Linear B Syllabary", "Linear B Ideograms", "Aegean Numbers", "Ancient Greek Numbers",
			"Ancient Symbols", "Phaistos Disc", "Lycian", "Carian", "Coptic Epact Numbers", "Old Italic", "Gothic", "Old Permic",
			"Ugaritic", "Old Persian", "Deseret", "Shavian", "Osmanya", "Osage", "Elbasan", "Caucasian Albanian",
			"Vithkuqi", "Linear A", "Latin Extended-F", "Cypriot Syllabary", "Imperial Aramaic", "Palmyrene", "Nabataean", "Hatran",
			"Phoenician", "Lydian", "Meroitic Hieroglyphs", "Meroitic Cursive", "Kharoshthi", "Old South Arabian", "Old North Arabian", "Manichaean",
			"Avestan", "Inscriptional Parthian", "Inscriptional Pahlavi", "Psalter Pahlavi", "Old Turkic", "Old Hungarian", "Hanifi Rohingya", "Rumi Numeral Symbols",
			"Yezidi", "Arabic Extended-C", "Old Sogdian", "Sogdian", "Old Uyghur", "Chorasmian", "Elymaic", "Brahmi",
			"Kaithi", "Sora Sompeng", "Chakma", "Mahajani", "Sharada", "Sinhala Archaic Numbers", "Khojki", "Multani",
			"Khudawadi", "Grantha", "Newa", "Tirhuta", "Siddham", "Modi", "Mongolian Supplement", "Takri",
			"Ahom", "Dogra", "Warang Citi", "Dives Akuru", "Nandinagari", "Zanabazar Square", "Soyombo", "Unified Canadian Aboriginal Syllabics Extended-A",
			"Pau Cin Hau", "Devanagari Extended-A", "Bhaiksuki", "Marchen", "Masaram Gondi", "Gunjala Gondi", "Makasar", "Kawi",
			"Lisu Supplement", "Tamil Supplement", "Cuneiform", "Cuneiform Numbers and Punctuation", "Early Dynastic Cuneiform", "Cypro-Minoan", "Egyptian Hieroglyphs", "Egyptian Hieroglyph Format Controls",
			"Anatolian Hieroglyphs", "Bamum Supplement", "Mro", "Tangsa", "Bassa Vah", "Pahawh Hmong", "Medefaidrin", "Miao",
			"Ideographic Symbols and Punctuation", "Tangut", "Tangut Components", "Khitan Small Script", "Tangut Supplement", "Kana Extended-B", "Kana Supplement", "Kana Extended-A",
			"Small Kana Extension", "Nushu", "Duployan", "Shorthand Format Controls", "Znamenny Musical Notation", "Byzantine Musical Symbols", "Musical Symbols", "Ancient Greek Musical Notation",
			"Kaktovik Numerals", "Mayan Numerals", "Tai Xuan Jing Symbols", "Counting Rod Numerals", "Mathematical Alphanumeric Symbols", "Sutton SignWriting", "Latin Extended-G", "Glagolitic Supplement",
			"Cyrillic Extended-D", "Nyiakeng Puachue Hmong", "Toto", "Wancho", "Nag Mundari", "Ethiopic Extended-B", "Mende Kikakui", "Adlam",
			"Indic Siyaq Numbers", "Ottoman Siyaq Numbers", "Arabic Mathematical Alphabetic Symbols", "Mahjong Tiles", "Domino Tiles", "Playing Cards", "Enclosed Alphanumeric Supplement", "Enclosed Ideographic Supplement",
			"Miscellaneous Symbols and Pictographs", "Emoticons", "Ornamental Dingbats", "Transport and Map Symbols", "Alchemical Symbols", "Geometric Shapes Extended", "Supplemental Arrows-C", "Supplemental Symbols and Pictographs",
			"Chess Symbols", "Symbols and Pictographs Extended-A", "Symbols for Legacy Computing", "CJK Unified Ideographs Extension B", "CJK Unified Ideographs Extension C", "CJK Unified Ideographs Extension D", "CJK Unified Ideographs Extension E", "CJK Unified Ideographs Extension F", "CJK Unified Ideographs Extension I",
			"CJK Compatibility Ideographs Supplement", "CJK Unified Ideographs Extension G", "CJK Unified Ideographs Extension H", "CJK Unified Ideographs Extension J", "Tags", "Variation Selectors Supplement", "Supplementary Private Use Area-A", "Supplementary Private Use Area-B"
		];

		// Last code point of each block, in the same order as the names above.
		/* eslint-disable max-len */
		// prettier-ignore
		const unicode_blockaddress_array = [
			0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF,
			0x07FF, 0x083F, 0x085F, 0x086F, 0x089F, 0x08FF, 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F, 0x0DFF,
			0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F, 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F,
			0x17FF, 0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1AFF, 0x1B7F, 0x1BBF, 0x1BFF, 0x1C4F, 0x1C7F, 0x1C8F,
			0x1CBF, 0x1CCF, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF, 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF,
			0x23FF, 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, 0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF,
			0x2C5F, 0x2C7F, 0x2CFF, 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F, 0x309F, 0x30FF, 0x312F, 0x318F,
			0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, 0xA6FF, 0xA71F,
			0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, 0xA95F, 0xA97F, 0xA9DF, 0xA9FF, 0xAA5F, 0xAA7F, 0xAADF, 0xAAFF, 0xAB2F,
			0xAB6F, 0xABBF, 0xABFF, 0xD7AF, 0xD7FF, 0xDB7F, 0xDBFF, 0xDFFF, 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, 0xFE2F, 0xFE4F,
			0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF, 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF, 0x102FF, 0x1032F, 0x1034F, 0x1037F,
			0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x104FF, 0x1052F, 0x1056F, 0x105BF, 0x1077F, 0x107BF, 0x1083F, 0x1085F, 0x1087F, 0x108AF, 0x108FF,
			0x1091F, 0x1093F, 0x1099F, 0x109FF, 0x10A5F, 0x10A7F, 0x10A9F, 0x10AFF, 0x10B3F, 0x10B5F, 0x10B7F, 0x10BAF, 0x10C4F, 0x10CFF, 0x10D3F, 0x10E7F,
			0x10EBF, 0x10EFF, 0x10F2F, 0x10F6F, 0x10FAF, 0x10FDF, 0x10FFF, 0x1107F, 0x110CF, 0x110FF, 0x1114F, 0x1117F, 0x111DF, 0x111FF, 0x1124F, 0x112AF,
			0x112FF, 0x1137F, 0x1147F, 0x114DF, 0x115FF, 0x1165F, 0x1167F, 0x116CF, 0x1174F, 0x1184F, 0x118FF, 0x1195F, 0x119FF, 0x11A4F, 0x11AAF, 0x11ABF,
			0x11AFF, 0x11B5F, 0x11C6F, 0x11CBF, 0x11D5F, 0x11DAF, 0x11EFF, 0x11F5F, 0x11FBF, 0x11FFF, 0x123FF, 0x1247F, 0x1254F, 0x12FFF, 0x1342F, 0x1345F,
			0x1467F, 0x16A3F, 0x16A6F, 0x16ACF, 0x16AFF, 0x16B8F, 0x16E9F, 0x16F9F, 0x16FFF, 0x187FF, 0x18AFF, 0x18CFF, 0x18D7F, 0x1AFFF, 0x1B0FF, 0x1B12F,
			0x1B16F, 0x1B2FF, 0x1BC9F, 0x1BCAF, 0x1CFCF, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D2DF, 0x1D2FF, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1DAAF, 0x1DFFF, 0x1E02F,
			0x1E08F, 0x1E14F, 0x1E2BF, 0x1E2FF, 0x1E4FF, 0x1E7FF, 0x1E8DF, 0x1E95F, 0x1ECBF, 0x1ED4F, 0x1EEFF, 0x1F02F, 0x1F09F, 0x1F0FF, 0x1F1FF, 0x1F2FF,
			0x1F5FF, 0x1F64F, 0x1F67F, 0x1F6FF, 0x1F77F, 0x1F7FF, 0x1F8FF, 0x1F9FF, 0x1FA6F, 0x1FAFF, 0x1FBFF, 0x2A6DF, 0x2B73F, 0x2B81F, 0x2CEAF, 0x2EBEF, 0x2EE5F,
			0x2FA1F, 0x3134F, 0x323AF, 0x3347F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
		];
		/* eslint-enable max-len */

		/**
		 * Converts a code point to the name of its Unicode block.
		 * Returns "-" when no block matches.
		 *
		 * The table stores only the end of each block, so the first block whose
		 * end is >= codepoint wins; a code point in a gap between blocks is
		 * therefore reported as belonging to the following block.
		 * @param {number} codepoint - Code point
		 * @returns {string}
		 */
		toBlockNameFromUnicode = function (codepoint) {
			for (let i = 0; i < unicode_blockname_array.length; i++) {
				if (codepoint <= unicode_blockaddress_array[i]) {
					return unicode_blockname_array[i];
				}
			}
			return "-";
		};

		/**
		 * Classifies a code point as a variation selector.
		 * @param {number} codepoint - Code point
		 * @param {boolean} [annotate = false] - Whether to append a note to VS15 / VS16
		 * @returns {string|null} Label, or null when not a variation selector
		 */
		getVariationSelectorsnumberFromCodePoint = function (codepoint, annotate) {
			// Mongolian free variation selectors U+180B..U+180D (3 characters)
			// prettier-ignore
			if (0x180B <= codepoint && codepoint <= 0x180D) {
				// prettier-ignore
				return "FVS" + (codepoint - 0x180B + 1);
			}
			// Variation selectors used by SVS: U+FE00..U+FE0F (VS1..VS16, 16 characters)
			// prettier-ignore
			if (0xFE00 <= codepoint && codepoint <= 0xFE0F) {
				// prettier-ignore
				const n = codepoint - 0xFE00 + 1;
				if (annotate) {
					// prettier-ignore
					if (codepoint === 0xFE0E) { return "VS15 (text)"; }
					// prettier-ignore
					if (codepoint === 0xFE0F) { return "VS16 (emoji)"; }
				}
				return "VS" + n;
			}
			// Variation selectors used by IVS: U+E0100..U+E01EF (VS17..VS256, 240 characters)
			// prettier-ignore
			if (0xE0100 <= codepoint && codepoint <= 0xE01EF) {
				// prettier-ignore
				return "VS" + (codepoint - 0xE0100 + 17);
			}
			return null;
		};

		/**
		 * Classifies a code point as a tag character.
		 * @param {number} codepoint - Code point
		 * @returns {string|null} Label, or null when not a tag character
		 */
		getTagCharacterFromCodePoint = function (codepoint) {
			// TAG characters U+E0020..U+E007F
			// prettier-ignore
			if (0xE0020 <= codepoint && codepoint <= 0xE007F) {
				// CANCEL TAG
				// prettier-ignore
				if (codepoint === 0xE007F) {
					return "CANCEL_TAG";
				}
				// Returned in the form TAG_20..TAG_7E
				// prettier-ignore
				const ascii = codepoint - 0xE0000; // 0x20..0x7E
				return "TAG_" + ascii.toString(16).toUpperCase().padStart(2, "0");
			}
			return null;
		};
	}

	/**
	 * Tests whether the UTF-16 code unit at index is a high (leading) surrogate.
	 * @param {string} text - Target text
	 * @param {number} index - Index of the code unit
	 * @returns {boolean} Result (false when index is out of range)
	 */
	static isHighSurrogateAt(text, index) {
		const ch = text.charCodeAt(index);
		// prettier-ignore
		return 0xD800 <= ch && ch <= 0xDBFF;
	}

	/**
	 * Tests whether the UTF-16 code unit at index is a low (trailing) surrogate.
	 * @param {string} text - Target text
	 * @param {number} index - Index of the code unit
	 * @returns {boolean} Result (false when index is out of range)
	 */
	static isLowSurrogateAt(text, index) {
		const ch = text.charCodeAt(index);
		// prettier-ignore
		return 0xDC00 <= ch && ch <= 0xDFFF;
	}

	/**
	 * Tests whether the UTF-16 code unit at index is a surrogate
	 * (either the high or the low half).
	 * @param {string} text - Target text
	 * @param {number} index - Index of the code unit
	 * @returns {boolean} Result (false when index is out of range)
	 */
	static isSurrogatePairAt(text, index) {
		const ch = text.charCodeAt(index);
		// prettier-ignore
		return 0xD800 <= ch && ch <= 0xDFFF;
	}

	/**
	 * Returns the code point that starts at index, decoding a surrogate pair.
	 *
	 * An unpaired surrogate is returned as-is (the same behaviour as
	 * String.prototype.codePointAt) instead of being combined with an
	 * unrelated following code unit.
	 * @param {string} text - Target text
	 * @param {number} [index = 0] - Index of the code unit
	 * @returns {number} Code point (NaN when index is out of range)
	 */
	static codePointAt(text, index) {
		const index_ = index !== undefined ? index : 0;
		const high = text.charCodeAt(index_);
		if (Unicode.isHighSurrogateAt(text, index_) && Unicode.isLowSurrogateAt(text, index_ + 1)) {
			const low = text.charCodeAt(index_ + 1);
			// prettier-ignore
			return (((high - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
		}
		return high;
	}

	/**
	 * Returns the code point that ends just before index.
	 *
	 * A low surrogate is combined with the preceding code unit only when that
	 * unit really is a high surrogate; otherwise the single code unit at
	 * index - 1 is returned.
	 * @param {string} text - Target text
	 * @param {number} index - Index of the code unit following the code point
	 * @returns {number} Code point (NaN when index - 1 is out of range)
	 */
	static codePointBefore(text, index) {
		if (Unicode.isLowSurrogateAt(text, index - 1) && Unicode.isHighSurrogateAt(text, index - 2)) {
			return Unicode.codePointAt(text, index - 2);
		}
		return text.charCodeAt(index - 1);
	}

	/**
	 * Counts the code points in the given range of code units.
	 *
	 * A valid surrogate pair counts as one; every other code unit, including
	 * an unpaired surrogate, counts as one on its own.
	 * @param {string} text - Target text
	 * @param {number} [beginIndex=0] - First index (optional)
	 * @param {number} [endIndex] - End index, exclusive (optional, defaults to text.length)
	 * @returns {number} Number of code points
	 */
	static codePointCount(text, beginIndex, endIndex) {
		const begin = beginIndex === undefined ? 0 : beginIndex;
		const end = endIndex === undefined ? text.length : endIndex;
		let count = 0;
		for (let i = begin; i < end; i++) {
			count++;
			// Skip the trailing half only when this really is a pair.
			if (Unicode.isHighSurrogateAt(text, i) && Unicode.isLowSurrogateAt(text, i + 1)) {
				i++;
			}
		}
		return count;
	}

	/**
	 * Moves a code-unit index by a number of code points.
	 *
	 * Only valid surrogate pairs are stepped over as a unit, which keeps the
	 * result consistent with codePointAt() and codePointCount().
	 *
	 * NOTE(review): stepping back exactly one code point past index 0 returns
	 * -1 instead of throwing; this is kept for backward compatibility.
	 * @param {string} text - Target text
	 * @param {number} index - Starting index (code units)
	 * @param {number} codePointOffset - Number of code points to move (may be negative)
	 * @returns {number} The shifted index
	 * @throws {string} "error offsetByCodePoints" when the offset cannot be reached
	 */
	static offsetByCodePoints(text, index, codePointOffset) {
		let count = 0;
		if (codePointOffset === 0) {
			return index;
		}
		if (codePointOffset > 0) {
			for (; index < text.length; index++) {
				count++;
				if (Unicode.isHighSurrogateAt(text, index) && Unicode.isLowSurrogateAt(text, index + 1)) {
					index++;
				}
				if (count === codePointOffset) {
					return index + 1;
				}
			}
		} else {
			codePointOffset = -codePointOffset;
			for (; index >= 0; index--) {
				count++;
				if (Unicode.isLowSurrogateAt(text, index - 1) && Unicode.isHighSurrogateAt(text, index - 2)) {
					index--;
				}
				if (count === codePointOffset) {
					return index - 1;
				}
			}
		}
		throw "error offsetByCodePoints";
	}

	/**
	 * Converts code points to an array of UTF-16 code units.
	 *
	 * Accepts either one array (or array-like) of code points, or the code
	 * points as separate arguments. An empty array or no arguments yields [].
	 * @param {...(number|number[])} codepoint - Array of UTF-32 values, or code points as variadic arguments
	 * @returns {number[]} UTF-16 code units
	 */
	static toUTF16ArrayFromCodePoint(...codepoint) {
		const first = codepoint[0];
		// Decide by type, not by truthiness of length, so that an empty
		// array is still recognised as "the array form".
		const is_array_like = first !== null && typeof first === "object" && typeof first.length === "number";
		/**
		 * @type {number[]}
		 */
		const codepoint_array = is_array_like ? first : codepoint;
		/**
		 * @type {number[]}
		 */
		const utf16_array = [];
		for (let i = 0; i < codepoint_array.length; i++) {
			const cp = codepoint_array[i];
			if (0x10000 <= cp) {
				// prettier-ignore
				const high = ((cp - 0x10000) >> 10) + 0xD800;
				// 0x10000 has no bits in the low 10, so masking cp directly is fine.
				// prettier-ignore
				const low = (cp & 0x3FF) + 0xDC00;
				utf16_array.push(high);
				utf16_array.push(low);
			} else {
				utf16_array.push(cp);
			}
		}
		return utf16_array;
	}

	/**
	 * Converts code points to a string.
	 *
	 * Accepts either one array of code points, or the code points as separate
	 * arguments. An empty array or no arguments yields "".
	 * @param {...(number|number[])} codepoint - Array of code points, or code points as variadic arguments
	 * @returns {string} Converted text
	 */
	static fromCodePoint(...codepoint) {
		const utf16_array = Unicode.toUTF16ArrayFromCodePoint(...codepoint);
		const text = [];
		for (let i = 0; i < utf16_array.length; i++) {
			text[text.length] = String.fromCharCode(utf16_array[i]);
		}
		return text.join("");
	}

	/**
	 * Converts a string to an array of UTF-32 values (code points).
	 * @param {string} text - Text to convert
	 * @returns {number[]} Array of code points
	 */
	static toUTF32Array(text) {
		const utf32 = [];
		for (let i = 0; i < text.length; i = Unicode.offsetByCodePoints(text, i, 1)) {
			utf32.push(Unicode.codePointAt(text, i));
		}
		return utf32;
	}

	/**
	 * Converts an array of UTF-32 values to a string.
	 * @param {number[]} utf32 - Code points to convert
	 * @returns {string} Converted text
	 */
	static fromUTF32Array(utf32) {
		return Unicode.fromCodePoint(utf32);
	}

	/**
	 * Converts a string to an array of UTF-16 code units.
	 * @param {string} text - Text to convert
	 * @returns {number[]} Array of UTF-16 code units
	 */
	static toUTF16Array(text) {
		const utf16 = [];
		for (let i = 0; i < text.length; i++) {
			utf16[i] = text.charCodeAt(i);
		}
		return utf16;
	}

	/**
	 * Converts an array of UTF-16 code units to a string.
	 * @param {number[]} utf16 - Code units to convert
	 * @returns {string} Converted text
	 */
	static fromUTF16Array(utf16) {
		const text = [];
		for (let i = 0; i < utf16.length; i++) {
			text[i] = String.fromCharCode(utf16[i]);
		}
		return text.join("");
	}

	/**
	 * Converts a string to an array of UTF-8 bytes (without BOM).
	 * @param {string} text - Text to convert
	 * @returns {number[]} Array of UTF-8 bytes
	 */
	static toUTF8Array(text) {
		return Unicode.toUTFBinaryFromCodePoint(Unicode.toUTF32Array(text), "utf-8", false);
	}

	/**
	 * Converts an array of UTF-8 bytes to a string.
	 * @param {number[]} utf8 - Bytes to convert
	 * @returns {string} Converted text
	 */
	static fromUTF8Array(utf8) {
		return Unicode.fromCodePoint(Unicode.toCodePointFromUTFBinary(utf8, "utf-8"));
	}

	/**
	 * Cuts out part of a text.
	 * - The unit is code points, not UTF-16 code units.
	 * @param {string} text - Text to cut from
	 * @param {number} offset - Start position (code points)
	 * @param {number} size - Length to cut (code points)
	 * @returns {string} The cut text ("" when the range is outside the text)
	 */
	static cutTextForCodePoint(text, offset, size) {
		const utf32 = Unicode.toUTF32Array(text);
		const cut = [];
		for (let i = 0, point = offset; i < size && point < utf32.length; i++, point++) {
			cut.push(utf32[point]);
		}
		return Unicode.fromUTF32Array(cut);
	}

	/**
	 * Inspects the start of a UTF byte array for a byte order mark (BOM).
	 *
	 * The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks
	 * because the UTF-32LE mark begins with the UTF-16LE mark.
	 * @param {number[]} utfbinary - Byte array to inspect
	 * @returns {string|null} Encoding name, or null when no BOM is found
	 */
	static getCharsetFromBOM(utfbinary) {
		if (utfbinary.length >= 4) {
			// prettier-ignore
			if (utfbinary[0] === 0x00 && utfbinary[1] === 0x00 && utfbinary[2] === 0xFE && utfbinary[3] === 0xFF) {
				return "UTF-32BE";
			}
			// prettier-ignore
			if (utfbinary[0] === 0xFF && utfbinary[1] === 0xFE && utfbinary[2] === 0x00 && utfbinary[3] === 0x00) {
				return "UTF-32LE";
			}
		}
		if (utfbinary.length >= 3) {
			// prettier-ignore
			if (utfbinary[0] === 0xEF && utfbinary[1] === 0xBB && utfbinary[2] === 0xBF) {
				return "UTF-8";
			}
		}
		if (utfbinary.length >= 2) {
			// prettier-ignore
			if (utfbinary[0] === 0xFE && utfbinary[1] === 0xFF) {
				return "UTF-16BE";
			}
			// prettier-ignore
			if (utfbinary[0] === 0xFF && utfbinary[1] === 0xFE) {
				return "UTF-16LE";
			}
		}
		return null;
	}

	/**
	 * Decodes a UTF byte array into code points.
	 *
	 * When the data starts with a BOM, the BOM decides the encoding and is
	 * skipped; otherwise charset is used. UTF-16 / UTF-32 without an explicit
	 * "be" suffix are read as little endian. No validation of the byte
	 * sequences is performed.
	 * @param {number[]} binary - Byte array to decode
	 * @param {string} [charset] - UTF variant (when omitted, a BOM is required)
	 * @returns {number[]} Array of code points (null on failure)
	 */
	static toCodePointFromUTFBinary(binary, charset) {
		const utf32_array = [];
		let check_charset = charset;
		let offset = 0;
		// A byte order mark, when present, takes priority over charset.
		const charset_for_bom = Unicode.getCharsetFromBOM(binary);
		if (charset_for_bom) {
			check_charset = charset_for_bom;
			if (/utf-?8/i.test(charset_for_bom)) {
				offset = 3;
			} else if (/utf-?16/i.test(charset_for_bom)) {
				offset = 2;
			} else if (/utf-?32/i.test(charset_for_bom)) {
				offset = 4;
			}
		}
		// Neither a BOM nor an explicit charset: the data cannot be decoded.
		if (!charset_for_bom && !charset) {
			return null;
		}
		if (/utf-?8n?/i.test(check_charset)) {
			// UTF-8
			// size: continuation bytes still expected; write: bits gathered so far
			let size = 0;
			let write = 0;
			for (let i = offset; i < binary.length; i++) {
				const bin = binary[i];
				if (size === 0) {
					if (bin < 0x80) {
						utf32_array.push(bin);
						// prettier-ignore
					} else if (bin < 0xE0) {
						size = 1;
						// prettier-ignore
						write = bin & 0x1F; // 0001 1111
						// prettier-ignore
					} else if (bin < 0xF0) {
						size = 2;
						// prettier-ignore
						write = bin & 0xF; // 0000 1111
					} else {
						size = 3;
						// prettier-ignore
						write = bin & 0x7; // 0000 0111
					}
				} else {
					write <<= 6;
					// prettier-ignore
					write |= bin & 0x3F; // 0011 1111
					size--;
					if (size === 0) {
						utf32_array.push(write);
					}
				}
			}
			return utf32_array;
		} else if (/utf-?16/i.test(check_charset)) {
			// UTF-16
			// First pack the bytes into UTF-16 code units.
			const utf16 = [];
			if (/utf-?16(be)/i.test(check_charset)) {
				// UTF-16BE
				for (let i = offset; i < binary.length; i += 2) {
					utf16.push((binary[i] << 8) | binary[i + 1]);
				}
			} else if (/utf-?16(le)?/i.test(check_charset)) {
				// UTF-16LE
				for (let i = offset; i < binary.length; i += 2) {
					utf16.push(binary[i] | (binary[i + 1] << 8));
				}
			}
			// Then combine the code units into UTF-32 values.
			for (let i = 0; i < utf16.length; i++) {
				// prettier-ignore
				if (0xD800 <= utf16[i] && utf16[i] <= 0xDBFF) {
					if (i + 2 <= utf16.length) {
						const high = utf16[i];
						const low = utf16[i + 1];
						// prettier-ignore
						utf32_array.push((((high - 0xD800) << 10) | (low - 0xDC00)) + 0x10000);
					}
					i++;
				} else {
					utf32_array.push(utf16[i]);
				}
			}
			return utf32_array;
		} else {
			// UTF-32
			if (/utf-?32(be)/i.test(check_charset)) {
				// UTF-32BE
				for (let i = offset; i < binary.length; i += 4) {
					utf32_array.push((binary[i] << 24) | (binary[i + 1] << 16) | (binary[i + 2] << 8) | binary[i + 3]);
				}
				return utf32_array;
			} else if (/utf-?32(le)?/i.test(check_charset)) {
				// UTF-32LE
				for (let i = offset; i < binary.length; i += 4) {
					utf32_array.push(binary[i] | (binary[i + 1] << 8) | (binary[i + 2] << 16) | (binary[i + 3] << 24));
				}
				return utf32_array;
			}
		}
		return null;
	}

	/**
	 * Encodes an array of code points into a UTF byte array.
	 *
	 * UTF-16 / UTF-32 without an explicit "be" suffix are written as little
	 * endian. A charset ending in " with BOM" forces a BOM.
	 * @param {number[]} utf32_array - Code points to encode
	 * @param {string} charset - UTF variant
	 * @param {boolean} [is_with_bom=true] - Whether to prepend a BOM
	 * @returns {number[]} Byte array (null when charset is not a UTF variant)
	 */
	static toUTFBinaryFromCodePoint(utf32_array, charset, is_with_bom) {
		let is_with_bom_ = is_with_bom !== undefined ? is_with_bom : true;
		// A charset that ends in " with BOM" always gets a BOM.
		if (/\s+with\s+bom$/i.test(charset)) {
			is_with_bom_ = true;
		}
		/**
		 * @type {number[]}
		 */
		const binary = [];
		// UTF-8
		if (/utf-?8n?/i.test(charset)) {
			// Prepend the BOM.
			if (is_with_bom_) {
				// prettier-ignore
				binary.push(0xEF);
				// prettier-ignore
				binary.push(0xBB);
				// prettier-ignore
				binary.push(0xBF);
			}
			for (let i = 0; i < utf32_array.length; i++) {
				let codepoint = utf32_array[i];
				// Single-byte character
				if (codepoint <= 0x7F) {
					binary.push(codepoint);
					continue;
				}
				const buffer = [];
				let size = 0;
				// Two bytes or more
				if (codepoint < 0x800) {
					size = 2;
				} else if (codepoint < 0x10000) {
					size = 3;
				} else {
					size = 4;
				}
				// Bytes are produced from the least significant 6 bits upward,
				// so the lead byte is generated last.
				for (let j = 0; j < size; j++) {
					let write = codepoint & ((1 << 6) - 1);
					if (j === size - 1) {
						if (size === 2) {
							// prettier-ignore
							write |= 0xC0; // 1100 0000
						} else if (size === 3) {
							// prettier-ignore
							write |= 0xE0; // 1110 0000
						} else {
							// prettier-ignore
							write |= 0xF0; // 1111 0000
						}
						buffer.push(write);
						break;
					}
					buffer.push(write | 0x80); // 1000 0000
					codepoint = codepoint >> 6;
				}
				// Reverse into lead-byte-first order.
				for (let j = buffer.length - 1; j >= 0; j--) {
					binary.push(buffer[j]);
				}
			}
			return binary;
		} else if (/utf-?16/i.test(charset)) {
			// UTF-16
			// Repack into UTF-16 code units first.
			const utf16_array = Unicode.toUTF16ArrayFromCodePoint(utf32_array);
			if (/utf-?16(be)/i.test(charset)) {
				// UTF-16BE
				// Prepend the BOM.
				if (is_with_bom_) {
					binary.push(0xFE);
					binary.push(0xFF);
				}
				for (let i = 0; i < utf16_array.length; i++) {
					binary.push(utf16_array[i] >> 8);
					binary.push(utf16_array[i] & 0xFF);
				}
			} else if (/utf-?16(le)?/i.test(charset)) {
				// UTF-16LE
				// Prepend the BOM.
				if (is_with_bom_) {
					binary.push(0xFF);
					binary.push(0xFE);
				}
				for (let i = 0; i < utf16_array.length; i++) {
					binary.push(utf16_array[i] & 0xFF);
					binary.push(utf16_array[i] >> 8);
				}
			}
			return binary;
		} else if (/utf-?32/i.test(charset)) {
			// UTF-32
			if (/utf-?32(be)/i.test(charset)) {
				// UTF-32BE
				// Prepend the BOM.
				if (is_with_bom_) {
					binary.push(0x00);
					binary.push(0x00);
					binary.push(0xFE);
					binary.push(0xFF);
				}
				for (let i = 0; i < utf32_array.length; i++) {
					binary.push((utf32_array[i] >> 24) & 0xFF);
					binary.push((utf32_array[i] >> 16) & 0xFF);
					binary.push((utf32_array[i] >> 8) & 0xFF);
					binary.push(utf32_array[i] & 0xFF);
				}
			} else if (/utf-?32(le)?/i.test(charset)) {
				// UTF-32LE
				// Prepend the BOM.
				if (is_with_bom_) {
					binary.push(0xFF);
					binary.push(0xFE);
					binary.push(0x00);
					binary.push(0x00);
				}
				for (let i = 0; i < utf32_array.length; i++) {
					binary.push(utf32_array[i] & 0xFF);
					binary.push((utf32_array[i] >> 8) & 0xFF);
					binary.push((utf32_array[i] >> 16) & 0xFF);
					binary.push((utf32_array[i] >> 24) & 0xFF);
				}
			}
			return binary;
		}
		return null;
	}

	/**
	 * Converts a code point to the name of its Unicode block.
	 * Returns "-" when no block matches.
	 * @param {number} codepoint - Code point
	 * @returns {string}
	 */
	static toBlockNameFromUnicode(codepoint) {
		Unicode.init();
		return toBlockNameFromUnicode(codepoint);
	}

	/**
	 * Converts a code point to the short name of a control character.
	 * Returns null when the code point has no such name.
	 * @param {number} codepoint - Code point
	 * @returns {string|null}
	 */
	static toControlCharcterName(codepoint) {
		Unicode.init();

		// Variation selectors first: they are reported as FVSn / VSn.
		const info_variation_selectors_number = getVariationSelectorsnumberFromCodePoint(codepoint);
		if (info_variation_selectors_number !== null) {
			return info_variation_selectors_number;
		}
		// Then tag characters: they are reported as TAG_xx / CANCEL_TAG.
		const info_tag_character = getTagCharacterFromCodePoint(codepoint);
		if (info_tag_character !== null) {
			return info_tag_character;
		}
		// Finally the remaining control characters from the map.
		const name = control_charcter_map[codepoint];
		return name ? name : null;
	}

	/**
	 * Tests whether a code point is a component of a grapheme
	 * (one visually perceived character).
	 *
	 * These are elements that do not start a new grapheme on their own but
	 * attach to or modify the preceding base character.
	 *
	 * Included:
	 * - Combining marks (Mn / Mc / Me, variation selectors excluded)
	 * - Variation selectors (VS / IVS / FVS)
	 * - Skin tone modifiers (EMOJI MODIFIER FITZPATRICK)
	 * - Tag characters (TAG CHARACTER)
	 * - Zero width joiner
	 *
	 * Not included:
	 * - Flags (Regional Indicator), which follow a pairing rule
	 *
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isGraphemeComponentFromCodePoint(codepoint) {
		// prettier-ignore
		return (
			Unicode.isCombiningMarkFromCodePoint(codepoint) // combining mark
			|| Unicode.isVariationSelectorFromCodePoint(codepoint) // variation selector
			|| Unicode.isEmojiModifierFromCodePoint(codepoint) // skin tone modifier
			|| Unicode.isTagCharacterFromCodePoint(codepoint) // tag character
			|| codepoint === 0x200D // ZWJ (ZERO WIDTH JOINER)
		);
	}

	/**
	 * Tests whether a code point is a Regional Indicator symbol
	 * (the building block of flag sequences).
	 *
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isRegionalIndicatorFromCodePoint(codepoint) {
		// prettier-ignore
		return (0x1F1E6 <= codepoint && codepoint <= 0x1F1FF);
	}

	/**
	 * Tests whether two consecutive code points combine with each other.
	 *
	 * Included:
	 * - Flags (both code points are Regional Indicators)
	 *
	 * @param {number|null} codepoint1 - Previous code point
	 * @param {number|null} codepoint2 - Current code point
	 * @returns {boolean} Result (false when either argument is null / undefined)
	 */
	static isRegionalIndicatorContinuation(codepoint1, codepoint2) {
		// `== null` matches both null and undefined.
		if (codepoint1 == null || codepoint2 == null) {
			return false;
		}
		return Unicode.isRegionalIndicatorFromCodePoint(codepoint1)
			&& Unicode.isRegionalIndicatorFromCodePoint(codepoint2);
	}

	/**
	 * Tests whether a code point is one of the zero-width characters.
	 *
	 * Included:
	 * - Zero width space, zero width non-joiner, zero width joiner, word joiner
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isZeroWidthCharacterFromCodePoint(codepoint) {
		// prettier-ignore
		return (
			codepoint === 0x200B // ZWSP (ZERO WIDTH SPACE)
			|| codepoint === 0x200C // ZWNJ (ZERO WIDTH NON-JOINER)
			|| codepoint === 0x200D // ZWJ (ZERO WIDTH JOINER)
			|| codepoint === 0x2060 // WJ (WORD JOINER)
		);
	}

	/**
	 * Tests whether a code point is a combining mark.
	 *
	 * Uses the \p{Mark} Unicode property escape; when building or running
	 * that expression throws, a fixed list of ranges is used instead.
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isCombiningMarkFromCodePoint(codepoint) {
		// Variation selectors are excluded.
		if (Unicode.isVariationSelectorFromCodePoint(codepoint)) {
			return false;
		}
		try {
			return new RegExp("\\p{Mark}", "u").test(String.fromCodePoint(codepoint));
		// eslint-disable-next-line no-unused-vars
		} catch (e) {
			// Fallback
			return (
				// Combining Diacritical Marks
				// prettier-ignore
				(0x0300 <= codepoint && codepoint <= 0x036F)
				// Combining Diacritical Marks Extended
				// prettier-ignore
				|| (0x1AB0 <= codepoint && codepoint <= 0x1AFF)
				// Combining Diacritical Marks Supplement
				// prettier-ignore
				|| (0x1DC0 <= codepoint && codepoint <= 0x1DFF)
				// Combining Diacritical Marks for Symbols
				// prettier-ignore
				|| (0x20D0 <= codepoint && codepoint <= 0x20FF)
				// The two marks used in Japanese:
				// COMBINING VOICED SOUND MARK
				// COMBINING SEMI-VOICED SOUND MARK
				// prettier-ignore
				|| (0x3099 <= codepoint && codepoint <= 0x309A)
				// Combining Half Marks
				// prettier-ignore
				|| (0xFE20 <= codepoint && codepoint <= 0xFE2F)
			);
		}
	}

	/**
	 * Tests whether a code point is a variation selector.
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isVariationSelectorFromCodePoint(codepoint) {
		return (
			// Mongolian free variation selectors U+180B..U+180D (3 characters)
			// prettier-ignore
			(0x180B <= codepoint && codepoint <= 0x180D)
			// Variation selectors used by SVS: U+FE00..U+FE0F (VS1..VS16, 16 characters)
			// prettier-ignore
			|| (0xFE00 <= codepoint && codepoint <= 0xFE0F)
			// Variation selectors used by IVS: U+E0100..U+E01EF (VS17..VS256, 240 characters)
			// prettier-ignore
			|| (0xE0100 <= codepoint && codepoint <= 0xE01EF)
		);
	}

	/**
	 * Tests whether a code point is a skin tone modifier.
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isEmojiModifierFromCodePoint(codepoint) {
		return (
			// EMOJI MODIFIER FITZPATRICK
			// prettier-ignore
			0x1F3FB <= codepoint && codepoint <= 0x1F3FF
		);
	}

	/**
	 * Tests whether a code point lies in the tag character range.
	 * @param {number} codepoint - Code point
	 * @returns {boolean} Result
	 */
	static isTagCharacterFromCodePoint(codepoint) {
		return (
			// TAG CHARACTER
			// prettier-ignore
			0xE0000 <= codepoint && codepoint <= 0xE007F
		);
	}
}

/**
 * Whether Unicode.init() has already built the lookup tables.
 */
Unicode.is_initmap = false;