Files
NaverSearcher/NaverSearcher/StringParser.cs
2021-08-05 10:03:57 +09:00

299 lines
9.4 KiB
C#

using System;
using System.Collections.Generic;
using System.Text;
namespace NaverSearcher
{
/// <summary>
/// Breaks Korean text down into the sequence of intermediate strings that
/// appear while the text is being composed one jamo at a time.
/// </summary>
class StringParser
{
    /// <summary>Code point of the first precomposed Hangul syllable ('가').</summary>
    private const int HangulBase = 0xAC00;

    /// <summary>Number of medial vowels (entries in <see cref="wcMid"/>).</summary>
    private const int MidCount = 21;

    /// <summary>Number of final-consonant slots, including "no final" (entries in <see cref="wcTail"/>).</summary>
    private const int TailCount = 28;

    /// <summary>Number of syllables sharing one initial consonant (21 * 28 = 588).</summary>
    private const int SyllablesPerHead = MidCount * TailCount;

    /// <summary>Initial consonants (choseong), in syllable-composition order.</summary>
    private static readonly char[] wcHead =
    {
        'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ',
        'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ',
        'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ',
        'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ',
        'ㅌ', 'ㅍ', 'ㅎ'
    };

    /// <summary>Medial vowels (jungseong), in syllable-composition order.</summary>
    private static readonly char[] wcMid =
    {
        'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ',
        'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ',
        'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ',
        'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ',
        'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ'
    };

    /// <summary>
    /// Compound medial vowels. Each row is { first part, second part, compound }.
    /// </summary>
    private static readonly char[,] wcMidMix =
    {
        { 'ㅗ', 'ㅏ', 'ㅘ' },
        { 'ㅗ', 'ㅐ', 'ㅙ' },
        { 'ㅗ', 'ㅣ', 'ㅚ' }, // was 'ㅑ' in the second column; ㅚ is ㅗ + ㅣ
        { 'ㅜ', 'ㅓ', 'ㅝ' },
        { 'ㅜ', 'ㅔ', 'ㅞ' },
        { 'ㅜ', 'ㅣ', 'ㅟ' },
        { 'ㅡ', 'ㅣ', 'ㅢ' },
    };

    /// <summary>
    /// Final consonants (jongseong), in syllable-composition order.
    /// Index 0 (' ') means "no final consonant".
    /// </summary>
    private static readonly char[] wcTail =
    {
        ' ', 'ㄱ', 'ㄲ', 'ㄳ',
        'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ',
        'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ',
        'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ',
        'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ',
        'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ',
        'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
    };

    /// <summary>
    /// Compound final consonants. Each row is { first part, second part, compound }.
    /// </summary>
    private static readonly char[,] wcTailMix =
    {
        { 'ㄱ', 'ㅅ', 'ㄳ' },
        { 'ㄴ', 'ㅈ', 'ㄵ' },
        { 'ㄴ', 'ㅎ', 'ㄶ' },
        { 'ㄹ', 'ㄱ', 'ㄺ' },
        { 'ㄹ', 'ㅁ', 'ㄻ' },
        { 'ㄹ', 'ㅂ', 'ㄼ' },
        { 'ㄹ', 'ㅅ', 'ㄽ' },
        { 'ㄹ', 'ㅌ', 'ㄾ' },
        { 'ㄹ', 'ㅍ', 'ㄿ' },
        { 'ㄹ', 'ㅎ', 'ㅀ' },
        { 'ㅂ', 'ㅅ', 'ㅄ' },
    };

    /// <summary>Returns true when <paramref name="ch"/> is a precomposed Hangul syllable ('가'..'힣').</summary>
    private static bool IsSyllable(char ch)
    {
        return ch >= '가' && ch <= '힣';
    }

    /// <summary>Builds the precomposed syllable for the given head/mid/tail table indices.</summary>
    private static char ComposeSyllable(int headIndex, int midIndex, int tailIndex)
    {
        return (char)(HangulBase + (headIndex * SyllablesPerHead) + (midIndex * TailCount) + tailIndex);
    }

    /// <summary>
    /// Looks up <paramref name="compound"/> in the third column of a
    /// { first, second, compound } table and reports its first part.
    /// </summary>
    /// <returns>True when the compound was found; otherwise false and <paramref name="first"/> is '\0'.</returns>
    private static bool TryGetFirstComponent(char[,] table, char compound, out char first)
    {
        for (int row = 0; row < table.GetLength(0); row++)
        {
            if (table[row, 2] == compound)
            {
                first = table[row, 0];
                return true;
            }
        }

        first = '\0';
        return false;
    }

    /// <summary>
    /// Combines two final consonants into the compound final listed in <see cref="wcTailMix"/>.
    /// </summary>
    /// <returns>True when the pair forms a compound; otherwise false and <paramref name="fused"/> is '\0'.</returns>
    private static bool TryFuseTail(char firstPart, char secondPart, out char fused)
    {
        for (int row = 0; row < wcTailMix.GetLength(0); row++)
        {
            if (wcTailMix[row, 0] == firstPart && wcTailMix[row, 1] == secondPart)
            {
                fused = wcTailMix[row, 2];
                return true;
            }
        }

        fused = '\0';
        return false;
    }

    /// <summary>
    /// Tries to attach the initial consonant of the syllable being typed to the
    /// previous syllable as its final consonant (e.g. "가" + ㄱ shows as "각"
    /// before the next vowel arrives).
    /// </summary>
    /// <param name="committed">Text composed before the current character.</param>
    /// <param name="head">Initial consonant of the current syllable.</param>
    /// <param name="merged">The previous syllable with <paramref name="head"/> absorbed as (part of) its final.</param>
    /// <returns>True when the previous character could absorb <paramref name="head"/>.</returns>
    private static bool TryAttachToPrevious(string committed, char head, out char merged)
    {
        merged = '\0';
        if (committed.Length == 0)
        {
            return false;
        }

        char previous = committed[committed.Length - 1];

        // Only a precomposed syllable has a final-consonant slot. Without this
        // check the index arithmetic below would silently alter unrelated
        // characters (e.g. turn "d" into "e").
        if (!IsSyllable(previous))
        {
            return false;
        }

        int previousTail = (previous - HangulBase) % TailCount;

        // Previous final + current initial form a compound final (ㄹ + ㄱ -> ㄺ).
        char fused;
        if (TryFuseTail(wcTail[previousTail], head, out fused))
        {
            merged = (char)(previous - previousTail + Array.IndexOf(wcTail, fused));
            return true;
        }

        // Previous syllable has no final and the initial is usable as a final.
        // Initials such as ㄸ, ㅃ, ㅉ are absent from wcTail and yield -1.
        int headAsTail = Array.IndexOf(wcTail, head);
        if (previousTail == 0 && headAsTail != -1)
        {
            merged = (char)(previous + headAsTail);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Adds the steps for a standalone compatibility jamo: the first part of a
    /// compound jamo (if it is one), then the jamo itself.
    /// </summary>
    private static void AddJamoSteps(List<string> history, string committed, char jamo)
    {
        char first;
        if (TryGetFirstComponent(wcMidMix, jamo, out first) || TryGetFirstComponent(wcTailMix, jamo, out first))
        {
            history.Add(committed + first);
        }

        // The original appended the stale "first part" here instead of the jamo itself.
        history.Add(committed + jamo);
    }

    /// <summary>
    /// Adds the steps for a precomposed syllable: initial consonant, vowel
    /// (with the first part of a compound vowel as an extra step) and final
    /// consonant (with the first part of a compound final as an extra step).
    /// </summary>
    private static void AddSyllableSteps(List<string> history, string committed, char syllable)
    {
        int offset = syllable - HangulBase;
        int headIndex = offset / SyllablesPerHead;
        int midIndex = (offset % SyllablesPerHead) / TailCount;
        int tailIndex = offset % TailCount;

        // Initial consonant: shown attached to the previous syllable when possible,
        // otherwise as a standalone jamo.
        char merged;
        if (TryAttachToPrevious(committed, wcHead[headIndex], out merged))
        {
            history.Add(committed.Substring(0, committed.Length - 1) + merged);
        }
        else
        {
            history.Add(committed + wcHead[headIndex]);
        }

        // Medial vowel.
        char first;
        if (TryGetFirstComponent(wcMidMix, wcMid[midIndex], out first))
        {
            history.Add(committed + ComposeSyllable(headIndex, Array.IndexOf(wcMid, first), 0));
        }

        history.Add(committed + ComposeSyllable(headIndex, midIndex, 0));

        // Final consonant (index 0 means the syllable has none).
        if (tailIndex != 0)
        {
            if (TryGetFirstComponent(wcTailMix, wcTail[tailIndex], out first))
            {
                history.Add(committed + ComposeSyllable(headIndex, midIndex, Array.IndexOf(wcTail, first)));
            }

            history.Add(committed + ComposeSyllable(headIndex, midIndex, tailIndex));
        }
    }

    /// <summary>
    /// Produces every intermediate string that appears while <paramref name="wstr"/>
    /// is composed step by step. For example "가가" yields "ㄱ", "가", "각", "가가".
    /// </summary>
    /// <param name="wstr">Text to decompose. Characters other than Hangul syllables and
    /// compatibility jamo contribute a single step each.</param>
    /// <returns>The composition steps in order; empty when <paramref name="wstr"/> is null or empty.</returns>
    public static List<string> CharacterMakeHistory(string wstr)
    {
        List<string> history = new List<string>();
        if (string.IsNullOrEmpty(wstr))
        {
            return history;
        }

        // Text that is fully composed before the character currently being processed.
        string committed = string.Empty;

        foreach (char ch in wstr)
        {
            if (ch >= 'ㄱ' && ch <= 'ㅣ')
            {
                // Standalone compatibility jamo.
                AddJamoSteps(history, committed, ch);
            }
            else if (IsSyllable(ch))
            {
                // Precomposed syllable.
                AddSyllableSteps(history, committed, ch);
            }
            else
            {
                history.Add(committed + ch);
            }

            // Every branch ends with the fully composed character appended.
            committed += ch;
        }

        return history;
    }
}
}