ユーザー定義関数で、
VLOOKUPにDIFFのアルゴリズムを組込んだ、
VlookUpPatient という関数を作成。
基本的には、大文字小文字、半角・全角、ひらがな・カタカナを無視して検索し、
もっとも類似度の高い箇所を見つけ出してくる。
くわしく説明すると、
Mayersのアルゴリズムで、レーベンシュタイン距離(LD)を算出し、
更にLDを標準化して類似度の精度をアップさせ、
同じ精度であれば、大文字小文字等を考慮し、
なるべく精度の高いものを拾ってくるよう調整。
引数については、
数式の挿入ダイアログを表示させ、
関数の分類に「検索/行列」を指定すると
関数名の一番下に該当関数が表示されるので、
選択してOKボタン押下で詳細が表示されます。
Option Explicit
'LD(LevenshteinDistance)=編集距離 :2つの要素列の違いを数値化したもの
Private Enum E_Diff_Snake
None
Horizontal
Vertical
Diagonal
End Enum
Private Sub Auto_Open()
'ブックが開かれた時に、数式の挿入ダイアログで表示される関数名の一覧に、本モジュールの関数の情報を追加
'※セル上で数式選択時に、各種情報を表示させたいが、Excelにその機能が無いため断念
Call Application.MacroOptions( _
Macro:="VlookUpPatient", _
Description:="それらしい値を見つけるVLOOKUPです", _
Category:="検索/行列", _
ArgumentDescriptions:=Array("※VLOOKUPと同様", _
"※VLOOKUPと同様", _
"※VLOOKUPと同様", _
"には Hitさせる最低類似度をセットします" & vbCrLf & "省略した場合、0になります" _
) _
)
End Sub
Public Function VlookUpPatient(検索値 As String, 範囲 As Range, 列番号 As Long, _
Optional 最低精度 As Double = 0) As Variant
'VLOOKUPに「レーベンシュタイン距離」の考慮を追加し、
'一番それらしい値を見つけ出します
Dim Ret As Variant
Ret = CVErr(XlCVError.xlErrDiv0)
On Error GoTo Finish
'引数を処理用の変数にセット
Dim FindValue As String: FindValue = 検索値
Dim DataArea As Range: Set DataArea = 範囲
Dim IndexCol As Long: IndexCol = 列番号
Dim MinPoint As Double: MinPoint = 最低精度
'検索値が空の場合、終了
If CStr(FindValue) = "" Then
Ret = CVErr(XlCVError.xlErrValue)
GoTo Finish
End If
'複数エリアの場合、終了
If DataArea.Areas.Count <> 1 Then
Ret = CVErr(XlCVError.xlErrRef)
GoTo Finish
End If
'範囲を、使用部分に絞り込む
Set DataArea = Application.Intersect(DataArea.Worksheet.UsedRange, DataArea)
If DataArea Is Nothing Then
Ret = CVErr(XlCVError.xlErrRef)
GoTo Finish
End If
'指定範囲の開始・終了列を取得
Dim Col_S As Long
Dim Col_E As Long
Col_S = DataArea.Column
Col_E = Col_S + DataArea.Columns.Count - 1
Dim Col_T As Long
Col_T = Col_S + IndexCol - 1
'指定列が範囲に無ければ、終了
If ((Col_S <= Col_T) And (Col_T <= Col_T)) = False Then
Ret = CVErr(XlCVError.xlErrNum)
GoTo Finish
End If
'1セルの場合、そのまま返す
If DataArea.Cells.Count = 1 Then
Ret = DataArea.Value
GoTo Finish
End If
'データを配列に格納
Dim DataAry As Variant
DataAry = DataArea.Value
'検索値を1文字毎に配列化
Dim FindChars() As String
FindChars = fDiff_String_to_Chars(CStr(FindValue))
Dim i As Long
For i = LBound(DataAry, 1) To UBound(DataAry, 1)
'データ範囲の1列目の値を取得
Dim DataValue As String
DataValue = DataAry(i, 1)
If DataValue <> "" Then
'データ値を1文字毎の配列に変換
Dim DataChars() As String
DataChars = fDiff_String_to_Chars(DataValue)
'レーベンシュタイン距離を算出
Dim LD As Long
Dim Sn() As E_Diff_Snake
LD = fDiff_LD_Myers(FindChars, DataChars, Sn, vbTextCompare)
'精度を算出
Dim Pre As Long
Pre = fDiff_LD_Precision(LD, Len(FindValue), Len(DataValue))
'指定精度より高い精度の場合
If MinPoint <= Pre Then
'精度の高い値があった場合
Dim Max As Long
If Max < Pre Then
Max = Pre
'該当箇所を格納
Dim IndexRow As Long
IndexRow = i
'後続の処理用に文字列配列を確保
Dim BackChars() As String
BackChars = DataChars
Dim Sb() As E_Diff_Snake
Sb = Sn
'同じ精度の値があった場合
ElseIf Max = Pre Then
Dim DataPoint As Double
DataPoint = fDiff_Snake_Myers(Sn, FindChars, DataChars)
Dim BackPoint As Double
BackPoint = fDiff_Snake_Myers(Sb, FindChars, BackChars)
If DataPoint < BackPoint Then
IndexRow = i
Sb = Sn
BackChars = fDiff_String_to_Chars(DataValue)
End If
End If
End If
End If
Next
If IndexRow = 0 Then
Ret = CVErr(XlCVError.xlErrNull)
GoTo Finish
End If
Ret = DataAry(IndexRow, IndexCol)
Finish:
VlookUpPatient = Ret
Resume Next
End Function
Private Function fArray_Length(DataAry As Variant, Optional Dimension As XlRowCol = XlRowCol.xlRows) As Long
On Error Resume Next
fArray_Length = UBound(DataAry, Dimension) - LBound(DataAry, Dimension) + 1
On Error GoTo 0
End Function
Private Function fDiff_String_to_Chars(Str As String) As String()
If Len(Str) = 0 Then Exit Function
Dim Chars() As String
ReDim Chars(0 To Len(Str) - 1)
Dim i As Long
For i = 1 To Len(Str)
Chars(i - 1) = Mid$(Str, i, 1)
Next
fDiff_String_to_Chars = Chars
End Function
Public Function fDiff_LD_Precision(LD As Long, aLen As Long, bLen As Long) As Long
'レーベンシュタイン距離(LD)を標準化し,精度をもとめる
Dim LenX As Long
If aLen < bLen Then
LenX = bLen
Else
LenX = aLen
End If
If LenX = 0 Then Exit Function
'LDを、比較文字列[A][B]の長い方の文字長で割る
Dim 類似度 As Long
類似度 = LD * 100 / LenX
'類似度を返したいので、100%-上記の編集度を引く
fDiff_LD_Precision = 100 - 類似度
End Function
Private Function fDiff_LD_Myers(aChars() As String, bChars() As String, Sn() As E_Diff_Snake, _
Optional CompareMode As VbCompareMethod = vbBinaryCompare) As Long
'Myers のアルゴリズム O(ND)
Dim LD As Long '編集距離:LevenshteinDistance
'文字列の長さを取得
Dim aLen As Long, bLen As Long
aLen = UBound(aChars, 1) - LBound(aChars, 1) + 1
bLen = UBound(bChars, 1) - LBound(bChars, 1) + 1
If bLen = 0 Or aLen = 0 Then
LD = bLen - aLen
GoTo Finish
End If
'その他、準備
Dim LenAB As Long
LenAB = aLen + bLen
Dim v() As Long
ReDim v(-LenAB To LenAB)
Dim D As Long
Dim k As Long
Dim bi As Long
Dim ai As Long
Dim s As E_Diff_Snake
ReDim Sn(aLen, bLen) As E_Diff_Snake
For D = 0 To LenAB
For k = -D To D Step 2
If -aLen <= k Then
If k <= bLen Then
'縦範囲の境界
If IIf(D < aLen, D, aLen) = -k Then
bi = v(k + 1)
s = E_Diff_Snake.Vertical
'横範囲の境界
ElseIf IIf(D < bLen, D, bLen) = k Then
bi = v(k - 1) + 1
s = E_Diff_Snake.Horizontal
'その間
Else
If v(k + 1) < v(k - 1) + 1 Then
bi = v(k - 1) + 1
s = E_Diff_Snake.Horizontal
Else
bi = v(k + 1)
s = E_Diff_Snake.Vertical
End If
End If
ai = bi - k
If ai <= aLen Then
If bi <= bLen Then
Sn(ai, bi) = s
End If
End If
Do
If ai >= aLen Then Exit Do
If bi >= bLen Then Exit Do
If StrComp(aChars(ai), bChars(bi), CompareMode) <> 0 Then Exit Do
ai = ai + 1
bi = bi + 1
Sn(ai, bi) = E_Diff_Snake.Diagonal
Loop
v(k) = bi
'- 到達したときの編集距離を返す
If ai >= aLen Then
If bi >= bLen Then
LD = D
GoTo Finish
End If
End If
End If
End If
Next
Next
Finish: fDiff_LD_Myers = LD
End Function
Private Function fDiff_Snake_Myers(Sn() As E_Diff_Snake, aChars() As String, bChars() As String) As Double
Const ClearValue As String = vbTab
Dim aLen As Long
aLen = fArray_Length(aChars)
Dim bLen As Long
bLen = fArray_Length(bChars)
'文字列が無い場合、終了
If aLen = 0 Or bLen = 0 Then Exit Function
Dim aStrs() As String
Dim bStrs() As String
aStrs = aChars
bStrs = bChars
Dim ai As Long
Dim bi As Long
ai = aLen - 1
bi = bLen - 1
Dim Point As Double
While (0 <= ai And 0 <= bi)
Select Case Sn(ai + 1, bi + 1)
Case E_Diff_Snake.Diagonal
'半角・全角、大文字小文字、ひら・カナの差異があった場合、ポイントを調整
If aStrs(ai) <> bStrs(bi) Then
Point = Point + 0.1
End If
bStrs(bi) = ClearValue
aStrs(ai) = ClearValue
ai = ai - 1
bi = bi - 1
Case E_Diff_Snake.Horizontal
bi = bi - 1
Case E_Diff_Snake.Vertical
ai = ai - 1
Case Else
ai = ai - 1
bi = bi - 1
End Select
Wend
'独自ポイントを算出
Dim bString As String
bString = Join(bStrs, "")
Const Clear2 As String = ClearValue & ClearValue
Do
If InStr(1, bString, Clear2) = 0 Then Exit Do
bString = Replace(bString, Clear2, ClearValue)
Loop
fDiff_Snake_Myers = Len(bString) + Point
End Function