Soundex (code snippet)

Description

Computes a Soundex code from any string.

See Wikipedia: Soundex and Understanding Classic SoundEx Algorithms: SoundEx and the Census.

Three versions are presented, each using subtly different techniques.

Requires: None

Local Variable Version

This conventional version uses entirely local variables.

Soundex.fh_lua
--[[
@Title:		Soundex Calculator
@Author:	Jane Taubman
@Version:	1.0
@LastUpdated:	February 2012
@Description:	Function to compute SoundEx codes on any string
]]
-- Compute the classic four-character Soundex code (letter + three digits) of a string.
-- Parameter: str  any string; case is ignored and characters other than A-Z are discarded.
-- Returns:   a 4-character code such as "S350", or "Z000" when str contains no letters.
-- Rules applied:
--   * the code starts with the first letter of the name;
--   * adjacent letters sharing a code number (including the first letter) are coded once;
--   * vowels (A,E,I,O,U,Y) are not coded but do separate repeated code numbers;
--   * H and W are skipped entirely, so they do not separate repeated code numbers.
function soundex(str)
	local codes = {					-- H and W are deliberately absent so they are skipped
		A=0,E=0,I=0,O=0,U=0,Y=0,
		B=1,P=1,F=1,V=1,
		C=2,S=2,G=2,J=2,K=2,Q=2,X=2,Z=2,
		D=3,T=3,
		L=4,
		M=5,N=5,
		R=6
	}
	-- Upper-case first, then strip non-letters; assigning to a single local drops gsub's count result
	local strCap = string.gsub(string.upper(str),"[^A-Z]","")
	if strCap == "" then return "Z000" end		-- No letters at all: same special case as the other versions
	local strSoundex = string.sub(strCap,1,1)	-- Code starts with the initial letter
	local lastCode = codes[strSoundex]		-- Initial letter's code suppresses an immediate duplicate (nil for H/W)
	for i = 2, string.len(strCap) do
		local code = codes[string.sub(strCap,i,i)]
		if code then				-- nil means H or W: ignore without touching lastCode
			if code ~= 0 and code ~= lastCode then
				strSoundex = strSoundex..code
				if string.len(strSoundex) == 4 then return strSoundex end
			end
			lastCode = code			-- Vowels set this to 0, allowing the same code to repeat after them
		end
	end
	return string.sub(strSoundex.."000",1,4)	-- Pad short codes with zeroes
end

Sample of Use

	print(soundex("Scadden"))

Global Variable Version

This uses a slightly different algorithm and runs about 5 times faster than the Local Variable Version by using Global variables and a cache of previously coded names.

Soundex.fh_lua
--[[
@Title:		Soundex Calculator
@Author:	Mike Tate
@Version:	2.0
@LastUpdated:	July 2012
@Description:	Function to Convert any Name to Soundex as per http://en.wikipedia.org/wiki/Soundex
		and http://creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm#SoundExAndCensus
]]
 
TblSoundex = {}					-- Soundex dictionary cache: cleaned upper-case name -> 4-char code

TblCodeNum = {					-- Soundex code number table, kept Global as in the original design
	A=0,E=0,I=0,O=0,U=0,Y=0,		-- H=0,W=0,	-- H & W are absent so they are ignored
	B=1,F=1,P=1,V=1,
	C=2,G=2,J=2,K=2,Q=2,S=2,X=2,Z=2,
	D=3,T=3,
	L=4,
	M=5,N=5,
	R=6
	}

-- Convert any name to its four-character Soundex code, caching results in TblSoundex.
-- Parameter: strAnyName  any string; case is ignored and characters other than A-Z are discarded.
-- Returns:   a 4-character code such as "S350", or "Z000" when the name contains no letters.
function StrSoundex(strAnyName)
	-- Upper-case BEFORE stripping, otherwise every lower-case letter would be deleted;
	-- the extra parentheses discard the substitution count that gsub also returns.
	strAnyName = (string.gsub(string.upper(strAnyName),"[^A-Z]",""))
	if strAnyName == "" then return "Z000" end
	local strSoundex = TblSoundex[strAnyName]				-- If already coded in cache then return previous Soundex code
	if strSoundex then return strSoundex end
	strSoundex = string.sub(strAnyName,1,1)					-- Soundex starts with initial letter
	local tblCodeNum = TblCodeNum						-- Local reference to the Global code table
	local strLastNum = tblCodeNum[strSoundex]				-- Set initial Soundex code number (nil for H or W)
	for i = 2, string.len(strAnyName) do
		local strCodeNum = tblCodeNum[string.sub(strAnyName,i,i)]	-- Step through Soundex code of each subsequent letter
		if strCodeNum then						-- nil means H or W, which are skipped
			if strCodeNum > 0 and strCodeNum ~= strLastNum then	-- Not a vowel nor same as preceding Soundex code
				strSoundex = strSoundex..strCodeNum		-- So append Soundex code until 4 chars long
				if string.len(strSoundex) == 4 then
					TblSoundex[strAnyName] = strSoundex	-- Save code in cache for future quick lookup
					return strSoundex
				end
			end
			strLastNum = strCodeNum					-- Save as preceding Soundex code, unless H or W
		end
	end
	strSoundex = string.sub(strSoundex.."0000",1,4)				-- Pad code with zeroes to 4 chars long
	TblSoundex[strAnyName] = strSoundex					-- Save code in cache for future quick lookup
	return strSoundex
end -- function StrSoundex

Sample of Use

	print(StrSoundex("Scadden"))

Function Prototype Version

This version uses a Function Prototype Closure to encapsulate the Global variables of the Global Variable Version, making them Private and Local, which has several benefits:

  • The variables cannot be accidentally accessed outside the function.
  • The variables retain the run time performance of Locals.
  • The variables persist between function calls just like Globals.

Optionally, the Soundex dictionary cache can instead be held externally in a Global variable, which allows it to be saved to a file between Plugin runs without any loss of performance.

Function Prototype Closures are explained by Programming in Lua: Chapter 6.1 Closures.

Soundex.fh_lua
--[[
@Title:		Soundex Calculator
@Author:	Jane Taubman & Mike Tate
@Version:	3.0
@LastUpdated:	October 2012
@Description:	Function Prototype to Convert any Name to Soundex as per http://en.wikipedia.org/wiki/Soundex
		and http://creativyst.com/Doc/Articles/SoundEx1/SoundEx1.htm#SoundExAndCensus
]]
 
-- Prototype Soundex Calculator: builds and returns a Soundex function with a private code table and cache.
-- Parameter: tblSoundex  optional table used as the cache (cleaned upper-case name -> code);
--                        when omitted a private table is created. The table is seeded with [""] = "Z000".
-- Returns:   function(strAnyName) that returns the 4-character Soundex code of strAnyName,
--            ignoring case and any characters other than A-Z ("Z000" when no letters remain).
function NewSoundex(tblSoundex)

	tblSoundex = tblSoundex or { }			-- Soundex dictionary cache of previously coded Names
	tblSoundex[""] = "Z000"				-- Seed with null string special case

	local tblCodeNum = {				-- Soundex code number table
		A=0,E=0,I=0,O=0,U=0,Y=0,		-- H=0,W=0,	-- H & W are absent so they are ignored
		B=1,F=1,P=1,V=1,
		C=2,G=2,J=2,K=2,Q=2,S=2,X=2,Z=2,
		D=3,T=3,
		L=4,
		M=5,N=5,
		R=6
	}

	return function (strAnyName)
		-- Upper-case BEFORE stripping, otherwise every lower-case letter would be deleted;
		-- the extra parentheses discard the substitution count that gsub also returns.
		strAnyName = (string.gsub(string.upper(strAnyName),"[^A-Z]",""))
		local strSoundex = tblSoundex[strAnyName]				-- If already coded in cache then return previous Soundex code
		if strSoundex then return strSoundex end
		strSoundex = string.sub(strAnyName,1,1)					-- Soundex starts with initial letter
		local strLastNum = tblCodeNum[strSoundex]				-- Set initial Soundex code number (nil for H or W)
		for i = 2, string.len(strAnyName) do
			local strCodeNum = tblCodeNum[string.sub(strAnyName,i,i)]	-- Step through Soundex code of each subsequent letter
			if strCodeNum then						-- nil means H or W, which are skipped
				if strCodeNum > 0 and strCodeNum ~= strLastNum then	-- Not a vowel nor same as preceding Soundex code
					strSoundex = strSoundex..strCodeNum		-- So append Soundex code until 4 chars long
					if string.len(strSoundex) == 4 then
						tblSoundex[strAnyName] = strSoundex	-- Save code in cache for future quick lookup
						return strSoundex
					end
				end
				strLastNum = strCodeNum					-- Save as preceding Soundex code, unless H or W
			end
		end
		strSoundex = string.sub(strSoundex.."0000",1,4)				-- Pad code with zeroes to 4 chars long
		tblSoundex[strAnyName] = strSoundex					-- Save code in cache for future quick lookup
		return strSoundex
	end -- anonymous function
end -- function NewSoundex

Samples of Use

Default Internal Cache Example

	StrSoundex = NewSoundex()
	print(StrSoundex("Scadden"))

Advanced External Cache Example

	TblSoundex = {}			-- External Soundex dictionary cache of previously coded Names
	StrSoundex = NewSoundex(TblSoundex)
	print(StrSoundex("Scadden"))