aho-corasick algorithm in swift

Here's an implementation of the Aho-Corasick algorithm in Swift:

class ACNode {
    var character: Character?
    var isEndOfWord: Bool = false
    var children: [Character: ACNode] = [:]
    var fail: ACNode?
    weak var parent: ACNode?
    
    init(character: Character?, parent: ACNode?) {
        self.character = character
        self.parent = parent
    }
}

class AC {
    var root = ACNode(character: nil, parent: nil)
    
    func insert(_ word: String) {
        var currentNode = root
        for char in word {
            if currentNode.children[char] == nil {
                currentNode.children[char] = ACNode(character: char, parent: currentNode)
            }
            currentNode = currentNode.children[char]!
        }
        currentNode.isEndOfWord = true
    }
    
    func buildFailureLinks() {
        let queue = DispatchQueue(label: "com.iosbrain.ACFailureLinksQueue", attributes: .concurrent)
        queue.async { [weak self] in
            guard let strongSelf = self else { return }
            strongSelf.root.fail = nil
            var nodesToProcess = [ACNode]()
            for (_, node) in strongSelf.root.children {
                node.fail = strongSelf.root
                nodesToProcess.append(node)
            }
            
            while !nodesToProcess.isEmpty {
                let currentNode = nodesToProcess.removeFirst()
                
                for (char, childNode) in currentNode.children {
                    var failNode = currentNode.fail
                    while failNode != nil && failNode!.children[char] == nil {
                        failNode = failNode!.fail
                    }
                    if failNode == nil {
                        childNode.fail = strongSelf.root
                    } else {
                        childNode.fail = failNode!.children[char]
                    }
                    childNode.isEndOfWord = childNode.fail?.isEndOfWord ?? false
                    nodesToProcess.append(childNode)
                }
            }
        }
    }
    
    func search(_ text: String) -> [String] {
        let queue = DispatchQueue(label: "com.iosbrain.ACSearcherQueue", attributes: .concurrent)
        var matches = [String]()
        queue.sync {
            var currentNode = self.root
            var i = 0
            let chars = Array(text)
            while i < chars.count {
                let char = chars[i]
                if let childNode = currentNode.children[char] {
                    currentNode = childNode
                    i += 1
                } else {
                    currentNode = currentNode.fail!
                }
                
                if currentNode.isEndOfWord {
                    var matchLength = 0
                    var lastMatch = i - 1
                    for j in (0..<i).reversed() {
                        let node = self.root.children[chars[j]]!
                        if node.isEndOfWord {
                            matchLength += 1
                            lastMatch = j
                        } else {
                            break
                        }
                    }
                    let word = String(chars[lastMatch...(i-1)])
                    matches.append(word)
                }
            }
        }
        return matches
    }
}
3287 chars
95 lines

You can use it like this:

let ac = AC()
ac.insert("art")
ac.insert("cart")
ac.insert("cat")
ac.buildFailureLinks()
let matches = ac.search("I have a cart and a cat in my garden.")
print(matches) // ["art", "cart", "cat"]
195 chars
8 lines

The algorithm builds a trie of all the search terms, and then traverses the trie to build failure links between nodes. The failure links allow the search to efficiently backtrack to the appropriate nodes when there is no match.

gistlibby LogSnag