refactor: urfave v2
This commit is contained in:
9
vendor/github.com/xrash/smetrics/.travis.yml
generated
vendored
Normal file
9
vendor/github.com/xrash/smetrics/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
language: go
|
||||
go:
|
||||
- 1.11
|
||||
- 1.12
|
||||
- 1.13
|
||||
- 1.14.x
|
||||
- master
|
||||
script:
|
||||
- cd tests && make
|
21
vendor/github.com/xrash/smetrics/LICENSE
generated
vendored
Normal file
21
vendor/github.com/xrash/smetrics/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
Copyright (C) 2016 Felipe da Cunha Gonçalves
|
||||
All Rights Reserved.
|
||||
|
||||
MIT LICENSE
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
49
vendor/github.com/xrash/smetrics/README.md
generated
vendored
Normal file
49
vendor/github.com/xrash/smetrics/README.md
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
[![Build Status](https://travis-ci.org/xrash/smetrics.svg?branch=master)](http://travis-ci.org/xrash/smetrics)
|
||||
|
||||
# smetrics
|
||||
|
||||
`smetrics` is "string metrics".
|
||||
|
||||
Package smetrics provides a bunch of algorithms for calculating the distance between strings.
|
||||
|
||||
There are implementations for calculating the popular Levenshtein distance (aka Edit Distance or Wagner-Fischer), as well as the Jaro distance, the Jaro-Winkler distance, and more.
|
||||
|
||||
# How to import
|
||||
|
||||
```go
|
||||
import "github.com/xrash/smetrics"
|
||||
```
|
||||
|
||||
# Documentation
|
||||
|
||||
Go to [https://pkg.go.dev/github.com/xrash/smetrics](https://pkg.go.dev/github.com/xrash/smetrics) for complete documentation.
|
||||
|
||||
# Example
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/xrash/smetrics"
|
||||
)
|
||||
|
||||
func main() {
|
||||
smetrics.WagnerFischer("POTATO", "POTATTO", 1, 1, 2)
|
||||
smetrics.WagnerFischer("MOUSE", "HOUSE", 2, 2, 4)
|
||||
|
||||
smetrics.Ukkonen("POTATO", "POTATTO", 1, 1, 2)
|
||||
smetrics.Ukkonen("MOUSE", "HOUSE", 2, 2, 4)
|
||||
|
||||
smetrics.Jaro("AL", "AL")
|
||||
smetrics.Jaro("MARTHA", "MARHTA")
|
||||
|
||||
smetrics.JaroWinkler("AL", "AL", 0.7, 4)
|
||||
smetrics.JaroWinkler("MARTHA", "MARHTA", 0.7, 4)
|
||||
|
||||
smetrics.Soundex("Euler")
|
||||
smetrics.Soundex("Ellery")
|
||||
|
||||
smetrics.Hamming("aaa", "aaa")
|
||||
smetrics.Hamming("aaa", "aab")
|
||||
}
|
||||
```
|
19
vendor/github.com/xrash/smetrics/doc.go
generated
vendored
Normal file
19
vendor/github.com/xrash/smetrics/doc.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
Package smetrics provides a bunch of algorithms for calculating
|
||||
the distance between strings.
|
||||
|
||||
There are implementations for calculating the popular Levenshtein
|
||||
distance (aka Edit Distance or Wagner-Fischer), as well as the Jaro
|
||||
distance, the Jaro-Winkler distance, and more.
|
||||
|
||||
For the Levenshtein distance, you can use the functions WagnerFischer()
|
||||
and Ukkonen(). Read the documentation on these functions.
|
||||
|
||||
For the Jaro and Jaro-Winkler algorithms, check the functions
|
||||
Jaro() and JaroWinkler(). Read the documentation on these functions.
|
||||
|
||||
For the Soundex algorithm, check the function Soundex().
|
||||
|
||||
For the Hamming distance algorithm, check the function Hamming().
|
||||
*/
|
||||
package smetrics
|
25
vendor/github.com/xrash/smetrics/hamming.go
generated
vendored
Normal file
25
vendor/github.com/xrash/smetrics/hamming.go
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
package smetrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Hamming returns the Hamming distance between a and b: the number of
// positions at which the two strings hold different bytes.
//
// Both strings must have the same length in bytes. If they do not, Hamming
// returns -1 and a non-nil error describing the two lengths.
//
// The comparison is byte-wise, matching the byte-based length check, so a
// multi-byte UTF-8 character contributes one unit per differing byte.
func Hamming(a, b string) (int, error) {
	al := len(a)
	bl := len(b)

	if al != bl {
		return -1, fmt.Errorf("strings are not equal (len(a)=%d, len(b)=%d)", al, bl)
	}

	difference := 0

	// Index every byte explicitly. Ranging over a string yields only the
	// starting offset of each rune, which would skip the continuation bytes
	// of multi-byte characters and under-count differences.
	for i := 0; i < al; i++ {
		if a[i] != b[i] {
			difference++
		}
	}

	return difference, nil
}
|
28
vendor/github.com/xrash/smetrics/jaro-winkler.go
generated
vendored
Normal file
28
vendor/github.com/xrash/smetrics/jaro-winkler.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package smetrics
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// The Jaro-Winkler distance. The result is 1 for equal strings, and 0 for completely different strings. It is commonly used on Record Linkage stuff, thus it tries to be accurate for common typos when writing real names such as person names and street names.
|
||||
// Jaro-Winkler is a modification of the Jaro algorithm. It works by first running Jaro, then boosting the score of exact matches at the beginning of the strings. Because of that, it introduces two more parameters: the boostThreshold and the prefixSize. These are commonly set to 0.7 and 4, respectively.
|
||||
func JaroWinkler(a, b string, boostThreshold float64, prefixSize int) float64 {
|
||||
j := Jaro(a, b)
|
||||
|
||||
if j <= boostThreshold {
|
||||
return j
|
||||
}
|
||||
|
||||
prefixSize = int(math.Min(float64(len(a)), math.Min(float64(prefixSize), float64(len(b)))))
|
||||
|
||||
var prefixMatch float64
|
||||
for i := 0; i < prefixSize; i++ {
|
||||
if a[i] == b[i] {
|
||||
prefixMatch++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return j + 0.1*prefixMatch*(1.0-j)
|
||||
}
|
86
vendor/github.com/xrash/smetrics/jaro.go
generated
vendored
Normal file
86
vendor/github.com/xrash/smetrics/jaro.go
generated
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
package smetrics
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// Jaro returns the Jaro score of a and b. The result is 1 for equal strings
// and 0 for completely different strings. Strings are compared byte by byte.
func Jaro(a, b string) float64 {
	switch {
	case len(a) == 0 && len(b) == 0:
		// Two empty strings are considered identical.
		return 1
	case len(a) == 0 || len(b) == 0:
		// Exactly one empty string: nothing can match.
		return 0
	}

	lenA, lenB := float64(len(a)), float64(len(b))

	// Two bytes may only match if their positions are at most this far apart.
	window := int(math.Max(0, math.Floor(math.Max(lenA, lenB)/2.0)-1))

	usedA := make([]bool, len(a))
	usedB := make([]bool, len(b))
	matched := 0

	// Step 1: matches. For every byte of a, claim the first unclaimed equal
	// byte of b that lies inside the window.
	for i := range usedA {
		lo := int(math.Max(0, float64(i-window)))
		hi := int(math.Min(lenB-1, float64(i+window)))

		for j := lo; j <= hi; j++ {
			if usedB[j] || a[i] != b[j] {
				continue
			}
			usedA[i], usedB[j] = true, true
			matched++
			break
		}
	}

	// Without any match the strings are completely different.
	if matched == 0 {
		return 0
	}

	// Step 2: transpositions. Walk the matched bytes of both strings in
	// order and count the pairs that disagree.
	outOfOrder := 0
	cursor := 0
	for i, hit := range usedA {
		if !hit {
			continue
		}
		for !usedB[cursor] {
			cursor++
		}
		if a[i] != b[cursor] {
			outOfOrder++
		}
		cursor++
	}

	// Half the number of out-of-order matches is the transposition count.
	m := float64(matched)
	transposed := math.Floor(float64(outOfOrder / 2))

	// The score is the mean of three ratios:
	//  1. matches / length of a
	//  2. matches / length of b
	//  3. (matches - transpositions) / matches
	return ((m / lenA) + (m / lenB) + ((m - transposed) / m)) / 3.0
}
|
63
vendor/github.com/xrash/smetrics/soundex.go
generated
vendored
Normal file
63
vendor/github.com/xrash/smetrics/soundex.go
generated
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
package smetrics
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Soundex returns the Soundex encoding of s. Soundex is a phonetic algorithm
// that considers how words sound in English; strings that sound similar
// should map to the same code.
//
// For a non-empty input the result is a 4-byte code: the first byte of s
// (upper-cased if it is an ASCII lowercase letter) followed by three digits,
// padded with '0' when fewer than three digits are produced. An empty input
// yields an empty string.
//
// After the first byte, bytes that are not ASCII letters are ignored.
// A letter identical to the letter processed just before it is skipped;
// note that this comparison is made on the letters themselves, not on their
// Soundex digits.
func Soundex(s string) string {
	// Guard against indexing s[0] on an empty string.
	if len(s) == 0 {
		return ""
	}

	b := strings.Builder{}
	b.Grow(4)

	p := s[0]
	if p <= 'z' && p >= 'a' {
		p -= 32 // convert to uppercase
	}
	b.WriteByte(p)

	n := 0
	for i := 1; i < len(s); i++ {
		c := s[i]

		if c <= 'z' && c >= 'a' {
			c -= 32 // convert to uppercase
		} else if c < 'A' || c > 'Z' {
			// Not an ASCII letter: ignore without touching p.
			continue
		}

		// Collapse a run of the same letter.
		if c == p {
			continue
		}

		p = c

		switch c {
		case 'B', 'P', 'F', 'V':
			c = '1'
		case 'C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z':
			c = '2'
		case 'D', 'T':
			c = '3'
		case 'L':
			c = '4'
		case 'M', 'N':
			c = '5'
		case 'R':
			c = '6'
		default:
			// Letters without a digit contribute nothing to the code.
			continue
		}

		b.WriteByte(c)
		n++
		if n == 3 {
			break
		}
	}

	// Pad to three digits.
	for i := n; i < 3; i++ {
		b.WriteByte('0')
	}

	return b.String()
}
|
94
vendor/github.com/xrash/smetrics/ukkonen.go
generated
vendored
Normal file
94
vendor/github.com/xrash/smetrics/ukkonen.go
generated
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
package smetrics
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// Ukkonen computes the Levenshtein distance between a and b using the
// Ukkonen algorithm, described in
// http://www.cs.helsinki.fi/u/ukkonen/InfCont85.PDF (also docs/InfCont85.PDF).
// It runs in O(t . min(m, n)), where t is the actual distance between a and
// b, and needs O(min(t, m, n)) space. It might be preferred over
// WagnerFischer() for *very* similar strings, but measure it yourself.
//
// The first two parameters are the strings to compare. The last three are
// the insertion cost, the deletion cost and the substitution cost, normally
// 1, 1 and 2 respectively.
func Ukkonen(a, b string, icost, dcost, scost int) int {
	// The cheapest single edit operation.
	var lowerCost int
	switch {
	case icost < dcost && icost < scost:
		lowerCost = icost
	case dcost < scost:
		lowerCost = dcost
	default:
		lowerCost = scost
	}

	infinite := math.MaxInt32 / 2
	lenDiff := len(b) - len(a)

	// Initial threshold: one more than the length difference, in units of
	// the cheapest operation.
	var t int
	if len(a) > len(b) {
		t = (len(a) - len(b) + 1) * lowerCost
	} else {
		t = (len(b) - len(a) + 1) * lowerCost
	}

	var (
		r         []int
		p, kprime int
	)

	for {
		// NOTE(review): this guard continues without changing t, so the
		// loop would not make progress if it ever fired.
		if (t / lowerCost) < lenDiff {
			continue
		}

		// Integer-arithmetic counterpart of
		// floor(0.5 * (t/lowerCost - (len(b) - len(a)))); avoids floats.
		p = ((t / lowerCost) - lenDiff) / 2

		k := -p
		kprime = k

		rowlength := lenDiff + (2 * p)

		// One row of the banded table, reused for every row of a.
		r = make([]int, rowlength+2)
		for i := range r {
			r[i] = infinite
		}

		for i := 0; i <= len(a); i++ {
			for j := 0; j <= rowlength; j++ {
				col := j + k

				// The origin cell costs nothing.
				if i == 0 && col == 0 {
					r[j] = 0
					continue
				}

				ins := infinite
				if j >= 1 {
					ins = r[j-1] + icost
				}
				del := r[j+1] + dcost
				sub := r[j] + scost

				if i < 1 || i > len(a) || col > len(b) || col < 1 {
					// No byte pair exists for this cell.
					sub = infinite
				} else if a[i-1] == b[col-1] {
					// Equal bytes: carry the previous value at no cost.
					sub = r[j]
				}

				switch {
				case ins < del && ins < sub:
					r[j] = ins
				case del < sub:
					r[j] = del
				default:
					r[j] = sub
				}
			}
			// The band shifts by one column for each new row.
			k++
		}

		// Accept the result once it fits the threshold; otherwise widen.
		if r[lenDiff+(2*p)+kprime] <= t {
			break
		}
		t *= 2
	}

	return r[lenDiff+(2*p)+kprime]
}
|
48
vendor/github.com/xrash/smetrics/wagner-fischer.go
generated
vendored
Normal file
48
vendor/github.com/xrash/smetrics/wagner-fischer.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
package smetrics
|
||||
|
||||
// WagnerFischer computes the Levenshtein distance between a and b using the
// Wagner-Fischer algorithm.
//
// The first two parameters are the strings to compare. The last three are
// the insertion cost, the deletion cost and the substitution cost, normally
// 1, 1 and 2 respectively.
func WagnerFischer(a, b string, icost, dcost, scost int) int {
	// Only two rows of the table are kept: the previous one and the one
	// currently being filled.
	width := len(b) + 1
	prev := make([]int, width)
	curr := make([]int, width)

	// Row zero: reaching a prefix of b from nothing takes only insertions.
	for col := 1; col < width; col++ {
		prev[col] = col * icost
	}

	for row := 1; row <= len(a); row++ {
		// Column zero: reaching nothing from a prefix of a takes only
		// deletions.
		curr[0] = row * dcost

		for col := 1; col < width; col++ {
			// Equal bytes extend the diagonal at no cost.
			if a[row-1] == b[col-1] {
				curr[col] = prev[col-1]
				continue
			}

			ins := curr[col-1] + icost
			del := prev[col] + dcost
			sub := prev[col-1] + scost

			switch {
			case ins < del && ins < sub:
				curr[col] = ins
			case del < sub:
				curr[col] = del
			default:
				curr[col] = sub
			}
		}

		// The freshly filled row becomes the previous row.
		prev, curr = curr, prev
	}

	// After the final swap the answer sits at the end of prev.
	return prev[len(prev)-1]
}
|
Reference in New Issue
Block a user