Add unit test and sanity test for different dict types
This commit is contained in:
parent
4cf8ce9963
commit
61aa84c691
13
dict.go
13
dict.go
@ -25,6 +25,8 @@ func NewChineseDict(filePath string) (*ChineseDict, error) {
|
||||
|
||||
var lines []string
|
||||
scanner := bufio.NewScanner(file)
|
||||
buf := make([]byte, 0, bufio.MaxScanTokenSize)
|
||||
scanner.Buffer(buf, 5*1024*1024) // 5MB buffer to handle long lines
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line != "" { // Skip empty lines
|
||||
@ -75,6 +77,17 @@ func (cd *ChineseDict) GetRandomCharacter() rune {
|
||||
return 0 // Return null rune if no lines available
|
||||
}
|
||||
|
||||
if len(cd.lines) == 1 {
|
||||
if cd.currentLineIndex == -1 {
|
||||
cd.selectNewRandomLine()
|
||||
}
|
||||
// Get random index
|
||||
randomIndex := rand.Intn(len(cd.currentLineRunes))
|
||||
cd.currentCharIndex = randomIndex + 1 // Move index forward for next call, align with multiline version for testing
|
||||
|
||||
return cd.currentLineRunes[randomIndex]
|
||||
}
|
||||
|
||||
// Keep track of attempts to avoid infinite recursion
|
||||
maxAttempts := len(cd.lines) * 2 // Try each line at least twice
|
||||
attempts := 0
|
||||
|
||||
70
dict_test.go
70
dict_test.go
@ -48,6 +48,32 @@ func TestDictE2E(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictE2ESingleLineVer(t *testing.T) {
|
||||
dict, err := NewChineseDict("random_char.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
for range 1000 {
|
||||
char := dict.GetRandomCharacter()
|
||||
fmt.Printf("%c", char)
|
||||
if char == 0 {
|
||||
t.Error("GetRandomCharacter returned null rune")
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
count := dict.GetCharacterCount()
|
||||
if count == 0 {
|
||||
t.Error("GetCharacterCount returned 0")
|
||||
}
|
||||
|
||||
count = dict.GetLineCount()
|
||||
if count == 0 {
|
||||
t.Error("GetLineCount returned 0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChineseDict(t *testing.T) {
|
||||
testContent := `大家好大家好,家人们晚上好
|
||||
先点点关注不迷路,点个小红心
|
||||
@ -424,3 +450,47 @@ Another行with more混合content。`
|
||||
|
||||
t.Logf("Got %d Chinese and %d English characters", chineseCount, englishCount)
|
||||
}
|
||||
|
||||
func TestSingleLineContent(t *testing.T) {
|
||||
testContent := `华枝睾吸虫豆卷叶野螟小叶榕木`
|
||||
//黄眉企鹅五房` for multiline false test
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
runeInPosition := 0
|
||||
currentPosition := -1
|
||||
|
||||
for range 100 {
|
||||
char := dict.GetRandomCharacter()
|
||||
if char == 0 {
|
||||
t.Error("Got null character")
|
||||
continue
|
||||
}
|
||||
|
||||
if !dict.isValidCharacter(char) {
|
||||
t.Errorf("Got invalid character: '%c' (U+%04X)", char, char)
|
||||
continue
|
||||
}
|
||||
|
||||
if char == '华' || char == '黄' {
|
||||
currentPosition = 0
|
||||
}
|
||||
|
||||
if currentPosition+1 == dict.currentCharIndex {
|
||||
runeInPosition++
|
||||
currentPosition++
|
||||
}
|
||||
}
|
||||
|
||||
if runeInPosition == 100 {
|
||||
t.Error("GetRandomCharacter is not random enough, always returning characters in sequence")
|
||||
}
|
||||
|
||||
t.Logf("Rune in position: %d", runeInPosition)
|
||||
}
|
||||
|
||||
1
random_char.txt
Normal file
1
random_char.txt
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user