Add unit test and sanity test for different dict types

This commit is contained in:
wjsjwr 2025-09-28 22:48:27 +08:00
parent 4cf8ce9963
commit 61aa84c691
3 changed files with 84 additions and 0 deletions

13
dict.go
View File

@ -25,6 +25,8 @@ func NewChineseDict(filePath string) (*ChineseDict, error) {
var lines []string
scanner := bufio.NewScanner(file)
buf := make([]byte, 0, bufio.MaxScanTokenSize)
scanner.Buffer(buf, 5*1024*1024) // 5MB buffer to handle long lines
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" { // Skip empty lines
@ -75,6 +77,17 @@ func (cd *ChineseDict) GetRandomCharacter() rune {
return 0 // Return null rune if no lines available
}
if len(cd.lines) == 1 {
if cd.currentLineIndex == -1 {
cd.selectNewRandomLine()
}
// Get random index
randomIndex := rand.Intn(len(cd.currentLineRunes))
cd.currentCharIndex = randomIndex + 1 // Move index forward for next call, align with multiline version for testing
return cd.currentLineRunes[randomIndex]
}
// Keep track of attempts to avoid infinite recursion
maxAttempts := len(cd.lines) * 2 // Try each line at least twice
attempts := 0

View File

@ -48,6 +48,32 @@ func TestDictE2E(t *testing.T) {
}
}
// TestDictE2ESingleLineVer is an end-to-end sanity check that loads the
// random_char.txt fixture (presumably a single-line dictionary, judging by the
// test name; confirm against the fixture) and exercises the public accessors.
//
// It draws 1000 characters, echoing each one to stdout, and reports an error
// for every null rune returned. Afterwards it checks that both the character
// count and the line count are non-zero.
func TestDictE2ESingleLineVer(t *testing.T) {
	const draws = 1000

	dict, err := NewChineseDict("random_char.txt")
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	for range draws {
		r := dict.GetRandomCharacter()
		// Echo before validating so the output shows everything that was drawn.
		fmt.Printf("%c", r)
		if r == 0 {
			t.Error("GetRandomCharacter returned null rune")
		}
	}
	fmt.Println()

	if dict.GetCharacterCount() == 0 {
		t.Error("GetCharacterCount returned 0")
	}
	if dict.GetLineCount() == 0 {
		t.Error("GetLineCount returned 0")
	}
}
func TestNewChineseDict(t *testing.T) {
testContent := `大家好大家好家人们晚上好
先点点关注不迷路点个小红心
@ -424,3 +450,47 @@ Another行with more混合content。`
t.Logf("Got %d Chinese and %d English characters", chineseCount, englishCount)
}
// TestSingleLineContent checks that GetRandomCharacter, when backed by a
// dictionary file containing a single line, does not simply walk through the
// line in order.
//
// It draws 100 characters, reports null or invalid ones, and counts how many
// draws continued a run that began at a line-leading rune. The test fails only
// if every one of the 100 draws was part of such a run.
func TestSingleLineContent(t *testing.T) {
	const iterations = 100

	// NOTE(review): the original carried a commented-out second line
	// ("黄眉企鹅五房"), apparently meant to be appended to this literal to
	// exercise the multi-line path instead; confirm the intent.
	content := `华枝睾吸虫豆卷叶野螟小叶榕木`
	path := createTestDictFile(t, content)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	sequentialHits := 0
	lastPos := -1 // stays at -1 until a line-leading rune is first seen
	for range iterations {
		r := dict.GetRandomCharacter()
		switch {
		case r == 0:
			t.Error("Got null character")
			continue
		case !dict.isValidCharacter(r):
			t.Errorf("Got invalid character: '%c' (U+%04X)", r, r)
			continue
		}

		// '华' opens the line above; '黄' opens the optional second line.
		// Seeing either one restarts position tracking from the line start.
		if r == '华' || r == '黄' {
			lastPos = 0
		}
		// The dictionary's index sitting exactly one past the tracked
		// position means this draw continued the in-order run.
		if dict.currentCharIndex == lastPos+1 {
			sequentialHits++
			lastPos++
		}
	}

	if sequentialHits == iterations {
		t.Error("GetRandomCharacter is not random enough, always returning characters in sequence")
	}
	t.Logf("Rune in position: %d", sequentialHits)
}

1
random_char.txt Normal file

File diff suppressed because one or more lines are too long