新增中文词典测试文件,包含多个单元测试以验证ChineseDict结构体的功能,包括随机字符获取、行数和字符数统计、有效字符检查等。同时更新dict.go以优化文件读取和字符处理逻辑。

This commit is contained in:
wjsjwr 2025-09-08 23:14:07 +08:00
parent a214b58ac6
commit 7f1f9eb1f8
3 changed files with 721 additions and 29 deletions

134
dict.go
View File

@ -1,60 +1,138 @@
package main
import (
"bufio"
"math/rand"
"os"
"strings"
)
// ChineseDict represents a dictionary containing lines of Chinese text.
//
// Besides the loaded lines it keeps a cursor (the selected line and a position
// inside it) that GetRandomCharacter advances from call to call.
type ChineseDict struct {
	lines            []string // dictionary lines; NewChineseDict stores them trimmed and without blanks
	currentLineIndex int      // index into lines of the selected line; NewChineseDict starts it at -1
	currentCharIndex int      // position in currentLineRunes of the next rune to examine
	currentLineRunes []rune   // runes of the currently selected line
}
// NewChineseDict creates a new ChineseDict instance and loads content from dict.txt
// NewChineseDict creates a new ChineseDict instance and loads lines from dict.txt
func NewChineseDict(filePath string) (*ChineseDict, error) {
// Read the file content
content, err := os.ReadFile(filePath)
file, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer file.Close()
// Convert bytes to string and then to runes to properly handle Chinese characters
text := string(content)
runes := []rune(text)
return &ChineseDict{
characters: runes,
}, nil
}
// GetRandomCharacter returns a random Chinese character from the dictionary
func (cd *ChineseDict) GetRandomCharacter() rune {
if len(cd.characters) == 0 {
return 0 // Return null rune if no characters available
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line != "" { // Skip empty lines
lines = append(lines, line)
}
}
// Get random index
randomIndex := rand.Intn(len(cd.characters))
if err := scanner.Err(); err != nil {
return nil, err
}
return cd.characters[randomIndex]
cd := &ChineseDict{
lines: lines,
currentLineIndex: -1, // Will be set when first character is requested
currentCharIndex: 0,
}
return cd, nil
}
// isValidCharacter reports whether r is a character the dictionary hands out:
// a code point in the CJK Unified Ideographs block (U+4E00..U+9FFF) or an
// ASCII letter. Everything else (digits, spaces, punctuation) is rejected.
func (cd *ChineseDict) isValidCharacter(r rune) bool {
	switch {
	case 0x4E00 <= r && r <= 0x9FFF: // CJK Unified Ideographs
		return true
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z': // English letters
		return true
	default:
		return false
	}
}
// selectNewRandomLine moves the cursor to the start of a line picked with
// rand.Intn and caches that line's runes. It does nothing when the dictionary
// has no lines.
func (cd *ChineseDict) selectNewRandomLine() {
	total := len(cd.lines)
	if total == 0 {
		return
	}

	picked := rand.Intn(total)
	cd.currentLineIndex = picked
	cd.currentLineRunes = []rune(cd.lines[picked])
	cd.currentCharIndex = 0
}
// GetRandomCharacter returns the next valid character (a Chinese character or
// an English letter, as decided by isValidCharacter) from the dictionary.
//
// Characters are read in order from the current line, skipping punctuation and
// other invalid runes. On the first call, or once the current line is used up,
// a new line is chosen with selectNewRandomLine. It returns 0 when the
// dictionary has no lines or when no line contains a valid character.
func (cd *ChineseDict) GetRandomCharacter() rune {
	if len(cd.lines) == 0 {
		return 0 // Return null rune if no lines available
	}

	// Random phase: bounded so that a dictionary made only of invalid runes
	// cannot loop forever.
	maxAttempts := len(cd.lines) * 2
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// First call, or the current line is exhausted: pick another line.
		if cd.currentLineIndex == -1 || cd.currentCharIndex >= len(cd.currentLineRunes) {
			cd.selectNewRandomLine()
		}
		if r, ok := cd.nextValidInCurrentLine(); ok {
			return r
		}
	}

	// Fallback phase: the random picks may all have landed on lines without a
	// valid character even though another line has one. Sweep every line once
	// so that 0 is only returned when the dictionary truly has nothing to give.
	for i, line := range cd.lines {
		cd.currentLineIndex = i
		cd.currentLineRunes = []rune(line)
		cd.currentCharIndex = 0
		if r, ok := cd.nextValidInCurrentLine(); ok {
			return r
		}
	}
	return 0
}

// nextValidInCurrentLine advances the cursor through the current line and
// returns the first valid rune it meets. It reports false, leaving the cursor
// at the end of the line, when no valid rune remains.
func (cd *ChineseDict) nextValidInCurrentLine() (rune, bool) {
	for cd.currentCharIndex < len(cd.currentLineRunes) {
		r := cd.currentLineRunes[cd.currentCharIndex]
		cd.currentCharIndex++
		if cd.isValidCharacter(r) {
			return r, true
		}
	}
	return 0, false
}
// GetRandomString returns a string of random Chinese characters with specified length
func (cd *ChineseDict) GetRandomString(length int) string {
if len(cd.characters) == 0 || length <= 0 {
if len(cd.lines) == 0 || length <= 0 {
return ""
}
result := make([]rune, length)
for i := 0; i < length; i++ {
randomIndex := rand.Intn(len(cd.characters))
result[i] = cd.characters[randomIndex]
for i := range length {
result[i] = cd.GetRandomCharacter()
}
return string(result)
}
// GetCharacterCount returns the total number of Chinese characters in the dictionary
func (cd *ChineseDict) GetCharacterCount() int {
return len(cd.characters)
// GetLineCount returns the total number of lines in the dictionary
func (cd *ChineseDict) GetLineCount() int {
return len(cd.lines)
}
// GetCharacterCount walks every rune of every line and returns how many of
// them isValidCharacter accepts.
func (cd *ChineseDict) GetCharacterCount() int {
	var total int
	for i := range cd.lines {
		for _, ch := range cd.lines[i] {
			if !cd.isValidCharacter(ch) {
				continue
			}
			total++
		}
	}
	return total
}

190
dict.txt

File diff suppressed because one or more lines are too long

426
dict_test.go Normal file
View File

@ -0,0 +1,426 @@
package main
import (
"fmt"
"os"
"testing"
)
// createTestDictFile writes content to a freshly created temporary file and
// returns the file's path.
//
// The calling test is aborted with t.Fatalf when the file cannot be created,
// written or closed; in the latter two cases the partial file is removed
// first. On success the caller is responsible for removing the file.
func createTestDictFile(t *testing.T, content string) string {
	t.Helper() // attribute failures to the calling test, not to this helper

	tmpFile, err := os.CreateTemp("", "test_dict_*.txt")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	name := tmpFile.Name()

	if _, err := tmpFile.WriteString(content); err != nil {
		// Do not leak the descriptor or leave a half-written file behind.
		tmpFile.Close()
		os.Remove(name)
		t.Fatalf("Failed to write to temp file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		os.Remove(name)
		t.Fatalf("Failed to close temp file: %v", err)
	}
	return name
}
// TestDictE2E loads dict.txt from the working directory, draws 100 characters
// (echoing each to stdout) and checks that none is the null rune and that both
// the character and line counts are non-zero.
func TestDictE2E(t *testing.T) {
	dict, err := NewChineseDict("dict.txt")
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	for drawn := 0; drawn < 100; drawn++ {
		char := dict.GetRandomCharacter()
		fmt.Printf("%c", char)
		if char == 0 {
			t.Error("GetRandomCharacter returned null rune")
		}
	}
	fmt.Println()

	if chars := dict.GetCharacterCount(); chars == 0 {
		t.Error("GetCharacterCount returned 0")
	}
	if lines := dict.GetLineCount(); lines == 0 {
		t.Error("GetLineCount returned 0")
	}
}
// TestNewChineseDict loads a three-line fixture and checks the line count and
// the initial cursor state of the returned dictionary.
func TestNewChineseDict(t *testing.T) {
	const wantLines = 3

	path := createTestDictFile(t, `大家好大家好家人们晚上好
先点点关注不迷路点个小红心
Hello world test 测试`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	switch {
	case err != nil:
		t.Fatalf("NewChineseDict failed: %v", err)
	case dict == nil:
		t.Fatal("NewChineseDict returned nil dictionary")
	}

	if got := len(dict.lines); got != wantLines {
		t.Errorf("Expected %d lines, got %d", wantLines, got)
	}

	// Check initial state
	if got := dict.currentLineIndex; got != -1 {
		t.Errorf("Expected currentLineIndex to be -1, got %d", got)
	}
	if got := dict.currentCharIndex; got != 0 {
		t.Errorf("Expected currentCharIndex to be 0, got %d", got)
	}
}
// TestNewChineseDictWithEmptyLines checks that NewChineseDict drops blank and
// whitespace-only lines while keeping the three lines that carry text.
func TestNewChineseDictWithEmptyLines(t *testing.T) {
	// The blank lines are spelled out with escapes so the fixture cannot lose
	// them to editors or formatters, which would leave the filter untested.
	testContent := "大家好大家好\n" +
		"\n" +
		"先点点关注不迷路\n" +
		"   \n" +
		"\t\n" +
		"Hello world test\n" +
		"\n"
	tmpFile := createTestDictFile(t, testContent)
	defer os.Remove(tmpFile)

	dict, err := NewChineseDict(tmpFile)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	expectedLines := 3 // Empty lines should be filtered out
	if len(dict.lines) != expectedLines {
		t.Errorf("Expected %d lines (empty lines filtered), got %d", expectedLines, len(dict.lines))
	}
}
// TestNewChineseDictFileNotFound checks that a missing file is reported as an
// error.
func TestNewChineseDictFileNotFound(t *testing.T) {
	if _, err := NewChineseDict("nonexistent_file.txt"); err == nil {
		t.Error("Expected error for nonexistent file, got nil")
	}
}
func TestIsValidCharacter(t *testing.T) {
dict := &ChineseDict{}
testCases := []struct {
char rune
expected bool
desc string
}{
{'大', true, "Chinese character"},
{'家', true, "Chinese character"},
{'好', true, "Chinese character"},
{'a', true, "English lowercase"},
{'Z', true, "English uppercase"},
{'H', true, "English uppercase"},
{',', false, "Comma punctuation"},
{'', false, "Chinese comma"},
{' ', false, "Space"},
{'!', false, "Exclamation mark"},
{'。', false, "Chinese period"},
{'1', false, "Number"},
{'@', false, "Symbol"},
{'\n', false, "Newline"},
}
for _, tc := range testCases {
result := dict.isValidCharacter(tc.char)
if result != tc.expected {
t.Errorf("isValidCharacter('%c') for %s: expected %v, got %v",
tc.char, tc.desc, tc.expected, result)
}
}
}
// TestGetRandomCharacter draws 50 characters from a mixed fixture and checks
// that each is non-null and valid, and that at least five distinct characters
// were seen.
func TestGetRandomCharacter(t *testing.T) {
	path := createTestDictFile(t, `大家好abc
Hello世界
测试Test`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	// Set of every distinct character drawn.
	seen := map[rune]struct{}{}
	for remaining := 50; remaining > 0; remaining-- {
		char := dict.GetRandomCharacter()
		if char == 0 {
			t.Error("GetRandomCharacter returned null rune")
		}
		if !dict.isValidCharacter(char) {
			t.Errorf("GetRandomCharacter returned invalid character: '%c'", char)
		}
		seen[char] = struct{}{}
	}

	// Should have gotten some variety of characters
	if unique := len(seen); unique < 5 {
		t.Errorf("Expected more variety in characters, got only %d unique chars", unique)
	}
}
// TestGetRandomCharacterEmptyDict checks that a dictionary without lines
// yields the null rune.
func TestGetRandomCharacterEmptyDict(t *testing.T) {
	empty := ChineseDict{lines: []string{}}

	if char := empty.GetRandomCharacter(); char != 0 {
		t.Errorf("Expected null rune for empty dict, got '%c'", char)
	}
}
// TestGetRandomCharacterOnlyPunctuation checks that a dictionary whose only
// line holds symbols yields the null rune instead of looping forever.
func TestGetRandomCharacterOnlyPunctuation(t *testing.T) {
	path := createTestDictFile(t, `
@#$%
`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	// No line offers a valid character, so after trying the lines the
	// function is expected to give up and return null.
	if char := dict.GetRandomCharacter(); char != 0 {
		t.Errorf("Expected null rune for punctuation-only dict, got '%c'", char)
	}
}
// TestGetRandomString requests strings of several lengths and checks both the
// rune count and the validity of every rune returned.
func TestGetRandomString(t *testing.T) {
	path := createTestDictFile(t, `大家好abc
Hello世界
测试Test`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	for _, want := range []int{1, 5, 10, 20} {
		got := []rune(dict.GetRandomString(want))

		if len(got) != want {
			t.Errorf("GetRandomString(%d): expected length %d, got %d",
				want, want, len(got))
		}

		// Verify all characters are valid
		for _, r := range got {
			if dict.isValidCharacter(r) {
				continue
			}
			t.Errorf("GetRandomString returned invalid character: '%c'", r)
		}
	}
}
// TestGetRandomStringZeroLength checks that zero and negative lengths both
// produce the empty string.
func TestGetRandomStringZeroLength(t *testing.T) {
	path := createTestDictFile(t, `大家好`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	if got := dict.GetRandomString(0); got != "" {
		t.Errorf("Expected empty string for length 0, got '%s'", got)
	}
	if got := dict.GetRandomString(-1); got != "" {
		t.Errorf("Expected empty string for negative length, got '%s'", got)
	}
}
// TestGetRandomStringEmptyDict checks that a dictionary without lines yields
// the empty string regardless of the requested length.
func TestGetRandomStringEmptyDict(t *testing.T) {
	empty := ChineseDict{lines: []string{}}

	if got := empty.GetRandomString(5); got != "" {
		t.Errorf("Expected empty string for empty dict, got '%s'", got)
	}
}
// TestGetLineCount checks that GetLineCount reports only the non-empty lines
// of the fixture.
func TestGetLineCount(t *testing.T) {
	// "Line 4" is deliberately blank. It is written with explicit escapes so
	// the empty line cannot silently disappear from the fixture.
	testContent := "Line 1\nLine 2\nLine 3\n\nLine 5"
	tmpFile := createTestDictFile(t, testContent)
	defer os.Remove(tmpFile)

	dict, err := NewChineseDict(tmpFile)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	expectedCount := 4 // Empty line should be filtered out
	count := dict.GetLineCount()
	if count != expectedCount {
		t.Errorf("Expected line count %d, got %d", expectedCount, count)
	}
}
// TestGetCharacterCount checks the number of valid characters counted in a
// three-line mixed Chinese/English fixture.
func TestGetCharacterCount(t *testing.T) {
	// 大家好abc + Hello世界 + 测试Test = (3+3) + (5+2) + (2+4) = 19 valid chars
	const want = 19

	path := createTestDictFile(t, `大家好abc
Hello世界
测试Test`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	if got := dict.GetCharacterCount(); got != want {
		t.Errorf("Expected character count %d, got %d", want, got)
	}
}
// TestSelectNewRandomLine calls selectNewRandomLine 20 times and checks that
// each call leaves a valid line index and a reset character index, and that
// more than one distinct line was picked overall.
func TestSelectNewRandomLine(t *testing.T) {
	path := createTestDictFile(t, `Line 1
Line 2
Line 3`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	// Set of line indexes that were selected at least once.
	picked := map[int]struct{}{}
	for round := 1; round <= 20; round++ {
		dict.selectNewRandomLine()

		idx := dict.currentLineIndex
		if !(idx >= 0 && idx < len(dict.lines)) {
			t.Errorf("selectNewRandomLine set invalid index: %d", dict.currentLineIndex)
		}
		if pos := dict.currentCharIndex; pos != 0 {
			t.Errorf("selectNewRandomLine should reset currentCharIndex to 0, got %d", pos)
		}
		picked[dict.currentLineIndex] = struct{}{}
	}

	// Should have selected different lines (with high probability)
	if len(picked) < 2 {
		t.Error("selectNewRandomLine appears to not be random enough")
	}
}
// TestCharacterSequencing draws ten characters from a two-line fixture and
// checks that every one is valid and belongs to the fixture's alphabet.
func TestCharacterSequencing(t *testing.T) {
	path := createTestDictFile(t, `abc
def`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	// Ten draws are enough to run past the end of at least one 3-rune line.
	var drawn [10]rune
	for i := range drawn {
		drawn[i] = dict.GetRandomCharacter()
	}

	// Verify we got valid characters
	for pos, char := range drawn {
		if dict.isValidCharacter(char) {
			continue
		}
		t.Errorf("Character at position %d is invalid: '%c'", pos, char)
	}

	// Should contain characters from our test content
	for _, char := range drawn {
		switch char {
		case 'a', 'b', 'c', 'd', 'e', 'f':
			// part of the fixture
		default:
			t.Errorf("Got unexpected character: '%c'", char)
		}
	}
}
// TestMixedLanguageContent draws 100 characters from a fixture that mixes
// Chinese and English and checks that all are valid and that both scripts
// appear at least once.
func TestMixedLanguageContent(t *testing.T) {
	path := createTestDictFile(t, `Hello世界This is 测试
English中文Mixed内容with punctuation
Another行with more混合content`)
	defer os.Remove(path)

	dict, err := NewChineseDict(path)
	if err != nil {
		t.Fatalf("NewChineseDict failed: %v", err)
	}

	// Tally the valid draws per script.
	var chineseCount, englishCount int
	for draws := 100; draws > 0; draws-- {
		char := dict.GetRandomCharacter()

		switch {
		case char == 0:
			t.Error("Got null character")
			continue
		case !dict.isValidCharacter(char):
			t.Errorf("Got invalid character: '%c' (U+%04X)", char, char)
			continue
		}

		switch {
		case char >= 0x4E00 && char <= 0x9FFF:
			chineseCount++
		case (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z'):
			englishCount++
		}
	}

	// Should have gotten both Chinese and English characters
	if chineseCount == 0 {
		t.Error("Expected some Chinese characters, got none")
	}
	if englishCount == 0 {
		t.Error("Expected some English characters, got none")
	}

	t.Logf("Got %d Chinese and %d English characters", chineseCount, englishCount)
}