新增中文词典测试文件,包含多个单元测试以验证ChineseDict结构体的功能,包括随机字符获取、行数和字符数统计、有效字符检查等。同时更新dict.go以优化文件读取和字符处理逻辑。
This commit is contained in:
parent
a214b58ac6
commit
7f1f9eb1f8
134
dict.go
134
dict.go
@ -1,60 +1,138 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"math/rand"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ChineseDict represents a dictionary containing Chinese characters
|
||||
// ChineseDict represents a dictionary containing lines of Chinese text
|
||||
type ChineseDict struct {
|
||||
characters []rune
|
||||
lines []string
|
||||
currentLineIndex int
|
||||
currentCharIndex int
|
||||
currentLineRunes []rune
|
||||
}
|
||||
|
||||
// NewChineseDict creates a new ChineseDict instance and loads content from dict.txt
|
||||
// NewChineseDict creates a new ChineseDict instance and loads lines from dict.txt
|
||||
func NewChineseDict(filePath string) (*ChineseDict, error) {
|
||||
// Read the file content
|
||||
content, err := os.ReadFile(filePath)
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Convert bytes to string and then to runes to properly handle Chinese characters
|
||||
text := string(content)
|
||||
runes := []rune(text)
|
||||
|
||||
return &ChineseDict{
|
||||
characters: runes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetRandomCharacter returns a random Chinese character from the dictionary
|
||||
func (cd *ChineseDict) GetRandomCharacter() rune {
|
||||
if len(cd.characters) == 0 {
|
||||
return 0 // Return null rune if no characters available
|
||||
var lines []string
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line != "" { // Skip empty lines
|
||||
lines = append(lines, line)
|
||||
}
|
||||
}
|
||||
|
||||
// Get random index
|
||||
randomIndex := rand.Intn(len(cd.characters))
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cd.characters[randomIndex]
|
||||
cd := &ChineseDict{
|
||||
lines: lines,
|
||||
currentLineIndex: -1, // Will be set when first character is requested
|
||||
currentCharIndex: 0,
|
||||
}
|
||||
|
||||
return cd, nil
|
||||
}
|
||||
|
||||
// isValidCharacter checks if a rune is a Chinese character or English letter
|
||||
func (cd *ChineseDict) isValidCharacter(r rune) bool {
|
||||
// Check if it's a Chinese character (CJK Unified Ideographs)
|
||||
if r >= 0x4E00 && r <= 0x9FFF {
|
||||
return true
|
||||
}
|
||||
// Check if it's an English letter
|
||||
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// selectNewRandomLine selects a new random line and resets character index
|
||||
func (cd *ChineseDict) selectNewRandomLine() {
|
||||
if len(cd.lines) == 0 {
|
||||
return
|
||||
}
|
||||
cd.currentLineIndex = rand.Intn(len(cd.lines))
|
||||
cd.currentLineRunes = []rune(cd.lines[cd.currentLineIndex])
|
||||
cd.currentCharIndex = 0
|
||||
}
|
||||
|
||||
// GetRandomCharacter returns Chinese characters or English words from random lines
|
||||
// It picks a random line and iterates through characters, skipping punctuation
|
||||
func (cd *ChineseDict) GetRandomCharacter() rune {
|
||||
if len(cd.lines) == 0 {
|
||||
return 0 // Return null rune if no lines available
|
||||
}
|
||||
|
||||
// Keep track of attempts to avoid infinite recursion
|
||||
maxAttempts := len(cd.lines) * 2 // Try each line at least twice
|
||||
attempts := 0
|
||||
|
||||
for attempts < maxAttempts {
|
||||
// If this is the first call or we've reached the end of current line, select a new line
|
||||
if cd.currentLineIndex == -1 || cd.currentCharIndex >= len(cd.currentLineRunes) {
|
||||
cd.selectNewRandomLine()
|
||||
}
|
||||
|
||||
// Find the next valid character in the current line
|
||||
for cd.currentCharIndex < len(cd.currentLineRunes) {
|
||||
currentChar := cd.currentLineRunes[cd.currentCharIndex]
|
||||
cd.currentCharIndex++
|
||||
|
||||
if cd.isValidCharacter(currentChar) {
|
||||
return currentChar
|
||||
}
|
||||
}
|
||||
|
||||
// If we've exhausted the current line without finding a valid character,
|
||||
// mark it for retry and continue
|
||||
attempts++
|
||||
cd.currentCharIndex = len(cd.currentLineRunes) // Force line selection on next iteration
|
||||
}
|
||||
|
||||
// If we've tried all lines multiple times and found no valid characters, return null
|
||||
return 0
|
||||
}
|
||||
|
||||
// GetRandomString returns a string of random Chinese characters with specified length
|
||||
func (cd *ChineseDict) GetRandomString(length int) string {
|
||||
if len(cd.characters) == 0 || length <= 0 {
|
||||
if len(cd.lines) == 0 || length <= 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
result := make([]rune, length)
|
||||
for i := 0; i < length; i++ {
|
||||
randomIndex := rand.Intn(len(cd.characters))
|
||||
result[i] = cd.characters[randomIndex]
|
||||
for i := range length {
|
||||
result[i] = cd.GetRandomCharacter()
|
||||
}
|
||||
|
||||
return string(result)
|
||||
}
|
||||
|
||||
// GetCharacterCount returns the total number of Chinese characters in the dictionary
|
||||
func (cd *ChineseDict) GetCharacterCount() int {
|
||||
return len(cd.characters)
|
||||
// GetLineCount returns the total number of lines in the dictionary
|
||||
func (cd *ChineseDict) GetLineCount() int {
|
||||
return len(cd.lines)
|
||||
}
|
||||
|
||||
// GetCharacterCount returns the total number of valid characters in the dictionary
|
||||
func (cd *ChineseDict) GetCharacterCount() int {
|
||||
count := 0
|
||||
for _, line := range cd.lines {
|
||||
for _, r := range line {
|
||||
if cd.isValidCharacter(r) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
426
dict_test.go
Normal file
426
dict_test.go
Normal file
@ -0,0 +1,426 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// createTestDictFile writes content to a fresh temporary file and returns the
// file's path. The caller is responsible for removing the file afterwards.
//
// Any failure aborts the calling test through t.Fatalf; on a failed write or
// close the partially created file is cleaned up first.
func createTestDictFile(t *testing.T, content string) string {
	// Attribute failures to the calling test's line, not to this helper.
	t.Helper()

	tmpFile, err := os.CreateTemp("", "test_dict_*.txt")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	name := tmpFile.Name()

	if _, err := tmpFile.WriteString(content); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = tmpFile.Close()
		_ = os.Remove(name)
		t.Fatalf("Failed to write to temp file: %v", err)
	}

	// A failed close can mean the content never reached the disk.
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(name)
		t.Fatalf("Failed to close temp file: %v", err)
	}

	return name
}
|
||||
|
||||
func TestDictE2E(t *testing.T) {
|
||||
dict, err := NewChineseDict("dict.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
for range 100 {
|
||||
char := dict.GetRandomCharacter()
|
||||
fmt.Printf("%c", char)
|
||||
if char == 0 {
|
||||
t.Error("GetRandomCharacter returned null rune")
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
count := dict.GetCharacterCount()
|
||||
if count == 0 {
|
||||
t.Error("GetCharacterCount returned 0")
|
||||
}
|
||||
|
||||
count = dict.GetLineCount()
|
||||
if count == 0 {
|
||||
t.Error("GetLineCount returned 0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChineseDict(t *testing.T) {
|
||||
testContent := `大家好大家好,家人们晚上好
|
||||
先点点关注不迷路,点个小红心
|
||||
Hello world test 测试`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
if dict == nil {
|
||||
t.Fatal("NewChineseDict returned nil dictionary")
|
||||
}
|
||||
|
||||
expectedLines := 3
|
||||
if len(dict.lines) != expectedLines {
|
||||
t.Errorf("Expected %d lines, got %d", expectedLines, len(dict.lines))
|
||||
}
|
||||
|
||||
// Check initial state
|
||||
if dict.currentLineIndex != -1 {
|
||||
t.Errorf("Expected currentLineIndex to be -1, got %d", dict.currentLineIndex)
|
||||
}
|
||||
|
||||
if dict.currentCharIndex != 0 {
|
||||
t.Errorf("Expected currentCharIndex to be 0, got %d", dict.currentCharIndex)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChineseDictWithEmptyLines(t *testing.T) {
|
||||
testContent := `大家好大家好
|
||||
|
||||
先点点关注不迷路
|
||||
|
||||
Hello world test`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
expectedLines := 3 // Empty lines should be filtered out
|
||||
if len(dict.lines) != expectedLines {
|
||||
t.Errorf("Expected %d lines (empty lines filtered), got %d", expectedLines, len(dict.lines))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewChineseDictFileNotFound(t *testing.T) {
|
||||
_, err := NewChineseDict("nonexistent_file.txt")
|
||||
if err == nil {
|
||||
t.Error("Expected error for nonexistent file, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsValidCharacter(t *testing.T) {
|
||||
dict := &ChineseDict{}
|
||||
|
||||
testCases := []struct {
|
||||
char rune
|
||||
expected bool
|
||||
desc string
|
||||
}{
|
||||
{'大', true, "Chinese character"},
|
||||
{'家', true, "Chinese character"},
|
||||
{'好', true, "Chinese character"},
|
||||
{'a', true, "English lowercase"},
|
||||
{'Z', true, "English uppercase"},
|
||||
{'H', true, "English uppercase"},
|
||||
{',', false, "Comma punctuation"},
|
||||
{',', false, "Chinese comma"},
|
||||
{' ', false, "Space"},
|
||||
{'!', false, "Exclamation mark"},
|
||||
{'。', false, "Chinese period"},
|
||||
{'1', false, "Number"},
|
||||
{'@', false, "Symbol"},
|
||||
{'\n', false, "Newline"},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
result := dict.isValidCharacter(tc.char)
|
||||
if result != tc.expected {
|
||||
t.Errorf("isValidCharacter('%c') for %s: expected %v, got %v",
|
||||
tc.char, tc.desc, tc.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomCharacter(t *testing.T) {
|
||||
testContent := `大家好abc
|
||||
Hello世界
|
||||
测试Test`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// Test getting characters
|
||||
validChars := make(map[rune]bool)
|
||||
for i := 0; i < 50; i++ { // Get multiple characters to test randomness
|
||||
char := dict.GetRandomCharacter()
|
||||
if char == 0 {
|
||||
t.Error("GetRandomCharacter returned null rune")
|
||||
}
|
||||
if !dict.isValidCharacter(char) {
|
||||
t.Errorf("GetRandomCharacter returned invalid character: '%c'", char)
|
||||
}
|
||||
validChars[char] = true
|
||||
}
|
||||
|
||||
// Should have gotten some variety of characters
|
||||
if len(validChars) < 5 {
|
||||
t.Errorf("Expected more variety in characters, got only %d unique chars", len(validChars))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomCharacterEmptyDict(t *testing.T) {
|
||||
dict := &ChineseDict{lines: []string{}}
|
||||
|
||||
char := dict.GetRandomCharacter()
|
||||
if char != 0 {
|
||||
t.Errorf("Expected null rune for empty dict, got '%c'", char)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomCharacterOnlyPunctuation(t *testing.T) {
|
||||
testContent := `,。!?
|
||||
!@#$%
|
||||
,,,。。。`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// This should handle lines with only punctuation gracefully
|
||||
// The function should try all lines and return null when no valid characters found
|
||||
char := dict.GetRandomCharacter()
|
||||
if char != 0 {
|
||||
t.Errorf("Expected null rune for punctuation-only dict, got '%c'", char)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomString(t *testing.T) {
|
||||
testContent := `大家好abc
|
||||
Hello世界
|
||||
测试Test`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// Test different lengths
|
||||
testLengths := []int{1, 5, 10, 20}
|
||||
|
||||
for _, length := range testLengths {
|
||||
result := dict.GetRandomString(length)
|
||||
runes := []rune(result)
|
||||
|
||||
if len(runes) != length {
|
||||
t.Errorf("GetRandomString(%d): expected length %d, got %d",
|
||||
length, length, len(runes))
|
||||
}
|
||||
|
||||
// Verify all characters are valid
|
||||
for _, r := range runes {
|
||||
if !dict.isValidCharacter(r) {
|
||||
t.Errorf("GetRandomString returned invalid character: '%c'", r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomStringZeroLength(t *testing.T) {
|
||||
testContent := `大家好`
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
result := dict.GetRandomString(0)
|
||||
if result != "" {
|
||||
t.Errorf("Expected empty string for length 0, got '%s'", result)
|
||||
}
|
||||
|
||||
result = dict.GetRandomString(-1)
|
||||
if result != "" {
|
||||
t.Errorf("Expected empty string for negative length, got '%s'", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRandomStringEmptyDict(t *testing.T) {
|
||||
dict := &ChineseDict{lines: []string{}}
|
||||
|
||||
result := dict.GetRandomString(5)
|
||||
if result != "" {
|
||||
t.Errorf("Expected empty string for empty dict, got '%s'", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetLineCount(t *testing.T) {
|
||||
testContent := `Line 1
|
||||
Line 2
|
||||
Line 3
|
||||
|
||||
Line 5`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
expectedCount := 4 // Empty line should be filtered out
|
||||
count := dict.GetLineCount()
|
||||
if count != expectedCount {
|
||||
t.Errorf("Expected line count %d, got %d", expectedCount, count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCharacterCount(t *testing.T) {
|
||||
testContent := `大家好abc!
|
||||
Hello世界,
|
||||
测试Test。`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
count := dict.GetCharacterCount()
|
||||
|
||||
// Count manually: 大家好abc + Hello世界 + 测试Test = 3+3 + 5+2 + 2+4 = 19 valid chars
|
||||
expectedCount := 19
|
||||
if count != expectedCount {
|
||||
t.Errorf("Expected character count %d, got %d", expectedCount, count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectNewRandomLine(t *testing.T) {
|
||||
testContent := `Line 1
|
||||
Line 2
|
||||
Line 3`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// Test multiple selections to ensure randomness
|
||||
selectedLines := make(map[int]bool)
|
||||
for i := 0; i < 20; i++ {
|
||||
dict.selectNewRandomLine()
|
||||
if dict.currentLineIndex < 0 || dict.currentLineIndex >= len(dict.lines) {
|
||||
t.Errorf("selectNewRandomLine set invalid index: %d", dict.currentLineIndex)
|
||||
}
|
||||
if dict.currentCharIndex != 0 {
|
||||
t.Errorf("selectNewRandomLine should reset currentCharIndex to 0, got %d", dict.currentCharIndex)
|
||||
}
|
||||
selectedLines[dict.currentLineIndex] = true
|
||||
}
|
||||
|
||||
// Should have selected different lines (with high probability)
|
||||
if len(selectedLines) < 2 {
|
||||
t.Error("selectNewRandomLine appears to not be random enough")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCharacterSequencing(t *testing.T) {
|
||||
// Test that characters are returned in sequence from lines
|
||||
testContent := `abc
|
||||
def`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// Get enough characters to likely exhaust at least one line
|
||||
chars := make([]rune, 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
chars[i] = dict.GetRandomCharacter()
|
||||
}
|
||||
|
||||
// Verify we got valid characters
|
||||
for i, char := range chars {
|
||||
if !dict.isValidCharacter(char) {
|
||||
t.Errorf("Character at position %d is invalid: '%c'", i, char)
|
||||
}
|
||||
}
|
||||
|
||||
// Should contain characters from our test content
|
||||
validTestChars := map[rune]bool{'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true}
|
||||
for _, char := range chars {
|
||||
if !validTestChars[char] {
|
||||
t.Errorf("Got unexpected character: '%c'", char)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMixedLanguageContent(t *testing.T) {
|
||||
testContent := `Hello世界!This is 测试。
|
||||
English中文Mixed内容,with punctuation!
|
||||
Another行with more混合content。`
|
||||
|
||||
tmpFile := createTestDictFile(t, testContent)
|
||||
defer os.Remove(tmpFile)
|
||||
|
||||
dict, err := NewChineseDict(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("NewChineseDict failed: %v", err)
|
||||
}
|
||||
|
||||
// Get many characters and verify they're all valid
|
||||
chineseCount := 0
|
||||
englishCount := 0
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
char := dict.GetRandomCharacter()
|
||||
if char == 0 {
|
||||
t.Error("Got null character")
|
||||
continue
|
||||
}
|
||||
|
||||
if !dict.isValidCharacter(char) {
|
||||
t.Errorf("Got invalid character: '%c' (U+%04X)", char, char)
|
||||
continue
|
||||
}
|
||||
|
||||
if char >= 0x4E00 && char <= 0x9FFF {
|
||||
chineseCount++
|
||||
} else if (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') {
|
||||
englishCount++
|
||||
}
|
||||
}
|
||||
|
||||
// Should have gotten both Chinese and English characters
|
||||
if chineseCount == 0 {
|
||||
t.Error("Expected some Chinese characters, got none")
|
||||
}
|
||||
if englishCount == 0 {
|
||||
t.Error("Expected some English characters, got none")
|
||||
}
|
||||
|
||||
t.Logf("Got %d Chinese and %d English characters", chineseCount, englishCount)
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user