新增中文词典测试文件,包含多个单元测试以验证ChineseDict结构体的功能,包括随机字符获取、行数和字符数统计、有效字符检查等。同时更新dict.go以优化文件读取和字符处理逻辑。
This commit is contained in:
parent
a214b58ac6
commit
7f1f9eb1f8
134
dict.go
134
dict.go
@ -1,60 +1,138 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ChineseDict represents a dictionary containing Chinese characters
|
// ChineseDict represents a dictionary containing lines of Chinese text
|
||||||
type ChineseDict struct {
|
type ChineseDict struct {
|
||||||
characters []rune
|
lines []string
|
||||||
|
currentLineIndex int
|
||||||
|
currentCharIndex int
|
||||||
|
currentLineRunes []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewChineseDict creates a new ChineseDict instance and loads content from dict.txt
|
// NewChineseDict creates a new ChineseDict instance and loads lines from dict.txt
|
||||||
func NewChineseDict(filePath string) (*ChineseDict, error) {
|
func NewChineseDict(filePath string) (*ChineseDict, error) {
|
||||||
// Read the file content
|
file, err := os.Open(filePath)
|
||||||
content, err := os.ReadFile(filePath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
// Convert bytes to string and then to runes to properly handle Chinese characters
|
var lines []string
|
||||||
text := string(content)
|
scanner := bufio.NewScanner(file)
|
||||||
runes := []rune(text)
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
return &ChineseDict{
|
if line != "" { // Skip empty lines
|
||||||
characters: runes,
|
lines = append(lines, line)
|
||||||
}, nil
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// GetRandomCharacter returns a random Chinese character from the dictionary
|
|
||||||
func (cd *ChineseDict) GetRandomCharacter() rune {
|
|
||||||
if len(cd.characters) == 0 {
|
|
||||||
return 0 // Return null rune if no characters available
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get random index
|
if err := scanner.Err(); err != nil {
|
||||||
randomIndex := rand.Intn(len(cd.characters))
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
return cd.characters[randomIndex]
|
cd := &ChineseDict{
|
||||||
|
lines: lines,
|
||||||
|
currentLineIndex: -1, // Will be set when first character is requested
|
||||||
|
currentCharIndex: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
return cd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isValidCharacter checks if a rune is a Chinese character or English letter
|
||||||
|
func (cd *ChineseDict) isValidCharacter(r rune) bool {
|
||||||
|
// Check if it's a Chinese character (CJK Unified Ideographs)
|
||||||
|
if r >= 0x4E00 && r <= 0x9FFF {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Check if it's an English letter
|
||||||
|
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// selectNewRandomLine selects a new random line and resets character index
|
||||||
|
func (cd *ChineseDict) selectNewRandomLine() {
|
||||||
|
if len(cd.lines) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cd.currentLineIndex = rand.Intn(len(cd.lines))
|
||||||
|
cd.currentLineRunes = []rune(cd.lines[cd.currentLineIndex])
|
||||||
|
cd.currentCharIndex = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRandomCharacter returns Chinese characters or English words from random lines
|
||||||
|
// It picks a random line and iterates through characters, skipping punctuation
|
||||||
|
func (cd *ChineseDict) GetRandomCharacter() rune {
|
||||||
|
if len(cd.lines) == 0 {
|
||||||
|
return 0 // Return null rune if no lines available
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep track of attempts to avoid infinite recursion
|
||||||
|
maxAttempts := len(cd.lines) * 2 // Try each line at least twice
|
||||||
|
attempts := 0
|
||||||
|
|
||||||
|
for attempts < maxAttempts {
|
||||||
|
// If this is the first call or we've reached the end of current line, select a new line
|
||||||
|
if cd.currentLineIndex == -1 || cd.currentCharIndex >= len(cd.currentLineRunes) {
|
||||||
|
cd.selectNewRandomLine()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the next valid character in the current line
|
||||||
|
for cd.currentCharIndex < len(cd.currentLineRunes) {
|
||||||
|
currentChar := cd.currentLineRunes[cd.currentCharIndex]
|
||||||
|
cd.currentCharIndex++
|
||||||
|
|
||||||
|
if cd.isValidCharacter(currentChar) {
|
||||||
|
return currentChar
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we've exhausted the current line without finding a valid character,
|
||||||
|
// mark it for retry and continue
|
||||||
|
attempts++
|
||||||
|
cd.currentCharIndex = len(cd.currentLineRunes) // Force line selection on next iteration
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we've tried all lines multiple times and found no valid characters, return null
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetRandomString returns a string of random Chinese characters with specified length
|
// GetRandomString returns a string of random Chinese characters with specified length
|
||||||
func (cd *ChineseDict) GetRandomString(length int) string {
|
func (cd *ChineseDict) GetRandomString(length int) string {
|
||||||
if len(cd.characters) == 0 || length <= 0 {
|
if len(cd.lines) == 0 || length <= 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
result := make([]rune, length)
|
result := make([]rune, length)
|
||||||
for i := 0; i < length; i++ {
|
for i := range length {
|
||||||
randomIndex := rand.Intn(len(cd.characters))
|
result[i] = cd.GetRandomCharacter()
|
||||||
result[i] = cd.characters[randomIndex]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return string(result)
|
return string(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetCharacterCount returns the total number of Chinese characters in the dictionary
|
// GetLineCount returns the total number of lines in the dictionary
|
||||||
func (cd *ChineseDict) GetCharacterCount() int {
|
func (cd *ChineseDict) GetLineCount() int {
|
||||||
return len(cd.characters)
|
return len(cd.lines)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCharacterCount returns the total number of valid characters in the dictionary
|
||||||
|
func (cd *ChineseDict) GetCharacterCount() int {
|
||||||
|
count := 0
|
||||||
|
for _, line := range cd.lines {
|
||||||
|
for _, r := range line {
|
||||||
|
if cd.isValidCharacter(r) {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count
|
||||||
}
|
}
|
||||||
|
|||||||
426
dict_test.go
Normal file
426
dict_test.go
Normal file
@ -0,0 +1,426 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// createTestDictFile writes content to a fresh temporary file and returns its
// path. The caller is responsible for removing the file when done.
//
// Any failure to create, write or close the file aborts the calling test via
// t.Fatalf; on a write or close failure the partially written file is removed
// first so it does not linger in the temp directory.
func createTestDictFile(t *testing.T, content string) string {
	t.Helper() // report failures at the caller's line

	tmpFile, err := os.CreateTemp("", "test_dict_*.txt")
	if err != nil {
		t.Fatalf("Failed to create temp file: %v", err)
	}
	name := tmpFile.Name()

	if _, err := tmpFile.WriteString(content); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = tmpFile.Close()
		_ = os.Remove(name)
		t.Fatalf("Failed to write to temp file: %v", err)
	}

	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(name)
		t.Fatalf("Failed to close temp file: %v", err)
	}

	return name
}
|
||||||
|
|
||||||
|
func TestDictE2E(t *testing.T) {
|
||||||
|
dict, err := NewChineseDict("dict.txt")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for range 100 {
|
||||||
|
char := dict.GetRandomCharacter()
|
||||||
|
fmt.Printf("%c", char)
|
||||||
|
if char == 0 {
|
||||||
|
t.Error("GetRandomCharacter returned null rune")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
count := dict.GetCharacterCount()
|
||||||
|
if count == 0 {
|
||||||
|
t.Error("GetCharacterCount returned 0")
|
||||||
|
}
|
||||||
|
|
||||||
|
count = dict.GetLineCount()
|
||||||
|
if count == 0 {
|
||||||
|
t.Error("GetLineCount returned 0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewChineseDict(t *testing.T) {
|
||||||
|
testContent := `大家好大家好,家人们晚上好
|
||||||
|
先点点关注不迷路,点个小红心
|
||||||
|
Hello world test 测试`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if dict == nil {
|
||||||
|
t.Fatal("NewChineseDict returned nil dictionary")
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedLines := 3
|
||||||
|
if len(dict.lines) != expectedLines {
|
||||||
|
t.Errorf("Expected %d lines, got %d", expectedLines, len(dict.lines))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check initial state
|
||||||
|
if dict.currentLineIndex != -1 {
|
||||||
|
t.Errorf("Expected currentLineIndex to be -1, got %d", dict.currentLineIndex)
|
||||||
|
}
|
||||||
|
|
||||||
|
if dict.currentCharIndex != 0 {
|
||||||
|
t.Errorf("Expected currentCharIndex to be 0, got %d", dict.currentCharIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewChineseDictWithEmptyLines(t *testing.T) {
|
||||||
|
testContent := `大家好大家好
|
||||||
|
|
||||||
|
先点点关注不迷路
|
||||||
|
|
||||||
|
Hello world test`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedLines := 3 // Empty lines should be filtered out
|
||||||
|
if len(dict.lines) != expectedLines {
|
||||||
|
t.Errorf("Expected %d lines (empty lines filtered), got %d", expectedLines, len(dict.lines))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewChineseDictFileNotFound(t *testing.T) {
|
||||||
|
_, err := NewChineseDict("nonexistent_file.txt")
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Expected error for nonexistent file, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsValidCharacter(t *testing.T) {
|
||||||
|
dict := &ChineseDict{}
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
char rune
|
||||||
|
expected bool
|
||||||
|
desc string
|
||||||
|
}{
|
||||||
|
{'大', true, "Chinese character"},
|
||||||
|
{'家', true, "Chinese character"},
|
||||||
|
{'好', true, "Chinese character"},
|
||||||
|
{'a', true, "English lowercase"},
|
||||||
|
{'Z', true, "English uppercase"},
|
||||||
|
{'H', true, "English uppercase"},
|
||||||
|
{',', false, "Comma punctuation"},
|
||||||
|
{',', false, "Chinese comma"},
|
||||||
|
{' ', false, "Space"},
|
||||||
|
{'!', false, "Exclamation mark"},
|
||||||
|
{'。', false, "Chinese period"},
|
||||||
|
{'1', false, "Number"},
|
||||||
|
{'@', false, "Symbol"},
|
||||||
|
{'\n', false, "Newline"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
result := dict.isValidCharacter(tc.char)
|
||||||
|
if result != tc.expected {
|
||||||
|
t.Errorf("isValidCharacter('%c') for %s: expected %v, got %v",
|
||||||
|
tc.char, tc.desc, tc.expected, result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomCharacter(t *testing.T) {
|
||||||
|
testContent := `大家好abc
|
||||||
|
Hello世界
|
||||||
|
测试Test`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test getting characters
|
||||||
|
validChars := make(map[rune]bool)
|
||||||
|
for i := 0; i < 50; i++ { // Get multiple characters to test randomness
|
||||||
|
char := dict.GetRandomCharacter()
|
||||||
|
if char == 0 {
|
||||||
|
t.Error("GetRandomCharacter returned null rune")
|
||||||
|
}
|
||||||
|
if !dict.isValidCharacter(char) {
|
||||||
|
t.Errorf("GetRandomCharacter returned invalid character: '%c'", char)
|
||||||
|
}
|
||||||
|
validChars[char] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should have gotten some variety of characters
|
||||||
|
if len(validChars) < 5 {
|
||||||
|
t.Errorf("Expected more variety in characters, got only %d unique chars", len(validChars))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomCharacterEmptyDict(t *testing.T) {
|
||||||
|
dict := &ChineseDict{lines: []string{}}
|
||||||
|
|
||||||
|
char := dict.GetRandomCharacter()
|
||||||
|
if char != 0 {
|
||||||
|
t.Errorf("Expected null rune for empty dict, got '%c'", char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomCharacterOnlyPunctuation(t *testing.T) {
|
||||||
|
testContent := `,。!?
|
||||||
|
!@#$%
|
||||||
|
,,,。。。`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should handle lines with only punctuation gracefully
|
||||||
|
// The function should try all lines and return null when no valid characters found
|
||||||
|
char := dict.GetRandomCharacter()
|
||||||
|
if char != 0 {
|
||||||
|
t.Errorf("Expected null rune for punctuation-only dict, got '%c'", char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomString(t *testing.T) {
|
||||||
|
testContent := `大家好abc
|
||||||
|
Hello世界
|
||||||
|
测试Test`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test different lengths
|
||||||
|
testLengths := []int{1, 5, 10, 20}
|
||||||
|
|
||||||
|
for _, length := range testLengths {
|
||||||
|
result := dict.GetRandomString(length)
|
||||||
|
runes := []rune(result)
|
||||||
|
|
||||||
|
if len(runes) != length {
|
||||||
|
t.Errorf("GetRandomString(%d): expected length %d, got %d",
|
||||||
|
length, length, len(runes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify all characters are valid
|
||||||
|
for _, r := range runes {
|
||||||
|
if !dict.isValidCharacter(r) {
|
||||||
|
t.Errorf("GetRandomString returned invalid character: '%c'", r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomStringZeroLength(t *testing.T) {
|
||||||
|
testContent := `大家好`
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
result := dict.GetRandomString(0)
|
||||||
|
if result != "" {
|
||||||
|
t.Errorf("Expected empty string for length 0, got '%s'", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
result = dict.GetRandomString(-1)
|
||||||
|
if result != "" {
|
||||||
|
t.Errorf("Expected empty string for negative length, got '%s'", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetRandomStringEmptyDict(t *testing.T) {
|
||||||
|
dict := &ChineseDict{lines: []string{}}
|
||||||
|
|
||||||
|
result := dict.GetRandomString(5)
|
||||||
|
if result != "" {
|
||||||
|
t.Errorf("Expected empty string for empty dict, got '%s'", result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetLineCount(t *testing.T) {
|
||||||
|
testContent := `Line 1
|
||||||
|
Line 2
|
||||||
|
Line 3
|
||||||
|
|
||||||
|
Line 5`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedCount := 4 // Empty line should be filtered out
|
||||||
|
count := dict.GetLineCount()
|
||||||
|
if count != expectedCount {
|
||||||
|
t.Errorf("Expected line count %d, got %d", expectedCount, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetCharacterCount(t *testing.T) {
|
||||||
|
testContent := `大家好abc!
|
||||||
|
Hello世界,
|
||||||
|
测试Test。`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
count := dict.GetCharacterCount()
|
||||||
|
|
||||||
|
// Count manually: 大家好abc + Hello世界 + 测试Test = 3+3 + 5+2 + 2+4 = 19 valid chars
|
||||||
|
expectedCount := 19
|
||||||
|
if count != expectedCount {
|
||||||
|
t.Errorf("Expected character count %d, got %d", expectedCount, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSelectNewRandomLine(t *testing.T) {
|
||||||
|
testContent := `Line 1
|
||||||
|
Line 2
|
||||||
|
Line 3`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test multiple selections to ensure randomness
|
||||||
|
selectedLines := make(map[int]bool)
|
||||||
|
for i := 0; i < 20; i++ {
|
||||||
|
dict.selectNewRandomLine()
|
||||||
|
if dict.currentLineIndex < 0 || dict.currentLineIndex >= len(dict.lines) {
|
||||||
|
t.Errorf("selectNewRandomLine set invalid index: %d", dict.currentLineIndex)
|
||||||
|
}
|
||||||
|
if dict.currentCharIndex != 0 {
|
||||||
|
t.Errorf("selectNewRandomLine should reset currentCharIndex to 0, got %d", dict.currentCharIndex)
|
||||||
|
}
|
||||||
|
selectedLines[dict.currentLineIndex] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should have selected different lines (with high probability)
|
||||||
|
if len(selectedLines) < 2 {
|
||||||
|
t.Error("selectNewRandomLine appears to not be random enough")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCharacterSequencing(t *testing.T) {
|
||||||
|
// Test that characters are returned in sequence from lines
|
||||||
|
testContent := `abc
|
||||||
|
def`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get enough characters to likely exhaust at least one line
|
||||||
|
chars := make([]rune, 10)
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
chars[i] = dict.GetRandomCharacter()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify we got valid characters
|
||||||
|
for i, char := range chars {
|
||||||
|
if !dict.isValidCharacter(char) {
|
||||||
|
t.Errorf("Character at position %d is invalid: '%c'", i, char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should contain characters from our test content
|
||||||
|
validTestChars := map[rune]bool{'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true}
|
||||||
|
for _, char := range chars {
|
||||||
|
if !validTestChars[char] {
|
||||||
|
t.Errorf("Got unexpected character: '%c'", char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMixedLanguageContent(t *testing.T) {
|
||||||
|
testContent := `Hello世界!This is 测试。
|
||||||
|
English中文Mixed内容,with punctuation!
|
||||||
|
Another行with more混合content。`
|
||||||
|
|
||||||
|
tmpFile := createTestDictFile(t, testContent)
|
||||||
|
defer os.Remove(tmpFile)
|
||||||
|
|
||||||
|
dict, err := NewChineseDict(tmpFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewChineseDict failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get many characters and verify they're all valid
|
||||||
|
chineseCount := 0
|
||||||
|
englishCount := 0
|
||||||
|
|
||||||
|
for i := 0; i < 100; i++ {
|
||||||
|
char := dict.GetRandomCharacter()
|
||||||
|
if char == 0 {
|
||||||
|
t.Error("Got null character")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if !dict.isValidCharacter(char) {
|
||||||
|
t.Errorf("Got invalid character: '%c' (U+%04X)", char, char)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if char >= 0x4E00 && char <= 0x9FFF {
|
||||||
|
chineseCount++
|
||||||
|
} else if (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') {
|
||||||
|
englishCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should have gotten both Chinese and English characters
|
||||||
|
if chineseCount == 0 {
|
||||||
|
t.Error("Expected some Chinese characters, got none")
|
||||||
|
}
|
||||||
|
if englishCount == 0 {
|
||||||
|
t.Error("Expected some English characters, got none")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("Got %d Chinese and %d English characters", chineseCount, englishCount)
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user