package main

import (
	"bufio"
	"math/rand"
	"os"
	"strings"
)

// ChineseDict is an in-memory dictionary of text lines together with a cursor
// that GetRandomCharacter uses to hand out characters one at a time.
//
// The cursor fields are mutated by GetRandomCharacter and GetRandomString
// without any locking, so concurrent use requires external synchronization.
type ChineseDict struct {
	lines            []string // dictionary lines; NewChineseDict stores them trimmed and non-empty
	currentLineIndex int      // index into lines of the active line; -1 before the first selection
	currentCharIndex int      // index into currentLineRunes of the next rune to examine
	currentLineRunes []rune   // decoded runes of the active line; nil until a line is selected
}

// NewChineseDict reads the file at filePath line by line and returns a
// ChineseDict holding every line that is non-empty after trimming surrounding
// whitespace.
//
// Errors from opening or scanning the file are returned unchanged (not
// wrapped), so callers can keep using helpers such as os.IsNotExist. A single
// line longer than 5 MiB makes the scanner fail. A file with no usable lines
// yields a valid, empty dictionary and a nil error.
func NewChineseDict(filePath string) (*ChineseDict, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Start with the default token size and let the scanner grow the buffer
	// up to 5 MiB so that very long dictionary lines can still be read.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 5*1024*1024)

	var lines []string
	for scanner.Scan() {
		if line := strings.TrimSpace(scanner.Text()); line != "" {
			lines = append(lines, line)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return &ChineseDict{
		lines:            lines,
		currentLineIndex: -1, // a line is chosen lazily on the first character request
		currentCharIndex: 0,
	}, nil
}

// isValidCharacter reports whether r is a rune in U+4E00..U+9FFF (the basic
// CJK Unified Ideographs block) or an ASCII letter. Everything else, including
// digits, punctuation, whitespace and ideographs outside that range, is
// rejected.
func (cd *ChineseDict) isValidCharacter(r rune) bool {
	switch {
	case r >= 0x4E00 && r <= 0x9FFF:
		return true
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z':
		return true
	default:
		return false
	}
}

// selectLine makes lines[idx] the active line and rewinds the cursor to its
// first rune. idx must be a valid index into cd.lines.
func (cd *ChineseDict) selectLine(idx int) {
	cd.currentLineIndex = idx
	cd.currentLineRunes = []rune(cd.lines[idx])
	cd.currentCharIndex = 0
}

// selectNewRandomLine makes a uniformly chosen line the active line and
// rewinds the cursor. It does nothing when the dictionary is empty. The same
// line may be chosen twice in a row.
func (cd *ChineseDict) selectNewRandomLine() {
	if len(cd.lines) == 0 {
		return
	}
	cd.selectLine(rand.Intn(len(cd.lines)))
}

// nextValidInCurrentLine advances the cursor through the active line and
// returns the next rune accepted by isValidCharacter. The boolean is false
// when the rest of the line holds no such rune; the cursor then sits at the
// end of the line.
func (cd *ChineseDict) nextValidInCurrentLine() (rune, bool) {
	for cd.currentCharIndex < len(cd.currentLineRunes) {
		r := cd.currentLineRunes[cd.currentCharIndex]
		cd.currentCharIndex++
		if cd.isValidCharacter(r) {
			return r, true
		}
	}
	return 0, false
}

// randomValidFromSingleLine serves GetRandomCharacter when the dictionary has
// exactly one line. It starts at a random position and scans forward,
// wrapping around, until it meets a valid rune, so punctuation is never
// returned and the call cannot loop forever. Valid runes that directly follow
// a run of invalid ones are somewhat more likely to be chosen. It returns 0
// when the line contains no valid rune.
func (cd *ChineseDict) randomValidFromSingleLine() rune {
	// Also (re)load the line when the struct was built without the
	// constructor and the rune cache is still empty; otherwise rand.Intn
	// below would be called with 0 and panic.
	if cd.currentLineIndex != 0 || len(cd.currentLineRunes) == 0 {
		cd.selectLine(0)
	}

	n := len(cd.currentLineRunes)
	if n == 0 {
		return 0
	}

	start := rand.Intn(n)
	for off := 0; off < n; off++ {
		i := (start + off) % n
		if r := cd.currentLineRunes[i]; cd.isValidCharacter(r) {
			// Leave the cursor just past the returned rune, mirroring what
			// the multi-line path does.
			cd.currentCharIndex = i + 1
			return r
		}
	}
	return 0
}

// GetRandomCharacter returns one valid character (see isValidCharacter) from
// the dictionary, or 0 when the dictionary holds no valid character at all.
//
// With several lines it picks a random line and then returns that line's valid
// characters in order, one per call, skipping everything else; when the line
// is used up a new random line is picked. With exactly one line it returns a
// randomly positioned valid character of that line on every call.
func (cd *ChineseDict) GetRandomCharacter() rune {
	switch len(cd.lines) {
	case 0:
		return 0
	case 1:
		return cd.randomValidFromSingleLine()
	}

	// Random phase: every iteration either returns a character or burns one
	// attempt on a line that had nothing (more) to offer.
	maxAttempts := len(cd.lines) * 2
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if cd.currentLineIndex < 0 || cd.currentCharIndex >= len(cd.currentLineRunes) {
			cd.selectNewRandomLine()
		}
		if r, ok := cd.nextValidInCurrentLine(); ok {
			return r
		}
	}

	// Random picks can keep landing on lines without valid characters even
	// though usable lines exist. Walk every line once, from a random offset,
	// so that 0 is returned only when nothing in the dictionary is valid.
	start := rand.Intn(len(cd.lines))
	for i := range cd.lines {
		cd.selectLine((start + i) % len(cd.lines))
		if r, ok := cd.nextValidInCurrentLine(); ok {
			return r
		}
	}
	return 0
}

// GetRandomString returns a string of length characters obtained from
// successive GetRandomCharacter calls. It returns "" when length is not
// positive or the dictionary is empty. If the dictionary contains no valid
// character the result is shorter than length (empty) rather than padded with
// NUL runes.
func (cd *ChineseDict) GetRandomString(length int) string {
	if len(cd.lines) == 0 || length <= 0 {
		return ""
	}

	result := make([]rune, 0, length)
	for len(result) < length {
		r := cd.GetRandomCharacter()
		if r == 0 {
			// 0 is the "nothing available" sentinel; it must not leak into
			// the returned text.
			break
		}
		result = append(result, r)
	}
	return string(result)
}

// GetLineCount returns the number of lines held by the dictionary.
func (cd *ChineseDict) GetLineCount() int {
	return len(cd.lines)
}

// GetCharacterCount returns how many runes across all lines are accepted by
// isValidCharacter. It rescans every line on each call.
func (cd *ChineseDict) GetCharacterCount() int {
	count := 0
	for _, line := range cd.lines {
		for _, r := range line {
			if cd.isValidCharacter(r) {
				count++
			}
		}
	}
	return count
}