152 lines
3.9 KiB
Go
152 lines
3.9 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"math/rand"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// ChineseDict holds the text lines of a dictionary together with a cursor
// (current line plus position inside that line) used when handing out
// characters one at a time.
type ChineseDict struct {
	// lines is the dictionary content, one entry per line of text.
	lines []string

	// currentLineIndex is the index into lines of the line being read
	// (-1 means no line has been selected yet); currentCharIndex is the
	// position of the next rune to examine inside currentLineRunes.
	currentLineIndex, currentCharIndex int

	// currentLineRunes is the currently selected line decoded into runes,
	// so that it can be indexed per character rather than per byte.
	currentLineRunes []rune
}
|
|
|
|
// NewChineseDict creates a new ChineseDict instance and loads lines from dict.txt
|
|
func NewChineseDict(filePath string) (*ChineseDict, error) {
|
|
file, err := os.Open(filePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer file.Close()
|
|
|
|
var lines []string
|
|
scanner := bufio.NewScanner(file)
|
|
buf := make([]byte, 0, bufio.MaxScanTokenSize)
|
|
scanner.Buffer(buf, 5*1024*1024) // 5MB buffer to handle long lines
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line != "" { // Skip empty lines
|
|
lines = append(lines, line)
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cd := &ChineseDict{
|
|
lines: lines,
|
|
currentLineIndex: -1, // Will be set when first character is requested
|
|
currentCharIndex: 0,
|
|
}
|
|
|
|
return cd, nil
|
|
}
|
|
|
|
// isValidCharacter checks if a rune is a Chinese character or English letter
|
|
func (cd *ChineseDict) isValidCharacter(r rune) bool {
|
|
// Check if it's a Chinese character (CJK Unified Ideographs)
|
|
if r >= 0x4E00 && r <= 0x9FFF {
|
|
return true
|
|
}
|
|
// Check if it's an English letter
|
|
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// selectNewRandomLine selects a new random line and resets character index
|
|
func (cd *ChineseDict) selectNewRandomLine() {
|
|
if len(cd.lines) == 0 {
|
|
return
|
|
}
|
|
cd.currentLineIndex = rand.Intn(len(cd.lines))
|
|
cd.currentLineRunes = []rune(cd.lines[cd.currentLineIndex])
|
|
cd.currentCharIndex = 0
|
|
}
|
|
|
|
// GetRandomCharacter returns Chinese characters or English words from random lines
|
|
// It picks a random line and iterates through characters, skipping punctuation
|
|
func (cd *ChineseDict) GetRandomCharacter() rune {
|
|
if len(cd.lines) == 0 {
|
|
return 0 // Return null rune if no lines available
|
|
}
|
|
|
|
if len(cd.lines) == 1 {
|
|
if cd.currentLineIndex == -1 {
|
|
cd.selectNewRandomLine()
|
|
}
|
|
// Get random index
|
|
randomIndex := rand.Intn(len(cd.currentLineRunes))
|
|
cd.currentCharIndex = randomIndex + 1 // Move index forward for next call, align with multiline version for testing
|
|
|
|
return cd.currentLineRunes[randomIndex]
|
|
}
|
|
|
|
// Keep track of attempts to avoid infinite recursion
|
|
maxAttempts := len(cd.lines) * 2 // Try each line at least twice
|
|
attempts := 0
|
|
|
|
for attempts < maxAttempts {
|
|
// If this is the first call or we've reached the end of current line, select a new line
|
|
if cd.currentLineIndex == -1 || cd.currentCharIndex >= len(cd.currentLineRunes) {
|
|
cd.selectNewRandomLine()
|
|
}
|
|
|
|
// Find the next valid character in the current line
|
|
for cd.currentCharIndex < len(cd.currentLineRunes) {
|
|
currentChar := cd.currentLineRunes[cd.currentCharIndex]
|
|
cd.currentCharIndex++
|
|
|
|
if cd.isValidCharacter(currentChar) {
|
|
return currentChar
|
|
}
|
|
}
|
|
|
|
// If we've exhausted the current line without finding a valid character,
|
|
// mark it for retry and continue
|
|
attempts++
|
|
cd.currentCharIndex = len(cd.currentLineRunes) // Force line selection on next iteration
|
|
}
|
|
|
|
// If we've tried all lines multiple times and found no valid characters, return null
|
|
return 0
|
|
}
|
|
|
|
// GetRandomString returns a string of random Chinese characters with specified length
|
|
func (cd *ChineseDict) GetRandomString(length int) string {
|
|
if len(cd.lines) == 0 || length <= 0 {
|
|
return ""
|
|
}
|
|
|
|
result := make([]rune, length)
|
|
for i := range length {
|
|
result[i] = cd.GetRandomCharacter()
|
|
}
|
|
|
|
return string(result)
|
|
}
|
|
|
|
// GetLineCount returns the total number of lines in the dictionary
|
|
func (cd *ChineseDict) GetLineCount() int {
|
|
return len(cd.lines)
|
|
}
|
|
|
|
// GetCharacterCount returns the total number of valid characters in the dictionary
|
|
func (cd *ChineseDict) GetCharacterCount() int {
|
|
count := 0
|
|
for _, line := range cd.lines {
|
|
for _, r := range line {
|
|
if cd.isValidCharacter(r) {
|
|
count++
|
|
}
|
|
}
|
|
}
|
|
return count
|
|
}
|