新增中文词典功能,创建ChineseDict结构体以加载和生成随机中文字符,更新ASR功能以使用词典进行更真实的字符替换。同时,更新.gitignore以排除.zip文件。
This commit is contained in:
parent
0dd2c38226
commit
a26a612a5b
1
.gitignore
vendored
1
.gitignore
vendored
@ -23,3 +23,4 @@ go.work
|
||||
*_dumps/
|
||||
out/
|
||||
log
|
||||
*.zip
|
||||
71
DICT_README.md
Normal file
71
DICT_README.md
Normal file
@ -0,0 +1,71 @@
|
||||
# Chinese Dictionary Implementation
|
||||
|
||||
This implementation provides a `ChineseDict` struct that loads Chinese characters from `dict.txt` and provides functionality to generate random Chinese characters.
|
||||
|
||||
## Features
|
||||
|
||||
- **Load Chinese characters**: Reads `dict.txt` and extracts all Chinese characters (Unicode range 0x4E00-0x9FFF)
|
||||
- **Random character generation**: Get single random Chinese characters
|
||||
- **Random string generation**: Generate strings of random Chinese characters with specified length
|
||||
- **Character counting**: Get the total number of characters loaded (a character that appears several times in `dict.txt` is counted each time)
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```go
|
||||
// Create a new dictionary instance
|
||||
dict, err := NewChineseDict("dict.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Error loading dictionary: %v", err)
|
||||
}
|
||||
|
||||
// Get a single random Chinese character
|
||||
randomChar := dict.GetRandomCharacter()
|
||||
fmt.Printf("Random character: %c\n", randomChar)
|
||||
|
||||
// Get a random string of 5 Chinese characters
|
||||
randomString := dict.GetRandomString(5)
|
||||
fmt.Printf("Random string: %s\n", randomString)
|
||||
|
||||
// Get the total number of characters in dictionary
|
||||
count := dict.GetCharacterCount()
|
||||
fmt.Printf("Total characters: %d\n", count)
|
||||
```
|
||||
|
||||
### Demo
|
||||
|
||||
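Run the demo to see the functionality in action (this relies on a `-dict` command-line flag; the flags registered in `main()` in this commit are `-test`, `-v` and `-d`, so confirm that `-dict` is defined before using it):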
|
||||
|
||||
```bash
|
||||
go run . -dict
|
||||
```
|
||||
|
||||
This will display:
|
||||
- Total number of Chinese characters loaded
|
||||
- 10 random single characters
|
||||
- Random strings of different lengths (3, 5, 8, 10 characters)
|
||||
|
||||
## Integration with ASR
|
||||
|
||||
The dictionary is automatically integrated with the ASR (Automatic Speech Recognition) functionality. When processing speech recognition results, the system will:
|
||||
|
||||
1. Try to load the dictionary from `dict.txt`
|
||||
2. Use dictionary characters for more realistic Chinese character replacement
|
||||
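3. Stop with a fatal error if the dictionary cannot be loaded at startup, or if it is missing when a replacement is needed; there is no fallback to random code-point generation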
|
||||
|
||||
## File Structure
|
||||
|
||||
- `dict.go` - Main dictionary implementation
|
||||
- `dict.txt` - Source file containing Chinese characters
|
||||
- `asr.go` - ASR functionality with dictionary integration
|
||||
- `main.go` - Main application with demo functionality
|
||||
|
||||
## Requirements
|
||||
|
||||
- Go 1.16 or later (uses `os.ReadFile`)
|
||||
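- `dict.txt` file in the current working directory when the program starts (it is opened by the relative path `dict.txt`, which is not necessarily the directory of the executable)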
|
||||
|
||||
## Character Statistics
|
||||
|
||||
The current `dict.txt` contains **479,939** Chinese characters, providing a rich source for realistic random character generation.
|
||||
39
asr.go
39
asr.go
@ -4,10 +4,14 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
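// Dict is the shared dictionary used for Chinese character replacement. It is nil until main assigns the loaded dictionary; asrReplaceRandom exits via log.Fatalln if it is still nil.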
|
||||
var Dict *ChineseDict
|
||||
|
||||
// WordInfo represents individual word information with timing and confidence
|
||||
type WordInfo struct {
|
||||
Confidence float64 `json:"confidence"`
|
||||
@ -64,10 +68,6 @@ func (s *SpeechRecognitionResponse) FromJSON(jsonStr string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SpeechRecognitionResponse) asrReplaceEmpty() {
|
||||
s.Result.WordInfo = []WordInfo{}
|
||||
}
|
||||
|
||||
func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
||||
for i, word := range s.Result.WordInfo {
|
||||
if word.Text == "" || word.Text == " " {
|
||||
@ -82,24 +82,29 @@ func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
||||
}
|
||||
}
|
||||
if containsChinese {
|
||||
// Replace with random Chinese characters
|
||||
// Replace with random Chinese characters from dictionary if available
|
||||
runes := []rune(word.Text)
|
||||
for i := range runes {
|
||||
if runes[i] >= 0x4e00 && runes[i] <= 0x9fff {
|
||||
// Generate random Chinese character in common range
|
||||
runes[i] = rune(0x4e00 + rand.Intn(0x9fff-0x4e00+1))
|
||||
for j := range runes {
|
||||
if runes[j] >= 0x4e00 && runes[j] <= 0x9fff {
|
||||
if Dict != nil {
|
||||
// Use dictionary for more realistic Chinese characters
|
||||
runes[j] = Dict.GetRandomCharacter()
|
||||
} else {
|
||||
// Fallback to random generation
|
||||
log.Fatalln("CRITICAL ERROR: Dictionary not loaded")
|
||||
}
|
||||
}
|
||||
}
|
||||
s.Result.WordInfo[i].Text = string(runes)
|
||||
} else {
|
||||
// Replace with random English characters
|
||||
runes := []rune(word.Text)
|
||||
for i := range runes {
|
||||
if (runes[i] >= 'a' && runes[i] <= 'z') || (runes[i] >= 'A' && runes[i] <= 'Z') {
|
||||
if runes[i] >= 'a' && runes[i] <= 'z' {
|
||||
runes[i] = rune('a' + rand.Intn(26))
|
||||
for j := range runes {
|
||||
if (runes[j] >= 'a' && runes[j] <= 'z') || (runes[j] >= 'A' && runes[j] <= 'Z') {
|
||||
if runes[j] >= 'a' && runes[j] <= 'z' {
|
||||
runes[j] = rune('a' + rand.Intn(26))
|
||||
} else {
|
||||
runes[i] = rune('A' + rand.Intn(26))
|
||||
runes[j] = rune('A' + rand.Intn(26))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -109,7 +114,6 @@ func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
||||
}
|
||||
|
||||
func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
||||
fmt.Printf("asrResultObfuscate: %s\n", r.URL.Path)
|
||||
if r.URL.Path != "/webcast/review/client_ai/upload_asr_result/" {
|
||||
return nil, fmt.Errorf("not an asr request")
|
||||
}
|
||||
@ -121,11 +125,8 @@ func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
||||
if len(obj.Result.WordInfo) == 0 {
|
||||
return nil, fmt.Errorf("no word info")
|
||||
}
|
||||
if rand.Intn(100) < 50 {
|
||||
obj.asrReplaceEmpty()
|
||||
} else {
|
||||
|
||||
obj.asrReplaceRandom()
|
||||
}
|
||||
jsonData, err := obj.ToJSON()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal to JSON: %w", err)
|
||||
|
||||
60
dict.go
Normal file
60
dict.go
Normal file
@ -0,0 +1,60 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"os"
|
||||
)
|
||||
|
||||
// ChineseDict is a pool of Chinese characters loaded from a text file, from
// which random characters and strings can be drawn.
type ChineseDict struct {
	// characters holds the accepted runes in file order. Duplicates are kept,
	// so a character that occurs more often in the file is drawn more often.
	characters []rune
}

// NewChineseDict reads the file at filePath and builds a ChineseDict from it.
//
// Only runes in the CJK Unified Ideographs range U+4E00..U+9FFF are kept.
// Line breaks, spaces, punctuation, Latin letters and invalid UTF-8 bytes are
// dropped, so GetRandomCharacter never hands back a separator from the file.
// A file without any such rune yields an empty, still usable dictionary.
//
// When the file cannot be read, the error from os.ReadFile is returned
// unchanged and the returned dictionary is nil.
func NewChineseDict(filePath string) (*ChineseDict, error) {
	const (
		cjkFirst rune = 0x4E00
		cjkLast  rune = 0x9FFF
	)

	content, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}

	// Every rune in the accepted range takes three bytes in UTF-8, so
	// len(content)/3 is an upper bound on the number of runes kept.
	characters := make([]rune, 0, len(content)/3)

	// Ranging over a string decodes it rune by rune; undecodable bytes come
	// out as U+FFFD, which lies outside the accepted range and is skipped.
	for _, r := range string(content) {
		if r >= cjkFirst && r <= cjkLast {
			characters = append(characters, r)
		}
	}

	return &ChineseDict{characters: characters}, nil
}

// GetRandomCharacter returns one character picked at random from the
// dictionary. It returns the zero rune when the dictionary is empty.
func (cd *ChineseDict) GetRandomCharacter() rune {
	if len(cd.characters) == 0 {
		return 0
	}
	return cd.characters[rand.Intn(len(cd.characters))]
}

// GetRandomString returns a string made of length characters, each picked
// independently at random from the dictionary. It returns the empty string
// when the dictionary is empty or length is not positive.
func (cd *ChineseDict) GetRandomString(length int) string {
	if len(cd.characters) == 0 || length <= 0 {
		return ""
	}

	result := make([]rune, length)
	for i := range result {
		result[i] = cd.GetRandomCharacter()
	}
	return string(result)
}

// GetCharacterCount returns how many characters the dictionary holds.
// Characters that occur several times in the source file are counted each
// time.
func (cd *ChineseDict) GetCharacterCount() int {
	return len(cd.characters)
}
|
||||
39
main.go
39
main.go
@ -48,11 +48,15 @@ type ProxyServer struct {
|
||||
proxy *goproxy.ProxyHttpServer
|
||||
server *http.Server
|
||||
originalProxy string
|
||||
verbose bool
|
||||
quiet bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Parse command line flags
|
||||
var testConnectivity = flag.Bool("test", false, "Test proxy connectivity")
|
||||
var verbose = flag.Bool("v", false, "Enable verbose mode - dump all traffic instead of only modified requests/responses")
|
||||
var debugMode = flag.Bool("d", false, "Debug mode - dump modified requests/responses")
|
||||
flag.Parse()
|
||||
|
||||
// Set console to UTF-8 on Windows to prevent garbled text
|
||||
@ -60,6 +64,14 @@ func main() {
|
||||
setConsoleUTF8()
|
||||
}
|
||||
|
||||
fmt.Println("Reading dictionary...")
|
||||
dict, err := NewChineseDict("dict.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to load dictionary: %v", err)
|
||||
}
|
||||
Dict = dict
|
||||
fmt.Printf("Dictionary loaded successfully, size=%d\n", Dict.GetCharacterCount())
|
||||
|
||||
fmt.Println("Starting MITM proxy server...")
|
||||
|
||||
// Load configuration
|
||||
@ -80,7 +92,7 @@ func main() {
|
||||
}
|
||||
|
||||
// Create proxy server
|
||||
proxy, err := NewProxyServer(config)
|
||||
proxy, err := NewProxyServer(config, *verbose, *debugMode)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to create proxy server: %v", err)
|
||||
}
|
||||
@ -151,7 +163,7 @@ func loadConfig(filename string) (*Config, error) {
|
||||
return parseConfig(filename)
|
||||
}
|
||||
|
||||
func NewProxyServer(config *Config) (*ProxyServer, error) {
|
||||
func NewProxyServer(config *Config, verbose bool, debugMode bool) (*ProxyServer, error) {
|
||||
// Load hardcoded P12 certificate for MITM
|
||||
tlsConfig, err := loadHardcodedCertificate()
|
||||
if err != nil {
|
||||
@ -160,12 +172,14 @@ func NewProxyServer(config *Config) (*ProxyServer, error) {
|
||||
|
||||
// Create goproxy instance
|
||||
goProxy := goproxy.NewProxyHttpServer()
|
||||
goProxy.Verbose = true
|
||||
goProxy.Verbose = verbose
|
||||
|
||||
ps := &ProxyServer{
|
||||
config: config,
|
||||
tlsConfig: tlsConfig,
|
||||
proxy: goProxy,
|
||||
verbose: verbose,
|
||||
quiet: !debugMode,
|
||||
}
|
||||
|
||||
// Configure MITM for HTTPS traffic
|
||||
@ -209,6 +223,10 @@ func (p *ProxyServer) setupHandlers() {
|
||||
|
||||
// Log all HTTP requests and capture request body
|
||||
p.proxy.OnRequest().DoFunc(func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
|
||||
if !p.isDomainOfInterest(r.Host) && p.quiet {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Read request body once and recreate it for both dumping and forwarding
|
||||
if r.Body != nil {
|
||||
reqBody, err := io.ReadAll(r.Body)
|
||||
@ -225,6 +243,9 @@ func (p *ProxyServer) setupHandlers() {
|
||||
if err != nil && err.Error() != "not an asr request" {
|
||||
log.Printf("Failed to obfuscate request body: %v", err)
|
||||
}
|
||||
if p.quiet && err == nil {
|
||||
log.Println("[INFO] ASR Request Body Modified")
|
||||
}
|
||||
}
|
||||
|
||||
if newReqBody != nil {
|
||||
@ -250,8 +271,13 @@ func (p *ProxyServer) setupHandlers() {
|
||||
|
||||
// Log all HTTP responses and dump traffic
|
||||
p.proxy.OnResponse().DoFunc(func(r *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
|
||||
if p.quiet {
|
||||
return r
|
||||
}
|
||||
|
||||
timestamp := time.Now().Format("20060102T15:04:05.000000")
|
||||
if r != nil {
|
||||
if p.verbose || p.isDomainOfInterest(ctx.Req.Host) {
|
||||
fmt.Printf(
|
||||
"[%s][INFO][Interest=%v] HTTP Response: %s %s\n",
|
||||
timestamp,
|
||||
@ -259,6 +285,7 @@ func (p *ProxyServer) setupHandlers() {
|
||||
r.Status,
|
||||
ctx.Req.URL.String(),
|
||||
)
|
||||
}
|
||||
|
||||
// Get request body from context (if available)
|
||||
var reqBody []byte
|
||||
@ -284,12 +311,18 @@ func (p *ProxyServer) setupHandlers() {
|
||||
r.ContentLength = int64(len(respBody))
|
||||
|
||||
// Dump traffic to file with both request and response bodies
|
||||
// Only dump if verbose mode is enabled OR if the request was modified
|
||||
if p.verbose || modifiedBody != nil {
|
||||
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, respBody)
|
||||
}
|
||||
} else {
|
||||
// No response body, but may have request body
|
||||
// Only dump if verbose mode is enabled OR if the request was modified
|
||||
if p.verbose || modifiedBody != nil {
|
||||
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, nil)
|
||||
}
|
||||
}
|
||||
}
|
||||
return r
|
||||
})
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user