新增中文词典功能,创建ChineseDict结构体以加载和生成随机中文字符,更新ASR功能以使用词典进行更真实的字符替换。同时,更新.gitignore以排除.zip文件。
This commit is contained in:
parent
0dd2c38226
commit
a26a612a5b
1
.gitignore
vendored
1
.gitignore
vendored
@ -23,3 +23,4 @@ go.work
|
|||||||
*_dumps/
|
*_dumps/
|
||||||
out/
|
out/
|
||||||
log
|
log
|
||||||
|
*.zip
|
||||||
71
DICT_README.md
Normal file
71
DICT_README.md
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# Chinese Dictionary Implementation
|
||||||
|
|
||||||
|
This implementation provides a `ChineseDict` struct that loads Chinese characters from `dict.txt` and provides functionality to generate random Chinese characters.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Load Chinese characters**: Reads `dict.txt` and extracts all Chinese characters (Unicode range 0x4E00-0x9FFF)
|
||||||
|
- **Random character generation**: Get single random Chinese characters
|
||||||
|
- **Random string generation**: Generate strings of random Chinese characters with specified length
|
||||||
|
- **Character counting**: Get the total number of Chinese characters loaded (a character that appears several times in `dict.txt` is counted each time)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Basic Usage
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Create a new dictionary instance
|
||||||
|
dict, err := NewChineseDict("dict.txt")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error loading dictionary: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a single random Chinese character
|
||||||
|
randomChar := dict.GetRandomCharacter()
|
||||||
|
fmt.Printf("Random character: %c\n", randomChar)
|
||||||
|
|
||||||
|
// Get a random string of 5 Chinese characters
|
||||||
|
randomString := dict.GetRandomString(5)
|
||||||
|
fmt.Printf("Random string: %s\n", randomString)
|
||||||
|
|
||||||
|
// Get the total number of characters in dictionary
|
||||||
|
count := dict.GetCharacterCount()
|
||||||
|
fmt.Printf("Total characters: %d\n", count)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Demo
|
||||||
|
|
||||||
|
Run the demo to see the functionality in action:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go run . -dict
|
||||||
|
```
|
||||||
|
|
||||||
|
This will display:
|
||||||
|
- Total number of Chinese characters loaded
|
||||||
|
- 10 random single characters
|
||||||
|
- Random strings of different lengths (3, 5, 8, 10 characters)
|
||||||
|
|
||||||
|
## Integration with ASR
|
||||||
|
|
||||||
|
The dictionary is automatically integrated with the ASR (Automatic Speech Recognition) functionality. When processing speech recognition results, the system will:
|
||||||
|
|
||||||
|
1. Try to load the dictionary from `dict.txt`
|
||||||
|
2. Use dictionary characters for more realistic Chinese character replacement
|
||||||
|
3. Abort with a fatal error if the dictionary cannot be loaded (there is no random-generation fallback)
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
- `dict.go` - Main dictionary implementation
|
||||||
|
- `dict.txt` - Source file containing Chinese characters
|
||||||
|
- `asr.go` - ASR functionality with dictionary integration
|
||||||
|
- `main.go` - Main application with demo functionality
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Go 1.16 or later (uses `os.ReadFile`)
|
||||||
|
- `dict.txt` file in the working directory the program is started from (it is opened via the relative path `dict.txt`)
|
||||||
|
|
||||||
|
## Character Statistics
|
||||||
|
|
||||||
|
The current `dict.txt` contains **479,939** Chinese characters, providing a rich source for realistic random character generation.
|
||||||
41
asr.go
41
asr.go
@ -4,10 +4,14 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Dict is the shared Chinese dictionary used for character replacement; main assigns it at startup.
|
||||||
|
var Dict *ChineseDict
|
||||||
|
|
||||||
// WordInfo represents individual word information with timing and confidence
|
// WordInfo represents individual word information with timing and confidence
|
||||||
type WordInfo struct {
|
type WordInfo struct {
|
||||||
Confidence float64 `json:"confidence"`
|
Confidence float64 `json:"confidence"`
|
||||||
@ -64,10 +68,6 @@ func (s *SpeechRecognitionResponse) FromJSON(jsonStr string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SpeechRecognitionResponse) asrReplaceEmpty() {
|
|
||||||
s.Result.WordInfo = []WordInfo{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
||||||
for i, word := range s.Result.WordInfo {
|
for i, word := range s.Result.WordInfo {
|
||||||
if word.Text == "" || word.Text == " " {
|
if word.Text == "" || word.Text == " " {
|
||||||
@ -82,24 +82,29 @@ func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if containsChinese {
|
if containsChinese {
|
||||||
// Replace with random Chinese characters
|
// Replace with random Chinese characters from dictionary if available
|
||||||
runes := []rune(word.Text)
|
runes := []rune(word.Text)
|
||||||
for i := range runes {
|
for j := range runes {
|
||||||
if runes[i] >= 0x4e00 && runes[i] <= 0x9fff {
|
if runes[j] >= 0x4e00 && runes[j] <= 0x9fff {
|
||||||
// Generate random Chinese character in common range
|
if Dict != nil {
|
||||||
runes[i] = rune(0x4e00 + rand.Intn(0x9fff-0x4e00+1))
|
// Use dictionary for more realistic Chinese characters
|
||||||
|
runes[j] = Dict.GetRandomCharacter()
|
||||||
|
} else {
|
||||||
|
// No fallback: the dictionary must have been loaded at startup, so abort
|
||||||
|
log.Fatalln("CRITICAL ERROR: Dictionary not loaded")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.Result.WordInfo[i].Text = string(runes)
|
s.Result.WordInfo[i].Text = string(runes)
|
||||||
} else {
|
} else {
|
||||||
// Replace with random English characters
|
// Replace with random English characters
|
||||||
runes := []rune(word.Text)
|
runes := []rune(word.Text)
|
||||||
for i := range runes {
|
for j := range runes {
|
||||||
if (runes[i] >= 'a' && runes[i] <= 'z') || (runes[i] >= 'A' && runes[i] <= 'Z') {
|
if (runes[j] >= 'a' && runes[j] <= 'z') || (runes[j] >= 'A' && runes[j] <= 'Z') {
|
||||||
if runes[i] >= 'a' && runes[i] <= 'z' {
|
if runes[j] >= 'a' && runes[j] <= 'z' {
|
||||||
runes[i] = rune('a' + rand.Intn(26))
|
runes[j] = rune('a' + rand.Intn(26))
|
||||||
} else {
|
} else {
|
||||||
runes[i] = rune('A' + rand.Intn(26))
|
runes[j] = rune('A' + rand.Intn(26))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -109,7 +114,6 @@ func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
||||||
fmt.Printf("asrResultObfuscate: %s\n", r.URL.Path)
|
|
||||||
if r.URL.Path != "/webcast/review/client_ai/upload_asr_result/" {
|
if r.URL.Path != "/webcast/review/client_ai/upload_asr_result/" {
|
||||||
return nil, fmt.Errorf("not an asr request")
|
return nil, fmt.Errorf("not an asr request")
|
||||||
}
|
}
|
||||||
@ -121,11 +125,8 @@ func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
|||||||
if len(obj.Result.WordInfo) == 0 {
|
if len(obj.Result.WordInfo) == 0 {
|
||||||
return nil, fmt.Errorf("no word info")
|
return nil, fmt.Errorf("no word info")
|
||||||
}
|
}
|
||||||
if rand.Intn(100) < 50 {
|
|
||||||
obj.asrReplaceEmpty()
|
obj.asrReplaceRandom()
|
||||||
} else {
|
|
||||||
obj.asrReplaceRandom()
|
|
||||||
}
|
|
||||||
jsonData, err := obj.ToJSON()
|
jsonData, err := obj.ToJSON()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to marshal to JSON: %w", err)
|
return nil, fmt.Errorf("failed to marshal to JSON: %w", err)
|
||||||
|
|||||||
60
dict.go
Normal file
60
dict.go
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/rand"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ChineseDict holds the pool of Chinese characters that random characters and
// random strings are drawn from.
type ChineseDict struct {
	// characters keeps every Chinese character found in the source file, in
	// file order and with repeats preserved, so a character that occurs more
	// often in the file is proportionally more likely to be picked.
	characters []rune
}

// NewChineseDict reads the UTF-8 text file at filePath and builds a dictionary
// from the Chinese characters it contains.
//
// Only runes in the CJK Unified Ideographs range U+4E00..U+9FFF are kept.
// Line breaks, ASCII, punctuation and the replacement rune produced by invalid
// UTF-8 are dropped, so the dictionary can never hand out a non-Chinese rune.
//
// It returns the error from os.ReadFile unchanged when the file cannot be
// read. A readable file without any Chinese character yields an empty
// dictionary and a nil error.
func NewChineseDict(filePath string) (*ChineseDict, error) {
	// Same range that the ASR code uses to recognise a Chinese character.
	const (
		cjkFirst = 0x4e00
		cjkLast  = 0x9fff
	)

	content, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}

	// Every rune in the accepted range occupies three bytes in UTF-8, so
	// len(content)/3 is an upper bound on the number of characters kept.
	characters := make([]rune, 0, len(content)/3)
	for _, r := range string(content) {
		if r >= cjkFirst && r <= cjkLast {
			characters = append(characters, r)
		}
	}

	return &ChineseDict{characters: characters}, nil
}

// GetRandomCharacter returns one character picked uniformly at random from the
// loaded characters, or the zero rune when the dictionary is empty.
func (cd *ChineseDict) GetRandomCharacter() rune {
	if len(cd.characters) == 0 {
		return 0
	}
	return cd.characters[rand.Intn(len(cd.characters))]
}

// GetRandomString returns a string made of length independently picked random
// characters. It returns the empty string when the dictionary is empty or
// length is not positive.
func (cd *ChineseDict) GetRandomString(length int) string {
	if len(cd.characters) == 0 || length <= 0 {
		return ""
	}

	result := make([]rune, length)
	for i := range result {
		result[i] = cd.GetRandomCharacter()
	}
	return string(result)
}

// GetCharacterCount returns how many characters are loaded. Repeated
// characters are counted once per occurrence.
func (cd *ChineseDict) GetCharacterCount() int {
	return len(cd.characters)
}
|
||||||
57
main.go
57
main.go
@ -48,11 +48,15 @@ type ProxyServer struct {
|
|||||||
proxy *goproxy.ProxyHttpServer
|
proxy *goproxy.ProxyHttpServer
|
||||||
server *http.Server
|
server *http.Server
|
||||||
originalProxy string
|
originalProxy string
|
||||||
|
verbose bool
|
||||||
|
quiet bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Parse command line flags
|
// Parse command line flags
|
||||||
var testConnectivity = flag.Bool("test", false, "Test proxy connectivity")
|
var testConnectivity = flag.Bool("test", false, "Test proxy connectivity")
|
||||||
|
var verbose = flag.Bool("v", false, "Enable verbose mode - dump all traffic instead of only modified requests/responses")
|
||||||
|
var debugMode = flag.Bool("d", false, "Debug mode - dump modified requests/responses")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Set console to UTF-8 on Windows to prevent garbled text
|
// Set console to UTF-8 on Windows to prevent garbled text
|
||||||
@ -60,6 +64,14 @@ func main() {
|
|||||||
setConsoleUTF8()
|
setConsoleUTF8()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Println("Reading dictionary...")
|
||||||
|
dict, err := NewChineseDict("dict.txt")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Failed to load dictionary: %v", err)
|
||||||
|
}
|
||||||
|
Dict = dict
|
||||||
|
fmt.Printf("Dictionary loaded successfully, size=%d\n", Dict.GetCharacterCount())
|
||||||
|
|
||||||
fmt.Println("Starting MITM proxy server...")
|
fmt.Println("Starting MITM proxy server...")
|
||||||
|
|
||||||
// Load configuration
|
// Load configuration
|
||||||
@ -80,7 +92,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create proxy server
|
// Create proxy server
|
||||||
proxy, err := NewProxyServer(config)
|
proxy, err := NewProxyServer(config, *verbose, *debugMode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Failed to create proxy server: %v", err)
|
log.Fatalf("Failed to create proxy server: %v", err)
|
||||||
}
|
}
|
||||||
@ -151,7 +163,7 @@ func loadConfig(filename string) (*Config, error) {
|
|||||||
return parseConfig(filename)
|
return parseConfig(filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewProxyServer(config *Config) (*ProxyServer, error) {
|
func NewProxyServer(config *Config, verbose bool, debugMode bool) (*ProxyServer, error) {
|
||||||
// Load hardcoded P12 certificate for MITM
|
// Load hardcoded P12 certificate for MITM
|
||||||
tlsConfig, err := loadHardcodedCertificate()
|
tlsConfig, err := loadHardcodedCertificate()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -160,12 +172,14 @@ func NewProxyServer(config *Config) (*ProxyServer, error) {
|
|||||||
|
|
||||||
// Create goproxy instance
|
// Create goproxy instance
|
||||||
goProxy := goproxy.NewProxyHttpServer()
|
goProxy := goproxy.NewProxyHttpServer()
|
||||||
goProxy.Verbose = true
|
goProxy.Verbose = verbose
|
||||||
|
|
||||||
ps := &ProxyServer{
|
ps := &ProxyServer{
|
||||||
config: config,
|
config: config,
|
||||||
tlsConfig: tlsConfig,
|
tlsConfig: tlsConfig,
|
||||||
proxy: goProxy,
|
proxy: goProxy,
|
||||||
|
verbose: verbose,
|
||||||
|
quiet: !debugMode,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure MITM for HTTPS traffic
|
// Configure MITM for HTTPS traffic
|
||||||
@ -209,6 +223,10 @@ func (p *ProxyServer) setupHandlers() {
|
|||||||
|
|
||||||
// Log all HTTP requests and capture request body
|
// Log all HTTP requests and capture request body
|
||||||
p.proxy.OnRequest().DoFunc(func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
|
p.proxy.OnRequest().DoFunc(func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
|
||||||
|
if !p.isDomainOfInterest(r.Host) && p.quiet {
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Read request body once and recreate it for both dumping and forwarding
|
// Read request body once and recreate it for both dumping and forwarding
|
||||||
if r.Body != nil {
|
if r.Body != nil {
|
||||||
reqBody, err := io.ReadAll(r.Body)
|
reqBody, err := io.ReadAll(r.Body)
|
||||||
@ -225,6 +243,9 @@ func (p *ProxyServer) setupHandlers() {
|
|||||||
if err != nil && err.Error() != "not an asr request" {
|
if err != nil && err.Error() != "not an asr request" {
|
||||||
log.Printf("Failed to obfuscate request body: %v", err)
|
log.Printf("Failed to obfuscate request body: %v", err)
|
||||||
}
|
}
|
||||||
|
if p.quiet && err == nil {
|
||||||
|
log.Println("[INFO] ASR Request Body Modified")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if newReqBody != nil {
|
if newReqBody != nil {
|
||||||
@ -250,15 +271,21 @@ func (p *ProxyServer) setupHandlers() {
|
|||||||
|
|
||||||
// Log all HTTP responses and dump traffic
|
// Log all HTTP responses and dump traffic
|
||||||
p.proxy.OnResponse().DoFunc(func(r *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
|
p.proxy.OnResponse().DoFunc(func(r *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
|
||||||
|
if p.quiet {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
timestamp := time.Now().Format("20060102T15:04:05.000000")
|
timestamp := time.Now().Format("20060102T15:04:05.000000")
|
||||||
if r != nil {
|
if r != nil {
|
||||||
fmt.Printf(
|
if p.verbose || p.isDomainOfInterest(ctx.Req.Host) {
|
||||||
"[%s][INFO][Interest=%v] HTTP Response: %s %s\n",
|
fmt.Printf(
|
||||||
timestamp,
|
"[%s][INFO][Interest=%v] HTTP Response: %s %s\n",
|
||||||
p.isDomainOfInterest(ctx.Req.Host),
|
timestamp,
|
||||||
r.Status,
|
p.isDomainOfInterest(ctx.Req.Host),
|
||||||
ctx.Req.URL.String(),
|
r.Status,
|
||||||
)
|
ctx.Req.URL.String(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// Get request body from context (if available)
|
// Get request body from context (if available)
|
||||||
var reqBody []byte
|
var reqBody []byte
|
||||||
@ -284,10 +311,16 @@ func (p *ProxyServer) setupHandlers() {
|
|||||||
r.ContentLength = int64(len(respBody))
|
r.ContentLength = int64(len(respBody))
|
||||||
|
|
||||||
// Dump traffic to file with both request and response bodies
|
// Dump traffic to file with both request and response bodies
|
||||||
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, respBody)
|
// Only dump if verbose mode is enabled OR if the request was modified
|
||||||
|
if p.verbose || modifiedBody != nil {
|
||||||
|
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, respBody)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// No response body, but may have request body
|
// No response body, but may have request body
|
||||||
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, nil)
|
// Only dump if verbose mode is enabled OR if the request was modified
|
||||||
|
if p.verbose || modifiedBody != nil {
|
||||||
|
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, nil)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return r
|
return r
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user