新增ASR(自动语音识别)相关功能,包括实现asrReplaceRandom方法以随机替换WordInfo中的文本,添加相应的单元测试以验证功能的正确性和性能。同时,更新主程序以支持请求体的修改和流量转储功能。

This commit is contained in:
wjsjwr 2025-08-21 23:46:17 +08:00
parent a561b291e9
commit 0dd2c38226
3 changed files with 421 additions and 54 deletions

134
asr.go Normal file
View File

@ -0,0 +1,134 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"math/rand"
"net/http"
)
// WordInfo is one recognized word inside a speech recognition result. It
// carries the word's text, a confidence value and start/end timestamps.
// NOTE(review): the unit of StartTime/EndTime and the range of Confidence are
// not visible in this file - confirm against the upstream ASR payload.
type WordInfo struct {
// Confidence maps to the "confidence" JSON key.
Confidence float64 `json:"confidence"`
// EndTime maps to the "end_time" JSON key.
EndTime int64 `json:"end_time"`
// StartTime maps to the "start_time" JSON key.
StartTime int64 `json:"start_time"`
// Text maps to the "text" JSON key. It is the only field rewritten by
// asrReplaceRandom.
Text string `json:"text"`
}
// Result is the "result" object of a speech recognition response: stream-level
// metadata plus the list of recognized words.
// NOTE(review): the spelling "Boardcast"/"boardcast_*" is kept as-is; the JSON
// keys presumably mirror the upstream payload - confirm before renaming.
type Result struct {
// Language maps to the "language" JSON key.
Language string `json:"language"`
// LanguageDetails maps to the "language_details" JSON key.
LanguageDetails string `json:"language_details"`
// Volume maps to the "volume" JSON key (a string, not a number).
Volume string `json:"volume"`
// Source maps to the "source" JSON key.
Source int `json:"source"`
// AudioStreamOffset maps to the "audio_stream_offset" JSON key.
AudioStreamOffset int64 `json:"audio_stream_offset"`
// AudioStreamEndTime maps to the "audio_stream_end_time" JSON key.
AudioStreamEndTime int64 `json:"audio_stream_end_time"`
// BoardcastOffset maps to the "boardcast_offset" JSON key.
BoardcastOffset int64 `json:"boardcast_offset"`
// BoardcastEndTime maps to the "boardcast_end_time" JSON key.
BoardcastEndTime int64 `json:"boardcast_end_time"`
// WordInfo maps to the "word_info" JSON key. asrReplaceEmpty clears it and
// asrReplaceRandom rewrites the Text of its elements.
WordInfo []WordInfo `json:"word_info"`
}
// SpeechRecognitionResponse is the top-level JSON document handled by
// asrResultObfuscate: identifiers for the room, user and stream, the
// recognition Result, and the name of the model that produced it.
type SpeechRecognitionResponse struct {
// RoomID maps to the "room_id" JSON key.
RoomID string `json:"room_id"`
// UserID maps to the "user_id" JSON key.
UserID string `json:"user_id"`
// StreamID maps to the "stream_id" JSON key.
StreamID string `json:"stream_id"`
// Result maps to the "result" JSON key.
Result Result `json:"result"`
// ModelName maps to the "model_name" JSON key.
ModelName string `json:"model_name"`
}
// ToJSON serializes the response to JSON.
//
// HTML escaping is disabled so characters such as '<', '>' and '&' are emitted
// verbatim, and the single trailing newline that json.Encoder appends is
// stripped from the returned bytes.
func (s *SpeechRecognitionResponse) ToJSON() ([]byte, error) {
	out := new(bytes.Buffer)

	enc := json.NewEncoder(out)
	enc.SetEscapeHTML(false)
	if encErr := enc.Encode(s); encErr != nil {
		return nil, fmt.Errorf("failed to marshal to JSON: %w", encErr)
	}

	// Encode always terminates the document with exactly one '\n'.
	return bytes.TrimSuffix(out.Bytes(), []byte("\n")), nil
}
// FromJSON decodes jsonStr into the receiver. Fields absent from the input
// keep whatever value the receiver already holds. A decoding failure is
// returned wrapped with the "failed to unmarshal JSON" prefix.
func (s *SpeechRecognitionResponse) FromJSON(jsonStr string) error {
	if decodeErr := json.Unmarshal([]byte(jsonStr), s); decodeErr != nil {
		return fmt.Errorf("failed to unmarshal JSON: %w", decodeErr)
	}
	return nil
}
// asrReplaceEmpty discards every recognized word. The slice is set to an
// empty, non-nil value so it still serializes as "[]" rather than "null".
func (s *SpeechRecognitionResponse) asrReplaceEmpty() {
	s.Result.WordInfo = make([]WordInfo, 0)
}
// asrReplaceRandom obfuscates the Text of every word in place.
//
// Each character is replaced according to its own class:
//   - CJK unified ideographs (U+4E00..U+9FFF) become a random ideograph from
//     the same range;
//   - ASCII lowercase letters become a random lowercase letter;
//   - ASCII uppercase letters become a random uppercase letter;
//   - everything else (digits, punctuation, whitespace, other scripts) is kept.
//
// The replacement is always different from the character it replaces, so no
// letter or ideograph of the original text survives at its position. Words
// whose text is empty or a single space are skipped. The number of characters
// in each word and all non-text fields are preserved.
func (s *SpeechRecognitionResponse) asrReplaceRandom() {
	const (
		cjkFirst = 0x4e00
		cjkLast  = 0x9fff
		cjkCount = cjkLast - cjkFirst + 1
	)

	// differentFrom picks a uniformly random member of the size-element range
	// starting at base, excluding r itself: a non-zero offset modulo size can
	// never map r back onto r.
	differentFrom := func(r, base rune, size int) rune {
		offset := rune(1 + rand.Intn(size-1))
		return base + (r-base+offset)%rune(size)
	}

	words := s.Result.WordInfo
	for i := range words {
		text := words[i].Text
		if text == "" || text == " " {
			continue
		}

		// Classify per character rather than per word, so Latin letters inside
		// a word that also contains ideographs are obfuscated as well.
		runes := []rune(text)
		for j, r := range runes {
			switch {
			case r >= cjkFirst && r <= cjkLast:
				runes[j] = differentFrom(r, cjkFirst, cjkCount)
			case r >= 'a' && r <= 'z':
				runes[j] = differentFrom(r, 'a', 26)
			case r >= 'A' && r <= 'Z':
				runes[j] = differentFrom(r, 'A', 26)
			}
		}
		words[i].Text = string(runes)
	}
}
// asrResultObfuscate rewrites the body of an ASR result upload so that the
// recognized words are no longer usable.
//
// Only requests whose URL path is exactly
// "/webcast/review/client_ai/upload_asr_result/" are handled. The body is
// decoded as a SpeechRecognitionResponse and, with equal probability, either
// all words are dropped (asrReplaceEmpty) or their text is randomized
// (asrReplaceRandom). The re-encoded document is returned.
//
// A nil body together with a non-nil error is returned when:
//   - the path does not match (message "not an asr request");
//   - the body is not valid JSON for the expected structure;
//   - the decoded document carries no word info (message "no word info");
//   - re-encoding fails.
//
// The path is also printed to standard output on every call.
func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
	fmt.Printf("asrResultObfuscate: %s\n", r.URL.Path)
	if r.URL.Path != "/webcast/review/client_ai/upload_asr_result/" {
		// The request handler in main.go compares against this exact message
		// to tell "not applicable" apart from real failures; keep it verbatim.
		return nil, fmt.Errorf("not an asr request")
	}

	var obj SpeechRecognitionResponse
	if err := obj.FromJSON(string(body)); err != nil {
		// FromJSON already prefixes its error; add only this layer's context
		// instead of repeating "failed to unmarshal JSON".
		return nil, fmt.Errorf("obfuscating asr result: %w", err)
	}
	if len(obj.Result.WordInfo) == 0 {
		return nil, fmt.Errorf("no word info")
	}

	// Coin flip between wiping the words and scrambling their text.
	if rand.Intn(100) < 50 {
		obj.asrReplaceEmpty()
	} else {
		obj.asrReplaceRandom()
	}

	jsonData, err := obj.ToJSON()
	if err != nil {
		// ToJSON already prefixes its error with "failed to marshal to JSON".
		return nil, fmt.Errorf("obfuscating asr result: %w", err)
	}
	return jsonData, nil
}

242
asr_test.go Normal file
View File

@ -0,0 +1,242 @@
package main
import (
"testing"
)
// NOTE: These tests are written for the current implementation of asrReplaceRandom,
// which has a bug - it doesn't actually modify the WordInfo slice because it iterates
// over copies of the structs instead of references. The method should use:
// for i := range s.Result.WordInfo instead of for _, word := range s.Result.WordInfo
func TestSpeechRecognitionResponse_asrReplaceRandom(t *testing.T) {
t.Run("EmptyAndSpaceText", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: " ", Confidence: 0.8, StartTime: 200, EndTime: 300},
{Text: "hello", Confidence: 0.95, StartTime: 300, EndTime: 400},
},
},
}
// Store original values
originalEmpty := response.Result.WordInfo[0].Text
originalSpace := response.Result.WordInfo[1].Text
originalHello := response.Result.WordInfo[2].Text
response.asrReplaceRandom()
// NOTE: The current implementation has a bug - it doesn't actually modify the WordInfo slice
// because it iterates over copies, not references. All text should remain unchanged.
if response.Result.WordInfo[0].Text != originalEmpty {
t.Errorf("Expected empty text to remain unchanged, got %q", response.Result.WordInfo[0].Text)
}
if response.Result.WordInfo[1].Text != originalSpace {
t.Errorf("Expected space text to remain unchanged, got %q", response.Result.WordInfo[1].Text)
}
if response.Result.WordInfo[2].Text == originalHello {
t.Errorf("Expected text to be replaced, got %q", response.Result.WordInfo[2].Text)
}
})
t.Run("ChineseCharacterReplacement", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "你好", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: "世界", Confidence: 0.8, StartTime: 200, EndTime: 300},
},
},
}
originalTexts := make([]string, len(response.Result.WordInfo))
for i, word := range response.Result.WordInfo {
originalTexts[i] = word.Text
}
response.asrReplaceRandom()
for i, word := range response.Result.WordInfo {
if word.Text == originalTexts[i] {
t.Errorf("Expected Chinese text to be replaced")
}
}
})
t.Run("EnglishCharacterReplacement", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "Hello", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: "WORLD", Confidence: 0.8, StartTime: 200, EndTime: 300},
{Text: "test", Confidence: 0.7, StartTime: 300, EndTime: 400},
},
},
}
originalTexts := make([]string, len(response.Result.WordInfo))
for i, word := range response.Result.WordInfo {
originalTexts[i] = word.Text
}
response.asrReplaceRandom()
// Due to the implementation bug, English text should remain unchanged
for i, word := range response.Result.WordInfo {
if word.Text == originalTexts[i] {
t.Errorf("Expected English text to be replaced")
}
}
})
t.Run("MixedContent", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "Hello123你好", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: "Test!@#测试", Confidence: 0.8, StartTime: 200, EndTime: 300},
},
},
}
originalTexts := make([]string, len(response.Result.WordInfo))
for i, word := range response.Result.WordInfo {
originalTexts[i] = word.Text
}
response.asrReplaceRandom()
// Due to the implementation bug, mixed content should remain unchanged
for i, word := range response.Result.WordInfo {
if word.Text == originalTexts[i] {
t.Errorf("Expected mixed content to be replaced")
}
}
})
t.Run("EmptyWordInfoSlice", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{},
},
}
// Should not panic
response.asrReplaceRandom()
// WordInfo should remain empty
if len(response.Result.WordInfo) != 0 {
t.Errorf("Expected WordInfo to remain empty, got length %d", len(response.Result.WordInfo))
}
})
t.Run("OnlySpecialCharacters", func(t *testing.T) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "123!@#$%", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: ".,;:()[]", Confidence: 0.8, StartTime: 200, EndTime: 300},
},
},
}
originalTexts := make([]string, len(response.Result.WordInfo))
for i, word := range response.Result.WordInfo {
originalTexts[i] = word.Text
}
response.asrReplaceRandom()
// Special characters should remain unchanged
for i, word := range response.Result.WordInfo {
if word.Text != originalTexts[i] {
t.Errorf("Expected special characters to remain unchanged: %q -> %q", originalTexts[i], word.Text)
}
}
})
t.Run("PreservesNonTextFields", func(t *testing.T) {
response := &SpeechRecognitionResponse{
RoomID: "room123",
UserID: "user456",
StreamID: "stream789",
Result: Result{
Language: "zh-CN",
LanguageDetails: "Chinese Simplified",
Volume: "high",
Source: 1,
AudioStreamOffset: 1000,
AudioStreamEndTime: 2000,
BoardcastOffset: 1500,
BoardcastEndTime: 2500,
WordInfo: []WordInfo{
{Text: "test", Confidence: 0.9, StartTime: 100, EndTime: 200},
},
},
ModelName: "test-model",
}
// Store original non-text values
originalRoomID := response.RoomID
originalUserID := response.UserID
originalStreamID := response.StreamID
originalLanguage := response.Result.Language
originalConfidence := response.Result.WordInfo[0].Confidence
originalStartTime := response.Result.WordInfo[0].StartTime
originalEndTime := response.Result.WordInfo[0].EndTime
originalModelName := response.ModelName
response.asrReplaceRandom()
// All non-text fields should remain unchanged
if response.RoomID != originalRoomID {
t.Errorf("Expected RoomID to remain unchanged: %q -> %q", originalRoomID, response.RoomID)
}
if response.UserID != originalUserID {
t.Errorf("Expected UserID to remain unchanged: %q -> %q", originalUserID, response.UserID)
}
if response.StreamID != originalStreamID {
t.Errorf("Expected StreamID to remain unchanged: %q -> %q", originalStreamID, response.StreamID)
}
if response.Result.Language != originalLanguage {
t.Errorf("Expected Language to remain unchanged: %q -> %q", originalLanguage, response.Result.Language)
}
if response.Result.WordInfo[0].Confidence != originalConfidence {
t.Errorf("Expected Confidence to remain unchanged: %f -> %f", originalConfidence, response.Result.WordInfo[0].Confidence)
}
if response.Result.WordInfo[0].StartTime != originalStartTime {
t.Errorf("Expected StartTime to remain unchanged: %d -> %d", originalStartTime, response.Result.WordInfo[0].StartTime)
}
if response.Result.WordInfo[0].EndTime != originalEndTime {
t.Errorf("Expected EndTime to remain unchanged: %d -> %d", originalEndTime, response.Result.WordInfo[0].EndTime)
}
if response.ModelName != originalModelName {
t.Errorf("Expected ModelName to remain unchanged: %q -> %q", originalModelName, response.ModelName)
}
})
}
// Benchmark test to measure performance
func BenchmarkSpeechRecognitionResponse_asrReplaceRandom(b *testing.B) {
response := &SpeechRecognitionResponse{
Result: Result{
WordInfo: []WordInfo{
{Text: "Hello World", Confidence: 0.9, StartTime: 100, EndTime: 200},
{Text: "你好世界", Confidence: 0.8, StartTime: 200, EndTime: 300},
{Text: "Test123!@#", Confidence: 0.7, StartTime: 300, EndTime: 400},
},
},
}
for b.Loop() {
// Create a copy for each iteration to avoid modifying the same data
testResponse := *response
testResponse.Result.WordInfo = make([]WordInfo, len(response.Result.WordInfo))
copy(testResponse.Result.WordInfo, response.Result.WordInfo)
testResponse.asrReplaceRandom()
}
}

99
main.go
View File

@ -37,6 +37,11 @@ type Config struct {
} `toml:"dump"` } `toml:"dump"`
} }
// UserData is the value the request handler stores in the goproxy context
// (ctx.UserData) so that the response handler can dump both versions of the
// request body.
type UserData struct {
// RequestBody is the request body as read from the incoming request.
RequestBody []byte
// ModifiedBody is the body returned by asrResultObfuscate and forwarded in
// place of RequestBody; it is nil when the request was not rewritten.
ModifiedBody []byte
}
type ProxyServer struct { type ProxyServer struct {
config *Config config *Config
tlsConfig *tls.Config tlsConfig *tls.Config
@ -204,9 +209,6 @@ func (p *ProxyServer) setupHandlers() {
// Log all HTTP requests and capture request body // Log all HTTP requests and capture request body
p.proxy.OnRequest().DoFunc(func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) { p.proxy.OnRequest().DoFunc(func(r *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) {
timestamp := time.Now().Format("20060102T15:04:05.000000")
fmt.Printf("[%s][INFO][Interest=%v] HTTP Request: %s %s from %s\n", timestamp, p.isDomainOfInterest(r.Host), r.Method, r.URL.String(), r.RemoteAddr)
// Read request body once and recreate it for both dumping and forwarding // Read request body once and recreate it for both dumping and forwarding
if r.Body != nil { if r.Body != nil {
reqBody, err := io.ReadAll(r.Body) reqBody, err := io.ReadAll(r.Body)
@ -216,13 +218,30 @@ func (p *ProxyServer) setupHandlers() {
} }
r.Body.Close() r.Body.Close()
// Recreate the request body so it can be forwarded to the server var newReqBody []byte = nil
r.Body = io.NopCloser(bytes.NewReader(reqBody))
r.ContentLength = int64(len(reqBody)) if p.isDomainOfInterest(r.Host) {
newReqBody, err = asrResultObfuscate(r, reqBody)
if err != nil && err.Error() != "not an asr request" {
log.Printf("Failed to obfuscate request body: %v", err)
}
}
if newReqBody != nil {
r.Body = io.NopCloser(bytes.NewReader(newReqBody))
r.ContentLength = int64(len(newReqBody))
} else {
// Recreate the request body so it can be forwarded to the server
r.Body = io.NopCloser(bytes.NewReader(reqBody))
r.ContentLength = int64(len(reqBody))
}
// Store request body in context for later use in response handler // Store request body in context for later use in response handler
if len(reqBody) > 0 { if len(reqBody) > 0 {
ctx.UserData = reqBody ctx.UserData = UserData{
RequestBody: reqBody,
ModifiedBody: newReqBody,
}
} }
} }
@ -233,13 +252,21 @@ func (p *ProxyServer) setupHandlers() {
p.proxy.OnResponse().DoFunc(func(r *http.Response, ctx *goproxy.ProxyCtx) *http.Response { p.proxy.OnResponse().DoFunc(func(r *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
timestamp := time.Now().Format("20060102T15:04:05.000000") timestamp := time.Now().Format("20060102T15:04:05.000000")
if r != nil { if r != nil {
fmt.Printf("[%s][INFO][Interest=%v] HTTP Response: %s %s\n", timestamp, p.isDomainOfInterest(ctx.Req.Host), r.Status, ctx.Req.URL.String()) fmt.Printf(
"[%s][INFO][Interest=%v] HTTP Response: %s %s\n",
timestamp,
p.isDomainOfInterest(ctx.Req.Host),
r.Status,
ctx.Req.URL.String(),
)
// Get request body from context (if available) // Get request body from context (if available)
var reqBody []byte var reqBody []byte
var modifiedBody []byte
if ctx.UserData != nil { if ctx.UserData != nil {
if body, ok := ctx.UserData.([]byte); ok { if userData, ok := ctx.UserData.(UserData); ok {
reqBody = body reqBody = userData.RequestBody
modifiedBody = userData.ModifiedBody
} }
} }
@ -257,10 +284,10 @@ func (p *ProxyServer) setupHandlers() {
r.ContentLength = int64(len(respBody)) r.ContentLength = int64(len(respBody))
// Dump traffic to file with both request and response bodies // Dump traffic to file with both request and response bodies
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, respBody) p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, respBody)
} else { } else {
// No response body, but may have request body // No response body, but may have request body
p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, nil) p.dumpHTTPTrafficWithBodies(ctx.Req, r, reqBody, modifiedBody, nil)
} }
} }
return r return r
@ -296,49 +323,8 @@ func (p *ProxyServer) Shutdown() {
p.server.Shutdown(ctx) p.server.Shutdown(ctx)
} }
// dumpHTTPTraffic writes the request line, Host and headers of req, followed
// by the status line and headers of resp (when resp is non-nil), to the file
// named by p.getFilePath(req). Bodies are not written. If the file cannot be
// created the failure is logged and nothing is dumped.
func (p *ProxyServer) dumpHTTPTraffic(req *http.Request, resp *http.Response) {
// os.Create truncates an existing file at the same path.
file, err := os.Create(p.getFilePath(req))
if err != nil {
log.Printf("Failed to create dump file: %v", err)
return
}
defer file.Close()
// Write request information
fmt.Fprintf(file, "=== REQUEST ===\n")
fmt.Fprintf(file, "%s %s %s\n", req.Method, req.URL.String(), req.Proto)
fmt.Fprintf(file, "Host: %s\n", req.Host)
// Write all request headers (map iteration, so header order is unspecified)
for name, values := range req.Header {
for _, value := range values {
fmt.Fprintf(file, "%s: %s\n", name, value)
}
}
fmt.Fprintf(file, "\n")
// Note: Request body handling is done in the request handler to avoid consuming it twice
// Write response information
if resp != nil {
fmt.Fprintf(file, "\n=== RESPONSE ===\n")
fmt.Fprintf(file, "%s %s\n", resp.Proto, resp.Status)
// Write all response headers (map iteration, so header order is unspecified)
for name, values := range resp.Header {
for _, value := range values {
fmt.Fprintf(file, "%s: %s\n", name, value)
}
}
fmt.Fprintf(file, "\n")
// Note: Response body will be handled in the response handler to avoid consuming it
}
}
// dumpHTTPTrafficWithBodies dumps HTTP request and response with both bodies to file // dumpHTTPTrafficWithBodies dumps HTTP request and response with both bodies to file
func (p *ProxyServer) dumpHTTPTrafficWithBodies(req *http.Request, resp *http.Response, reqBody []byte, respBody []byte) { func (p *ProxyServer) dumpHTTPTrafficWithBodies(req *http.Request, resp *http.Response, reqBody []byte, modifiedBody []byte, respBody []byte) {
file, err := os.Create(p.getFilePath(req)) file, err := os.Create(p.getFilePath(req))
if err != nil { if err != nil {
log.Printf("Failed to create dump file: %v", err) log.Printf("Failed to create dump file: %v", err)
@ -364,6 +350,11 @@ func (p *ProxyServer) dumpHTTPTrafficWithBodies(req *http.Request, resp *http.Re
fmt.Fprintf(file, "%s\n", string(reqBody)) fmt.Fprintf(file, "%s\n", string(reqBody))
} }
if modifiedBody != nil {
fmt.Fprintf(file, "\n=== MODIFIED REQUEST BODY ===\n")
fmt.Fprintf(file, "%s\n", string(modifiedBody))
}
// Write response information // Write response information
if resp != nil { if resp != nil {
fmt.Fprintf(file, "\n=== RESPONSE ===\n") fmt.Fprintf(file, "\n=== RESPONSE ===\n")