135 lines
3.7 KiB
Go
135 lines
3.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math/rand"
|
|
"net/http"
|
|
)
|
|
|
|
// WordInfo describes one recognized word: its text, the time span it covers
// and the recognizer's confidence in it.
//
// StartTime and EndTime bound the word in time.
// NOTE(review): the time unit and reference point are not visible in this
// file — confirm against the producer of this payload.
type WordInfo struct {
	Confidence float64 `json:"confidence"`
	EndTime    int64   `json:"end_time"`
	StartTime  int64   `json:"start_time"`
	Text       string  `json:"text"`
}
|
|
|
|
// Result represents the speech recognition result
|
|
type Result struct {
|
|
Language string `json:"language"`
|
|
LanguageDetails string `json:"language_details"`
|
|
Volume string `json:"volume"`
|
|
Source int `json:"source"`
|
|
AudioStreamOffset int64 `json:"audio_stream_offset"`
|
|
AudioStreamEndTime int64 `json:"audio_stream_end_time"`
|
|
BoardcastOffset int64 `json:"boardcast_offset"`
|
|
BoardcastEndTime int64 `json:"boardcast_end_time"`
|
|
WordInfo []WordInfo `json:"word_info"`
|
|
}
|
|
|
|
// SpeechRecognitionResponse represents the complete speech recognition response
|
|
type SpeechRecognitionResponse struct {
|
|
RoomID string `json:"room_id"`
|
|
UserID string `json:"user_id"`
|
|
StreamID string `json:"stream_id"`
|
|
Result Result `json:"result"`
|
|
ModelName string `json:"model_name"`
|
|
}
|
|
|
|
// ToJSON converts the struct to JSON byte array
|
|
func (s *SpeechRecognitionResponse) ToJSON() ([]byte, error) {
|
|
var buf bytes.Buffer
|
|
encoder := json.NewEncoder(&buf)
|
|
encoder.SetEscapeHTML(false)
|
|
err := encoder.Encode(s)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to marshal to JSON: %w", err)
|
|
}
|
|
data := buf.Bytes()
|
|
// Remove the trailing newline that Encode adds
|
|
if len(data) > 0 && data[len(data)-1] == '\n' {
|
|
data = data[:len(data)-1]
|
|
}
|
|
return data, nil
|
|
}
|
|
|
|
// FromJSON parses JSON string into the struct
|
|
func (s *SpeechRecognitionResponse) FromJSON(jsonStr string) error {
|
|
err := json.Unmarshal([]byte(jsonStr), s)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to unmarshal JSON: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *SpeechRecognitionResponse) asrReplaceEmpty() {
|
|
s.Result.WordInfo = []WordInfo{}
|
|
}
|
|
|
|
func (s *SpeechRecognitionResponse) asrReplaceRandom() {
|
|
for i, word := range s.Result.WordInfo {
|
|
if word.Text == "" || word.Text == " " {
|
|
continue
|
|
}
|
|
// Check if text contains Chinese characters
|
|
containsChinese := false
|
|
for _, r := range word.Text {
|
|
if r >= 0x4e00 && r <= 0x9fff {
|
|
containsChinese = true
|
|
break
|
|
}
|
|
}
|
|
if containsChinese {
|
|
// Replace with random Chinese characters
|
|
runes := []rune(word.Text)
|
|
for i := range runes {
|
|
if runes[i] >= 0x4e00 && runes[i] <= 0x9fff {
|
|
// Generate random Chinese character in common range
|
|
runes[i] = rune(0x4e00 + rand.Intn(0x9fff-0x4e00+1))
|
|
}
|
|
}
|
|
s.Result.WordInfo[i].Text = string(runes)
|
|
} else {
|
|
// Replace with random English characters
|
|
runes := []rune(word.Text)
|
|
for i := range runes {
|
|
if (runes[i] >= 'a' && runes[i] <= 'z') || (runes[i] >= 'A' && runes[i] <= 'Z') {
|
|
if runes[i] >= 'a' && runes[i] <= 'z' {
|
|
runes[i] = rune('a' + rand.Intn(26))
|
|
} else {
|
|
runes[i] = rune('A' + rand.Intn(26))
|
|
}
|
|
}
|
|
}
|
|
s.Result.WordInfo[i].Text = string(runes)
|
|
}
|
|
}
|
|
}
|
|
|
|
func asrResultObfuscate(r *http.Request, body []byte) ([]byte, error) {
|
|
fmt.Printf("asrResultObfuscate: %s\n", r.URL.Path)
|
|
if r.URL.Path != "/webcast/review/client_ai/upload_asr_result/" {
|
|
return nil, fmt.Errorf("not an asr request")
|
|
}
|
|
obj := SpeechRecognitionResponse{}
|
|
err := obj.FromJSON(string(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
|
|
}
|
|
if len(obj.Result.WordInfo) == 0 {
|
|
return nil, fmt.Errorf("no word info")
|
|
}
|
|
if rand.Intn(100) < 50 {
|
|
obj.asrReplaceEmpty()
|
|
} else {
|
|
obj.asrReplaceRandom()
|
|
}
|
|
jsonData, err := obj.ToJSON()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to marshal to JSON: %w", err)
|
|
}
|
|
return jsonData, nil
|
|
}
|