700字范文 > Spring Boot+VUE集成科大讯飞语音合成解决方案

Spring Boot+VUE集成科大讯飞语音合成解决方案

时间：2023-08-14 19:51:57

在项目中需要用到将景点文字合成语音，通过语音方式向用户介绍景点信息，需要用到文字转语音的在线合成解决方案。通过对各种文字转语音合成方案与效果比较，觉得讯飞的效果最好，语音拟人效果、文章断词都非常不错，并且有一年10万次的免费使用量，因此对比后决定使用讯飞的在线语音合成解决方案。由于这个主题网上教程非常少，只找到了一个没提供完整源代码的参考案例，结合官网资料，最终搞定的完整解决方案和效果图如下：

一、注册讯飞开发者，获取访问Key

到讯飞开发者平台（/app/myapp），用实名注册好讯飞开发者用户，添加好自己拟开发的应用系统，申请好应用访问讯飞平台的访问参数。由于自己的服务器性能不行，所以使用在线语音合成，采用的是WebAPI方式。此外，讯飞还提供了一些demo程序，新手需要下载后反复研读。

讯飞接口4个参数

二、创建在线语音转换的Util

讯飞demo提供的是一个独立运行的方法，并且采用PCM格式，将转换后的文件保存在服务器上，需要对此程序进行改造，我主要进行了以下几个方面的改造：

1、将访问参数放到properties文件，以免放在程序代码中被泄露。由于这部分代码定义的是静态变量，参数注入方式有变化：需要给类加上@Component注解，并通过非静态的setter方法配合@Value完成注入

2、对前端传递过来的TEXT文本，转换后使用MP3格式输出给前端，完整代码如下

//静态参数注入，必须增加@Component注解@Componentpublic class XunFeiUtil {protected static final Logger log = LoggerFactory.getLogger(XunFeiUtil.class);//讯飞四个注入参数，保存在配置文件，便于复用和避免代码上传gitee后泄漏private static String hostUrl;@Value("${xunfei.hostUrl}")public void setHostUrl(String hostUrl) {XunFeiUtil.hostUrl = hostUrl;}private static String appid;@Value("${xunfei.appid}")public void setAppid(String appid) {XunFeiUtil.appid = appid;}private static String apiSecret;@Value("${xunfei.apiSecret}")public void setApiSecret(String apiSecret) {XunFeiUtil.apiSecret = apiSecret;}private static String apiKey;@Value("${xunfei.apiKey}")public void setApiKey(String apiKey) {XunFeiUtil.apiKey = apiKey;}public static final Gson json = new Gson();private static String base64 = "";private static volatile boolean lock = true;/*** 将文本转换为MP3格语音base64文件** @param text 要转换的文本（如JSON串）* @return 转换后的base64文件* @throws IOException 异常*/public static String convertText(String text) throws Exception {lock = true;base64 = "";// 构建鉴权urlString authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);OkHttpClient client = new OkHttpClient.Builder().build();//将url中的 schema http://和https://分别替换为ws:// 和 wss://String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");Request request = new Request.Builder().url(url).build();List<byte[]> list = Lists.newArrayList();WebSocket webSocket = client.newWebSocket(request, new WebSocketListener() {@Overridepublic void onOpen(WebSocket webSocket, Response response) {super.onOpen(webSocket, response);try {System.out.println(response.body().string());} catch (IOException e) {e.printStackTrace();}//发送数据JsonObject frame = new JsonObject();JsonObject business = new JsonObject();JsonObject common = new JsonObject();JsonObject data = new JsonObject();// 填充commoncommon.addProperty("app_id", appid);//填充business,AUE属性lame是MP3格式，raw是PCM格式business.addProperty("aue", "lame");business.addProperty("sfl", 1);business.addProperty("tte", 
"UTF8");//小语种必须使用UNICODE编码business.addProperty("vcn", "xiaoyan");//到控制台-我的应用-语音合成-添加试用或购买发音人，添加后即显示该发音人参数值，若试用未添加的发音人会报错11200business.addProperty("pitch", 50);business.addProperty("speed", 50);//填充datadata.addProperty("status", 2);//固定位2try {data.addProperty("text", Base64.getEncoder().encodeToString(text.getBytes("utf8")));//使用小语种须使用下面的代码，此处的unicode指的是 utf16小端的编码方式，即"UTF-16LE"”//data.addProperty("text", Base64.getEncoder().encodeToString(text.getBytes("UTF-16LE")));} catch (UnsupportedEncodingException e) {e.printStackTrace();}//填充frameframe.add("common", common);frame.add("business", business);frame.add("data", data);webSocket.send(frame.toString());}@Overridepublic void onMessage(WebSocket webSocket, String text) {super.onMessage(webSocket, text);//处理返回数据System.out.println("receive=>");ResponseData resp = null;try {resp = json.fromJson(text, ResponseData.class);} catch (Exception e) {e.printStackTrace();}if (resp != null) {if (resp.getCode() != 0) {System.out.println("error=>" + resp.getMessage() + " sid=" + resp.getSid());return;}if (resp.getData() != null) {String result = resp.getData().audio;byte[] audio = Base64.getDecoder().decode(result);list.add(audio);// 说明数据全部返回完毕，可以关闭连接，释放资源if (resp.getData().status == 2) {String is = base64Concat(list);base64 = is;lock = false;webSocket.close(1000, "");}}}}@Overridepublic void onMessage(WebSocket webSocket, ByteString bytes) {super.onMessage(webSocket, bytes);}@Overridepublic void onClosing(WebSocket webSocket, int code, String reason) {super.onClosing(webSocket, code, reason);System.out.println("socket closing");}@Overridepublic void onClosed(WebSocket webSocket, int code, String reason) {super.onClosed(webSocket, code, reason);System.out.println("socket closed");}@Overridepublic void onFailure(WebSocket webSocket, Throwable t, Response response) {super.onFailure(webSocket, t, response);System.out.println("connection failed" + response.message());}});while (lock) {}return base64;}/*** * base64拼接* */static String 
base64Concat(List<byte[]> list) {int length = 0;for (byte[] b : list) {length += b.length;}byte[] retByte = new byte[length];for (byte[] b : list) {retByte = ByteUtils.concat(retByte, b);}return cn.hutool.core.codec.Base64.encode(retByte);}/*** * 获取权限地址* ** * @param hostUrl* * @param apiKey* * @param apiSecret* * @return* */private static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {URL url = new URL(hostUrl);SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);format.setTimeZone(TimeZone.getTimeZone("GMT"));String date = format.format(new Date());StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").append("date: ").append(date).append("\n").append("GET ").append(url.getPath()).append(" HTTP/1.1");Charset charset = Charset.forName("UTF-8");Mac mac = Mac.getInstance("hmacsha256");SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");mac.init(spec);byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));String sha = Base64.getEncoder().encodeToString(hexDigits);String authorization = String.format("hmac username=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).addQueryParameter("date", date).addQueryParameter("host", url.getHost()).build();return httpUrl.toString();}public static class ResponseData {private int code;private String message;private String sid;private Data data;public int getCode() {return code;}public String getMessage() {return this.message;}public String getSid() {return sid;}public Data getData() {return data;}}private static class Data {//标志音频是否返回结束 status=1，表示后续还有音频返回，status=2表示所有的音频已经返回private int status;//返回的音频，base64 编码private String audio;// 
合成进度private String ced;}}

三、构建前后端接口控制文件

构建前后端访问的接口。由于前端通过VUE以参数方式把文本传递过来，这里使用Post方式，我另有文章介绍为什么要用Post，这是VUE的约定，用Get有问题。

@RestController@RequestMapping("/data/xunfei")public class TextToAudioController {private static final Logger log = LoggerFactory.getLogger(TextToAudioController.class);@ApiOperation(value = "文字转语音", notes = "文字转语音")@PostMapping(value = "text_to_audio")public void textToAudio(String text, HttpServletRequest request , HttpServletResponse response) throws IOException {if (StringUtils.isNotBlank(text)) {//过滤图片,h5标签text = text.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll("<[^>]*>", "").replaceAll("[(/>)<]", "").trim();//调用后台服务接口获取音频base64String result = "";try {result = XunFeiUtil.convertText(text);} catch (Exception e) {log.error("【文字转语音接口调用异常】", e);}//音频数据byte[] audioByte = Base64.getDecoder().decode(result);//以@RestController@RequestMapping("/data/xunfei")public class TextToAudioController {private static final Logger log = LoggerFactory.getLogger(TextToAudioController.class);@ApiOperation(value = "文字转语音", notes = "文字转语音")@PostMapping(value = "text_to_audio")public void textToAudio(String text, HttpServletRequest request , HttpServletResponse response) throws IOException {if (StringUtils.isNotBlank(text)) {//过滤图片,h5标签text = text.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll("<[^>]*>", "").replaceAll("[(/>)<]", "").trim();//调用微服务接口获取音频base64String result = "";try {result = XunFeiUtil.convertText(text);} catch (Exception e) {log.error("【文字转语音接口调用异常】", e);}//音频数据byte[] audioByte = Base64.getDecoder().decode(result);response.setContentType("application/octet-stream;charset=UTF-8");OutputStream os = new BufferedOutputStream(response.getOutputStream());try {//音频流os.write(audioByte);} catch (IOException e) {e.printStackTrace();} finally {os.flush();os.close();}}}}response.setContentType("application/octet-stream;charset=UTF-8");OutputStream os = new BufferedOutputStream(response.getOutputStream());try {//音频流os.write(audioByte);} catch (IOException e) {e.printStackTrace();} finally {os.flush();os.close();}}}}

四、前端访问请求修改

在request.js拦截器中增加语音处理部分，让前端能正确接收语音文件

// 响应拦截器service.interceptors.response.use(res => {//语音处理const headers = res.headersif (headers['content-type'] === 'application/octet-stream;charset=UTF-8') {return res.data}}

五、构建前端访问接口

// iFLYTEK speech retrieval
/**
 * Text-to-speech endpoint: posts the payload and resolves with the synthesized audio.
 *
 * @param data request payload containing the text to synthesize
 * @returns whatever `request` returns for this call
 */
export function textToAudio(data) {
  const options = {
    url: '/data/xunfei/text_to_audio',
    method: 'post',
    data,
    // The back end answers with the audio as a binary stream
    responseType: "blob"
  }
  return request(options)
}

六、前端程序

为了便于介绍，单独做了一个独立、完整的组件，这个组件的功能就是选取textarea中的内容，交给后台转换为MP3语音，并且增加了避免重复转换的判断，增加了暂停与继续播放按钮，效果如后面图。

<template>
  <div class="audio">
    <div>
      <el-input
        type="textarea"
        :autosize="{minRows:3,maxRows:5}"
        placeholder="请输入内容"
        v-model="textarea">
      </el-input>
      <el-badge class="item" style="margin-right: 12px" v-loading="audioLoading">
        <el-button v-if="!audioPlay" icon="el-icon-video-play" size="medium" style="margin: 10px 10px;" circle
                   @click="getAudio(textarea)"></el-button>
        <el-button v-if="audioPlay" icon="el-icon-video-pause" size="medium" style="margin: 10px 10px;" circle
                   @click="audioPause"></el-button>
      </el-badge>
    </div>
  </div>
</template>

<script>
import {textToAudio} from '@/api/data/xunfei'

/**
 * Text area with a play/pause button: the text is sent to the back end for speech
 * synthesis and the returned MP3 is played. Audio is re-requested only when the text
 * differs from the text the cached audio was made from.
 */
export default {
  name: "audioPlay",
  props: {},
  components: {},
  data() {
    return {
      // Text input
      textarea: '',
      // Audio player
      audioObj: null,
      // True while a conversion request is in flight
      audioLoading: false,
      // True while audio is playing (switches the button between play and pause)
      audioPlay: false,
      // Text the cached audio was synthesized from
      audioText: '',
      // Object URL of the cached audio, kept so it can be revoked
      audioUrl: ''
    }
  },
  mounted() {
    // Use an Audio element from within the component
    this.audioObj = new Audio()
    // Show the play button again once playback reaches the end
    this.audioObj.onended = () => {
      this.audioPlay = false
    }
  },
  beforeDestroy() {
    if (this.audioObj) {
      this.audioObj.pause()
      this.audioObj.onended = null
    }
    this.releaseAudio()
  },
  methods: {
    // Request speech synthesis from the back end, or replay the cached audio
    getAudio(text) {
      if (!text || this.audioLoading) {
        return
      }
      // Same text as the cached audio: play (or resume) without another conversion
      if (this.audioUrl && text === this.audioText) {
        this.startPlayback()
        return
      }
      this.audioLoading = true
      let formData = new FormData()
      formData.append('text', text)
      textToAudio(formData).then(response => {
        this.releaseAudio()
        // Turn the returned audio data into a URL the Audio element can load
        this.audioUrl = URL.createObjectURL(response)
        this.audioText = text
        this.audioObj.src = this.audioUrl
        this.audioLoading = false
        this.startPlayback()
      }).catch(err => {
        // Reset the UI so the user can retry instead of facing a stuck spinner
        this.audioLoading = false
        this.audioPlay = false
        console.error('text to audio failed', err)
      })
    },
    // Start playback and keep the button state in sync with the outcome
    startPlayback() {
      this.audioPlay = true
      // Chromium-based browsers return a promise from play(); IE returns nothing
      const playPromise = this.audioObj.play()
      if (playPromise && typeof playPromise.catch === 'function') {
        playPromise.catch(err => {
          this.audioPlay = false
          console.error('audio playback failed', err)
        })
      }
    },
    audioPause() {
      this.audioObj.pause()
      this.audioPlay = false
    },
    // Free the object URL of the cached audio, if any
    releaseAudio() {
      if (this.audioUrl) {
        URL.revokeObjectURL(this.audioUrl)
        this.audioUrl = ''
        this.audioText = ''
      }
    }
  }
}
</script>

<style>
.audio {
  width: 90%;
  position: absolute;
  top: 20px;
  left: 20px;
  font-size: 26px;
}
</style>

效果图

文字转语音的完整方案还是比较少，经过几天努力搞定了此问题，特完整地记录一下，后续将增加转换MP3保存在OSS功能，避免重复调服务，毕竟后续服务要收费，OSS还是比较便宜。

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。