chore: switch to Google Cloud Text-to-Speech

2025-10-29 16:56:34 +00:00 · 2024-12-12 14:36:14 +01:00
parent 6250f760a9
commit 8208316fb9
3 changed files with 22 additions and 9 deletions
--- a/frontend/server/package.json
+++ b/frontend/server/package.json
@@ -11,6 +11,7 @@
  "private": true,
  "dependencies": {
    "@google-cloud/speech": "^6.7.0",
+    "@google-cloud/text-to-speech": "^5.6.0",
    "appjs": "^0.0.20",
    "appjs-win32": "^0.0.19",
    "body-parser": "^1.20.3",
--- a/frontend/server/src/app.ts
+++ b/frontend/server/src/app.ts
@@ -220,7 +220,7 @@ module.exports = function (configLocation, viteProxy) {
    httpProxyMiddleware.createProxyMiddleware({
      target: `http://${
        backendAddress === "*" ? "localhost" : backendAddress
-      }:${config["backend"]["port"]}/`,
+      }:${config["backend"]["port"]}/olympus`,
      changeOrigin: true,
    })
  );
--- a/frontend/server/src/routes/api/speech.ts
+++ b/frontend/server/src/routes/api/speech.ts
@@ -1,20 +1,32 @@
 import express = require("express");
 //const gtts = require("node-gtts")("en");
 const speech = require("@google-cloud/speech");
+const textToSpeech = require('@google-cloud/text-to-speech');
 const router = express.Router();

+// Create the speech-recognition and text-to-speech clients once at module scope so the route handlers reuse them
+const recognizeClient = new speech.SpeechClient();
+const generateClient = new textToSpeech.TextToSpeechClient();
+
 module.exports = function () {
  router.put("/generate", (req, res, next) => {
-    //res.set({ "Content-Type": "audio/mpeg" });
-    // TODO
-    //gtts.stream(req.body.text).pipe(res);
-    res.sendStatus(404);
+    const request = {
+      input: {text: req.body.text},
+      voice: {languageCode: 'en-US', ssmlGender: 'MALE'},
+      audioConfig: {audioEncoding: 'MP3'},
+    };
+    
+    generateClient.synthesizeSpeech(request).then(
+      (response) => {
+        res.set({ "Content-Type": "audio/mpeg" });
+        res.send(response[0].audioContent);
+        res.end()
+      }
+    ).catch((error) => res.sendStatus(400)); 
  });

  router.put("/recognize", (req, res, next) => {
-    // Creates a client
-    const client = new speech.SpeechClient();
-    
+
    // The audio file's encoding, sample rate in hertz, and BCP-47 language code
    const audio = {
      content: req.body.data.substring(req.body.data.indexOf("base64,") + 7),
@@ -29,7 +41,7 @@ module.exports = function () {
    };

    // Detects speech in the audio file
-    client.recognize(request).then((response) => {
+    recognizeClient.recognize(request).then((response) => {
      const transcription = response[0].results 
        .map((result) => result.alternatives[0].transcript)
        .join("\n");