Initial tests on speech synthesis

This commit is contained in:
Davide Passoni 2024-11-28 17:49:09 +01:00
parent 8580c5c62b
commit dd641fc2aa
10 changed files with 368 additions and 19 deletions

View File

@@ -11,11 +11,25 @@ import { AudioSink } from "./audiosink";
import { Unit } from "../unit/unit";
import { UnitSink } from "./unitsink";
import { AudioPacket, MessageType } from "./audiopacket";
import { AudioManagerStateChangedEvent, AudioSinksChangedEvent, AudioSourcesChangedEvent, ConfigLoadedEvent, SRSClientsChangedEvent } from "../events";
import {
AudioManagerDevicesChangedEvent,
AudioManagerInputChangedEvent,
AudioManagerOutputChangedEvent,
AudioManagerStateChangedEvent,
AudioSinksChangedEvent,
AudioSourcesChangedEvent,
ConfigLoadedEvent,
SRSClientsChangedEvent,
} from "../events";
import { OlympusConfig } from "../interfaces";
import { TextToSpeechSource } from "./texttospeechsource";
export class AudioManager {
#audioContext: AudioContext;
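/* Handle to the browser's Web Speech API synthesizer; seemingly unused in this commit, since generation goes through the server-side gtts route instead */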
#synth = window.speechSynthesis;
#devices: MediaDeviceInfo[] = [];
#input: MediaDeviceInfo;
#output: MediaDeviceInfo;
/* The playback pipeline enables audio playback on the speakers/headphones */
#playbackPipeline: PlaybackPipeline;
@@ -53,7 +67,7 @@ export class AudioManager {
code: key,
shiftKey: true,
ctrlKey: false,
altKey: false
altKey: false,
});
});
}
@@ -65,6 +79,10 @@ export class AudioManager {
this.#running = true;
this.#audioContext = new AudioContext({ sampleRate: 16000 });
//@ts-ignore
if (this.#output) this.#audioContext.setSinkId(this.#output.deviceId);
this.#playbackPipeline = new PlaybackPipeline();
/* Connect the audio websocket */
@@ -118,7 +136,7 @@ export class AudioManager {
});
/* Add the microphone source and connect it directly to the radio */
const microphoneSource = new MicrophoneSource();
const microphoneSource = new MicrophoneSource(this.#input);
microphoneSource.initialize().then(() => {
this.#sinks.forEach((sink) => {
if (sink instanceof RadioSink) microphoneSource.connect(sink);
@@ -130,7 +148,18 @@ export class AudioManager {
this.addRadio();
this.addRadio();
});
const textToSpeechSource = new TextToSpeechSource();
this.#sources.push(textToSpeechSource);
AudioManagerStateChangedEvent.dispatch(this.#running);
navigator.mediaDevices.enumerateDevices().then((devices) => {
this.#devices = devices;
AudioManagerDevicesChangedEvent.dispatch(devices);
});
this.#startSpeechRecognition();
}
stop() {
@@ -141,7 +170,7 @@ export class AudioManager {
this.#sources = [];
this.#sinks = [];
this.#socket?.close();
window.clearInterval(this.#syncInterval);
AudioSourcesChangedEvent.dispatch(this.#sources);
@@ -207,7 +236,7 @@ export class AudioManager {
this.#sinks.push(newRadio);
/* By default, name the radio with an incrementing number */
newRadio.setName(`Radio ${this.#sinks.length}`);
this.#sources[0].connect(newRadio);
this.#sources.find((source) => source instanceof MicrophoneSource)?.connect(newRadio);
AudioSinksChangedEvent.dispatch(this.#sinks);
}
@@ -256,6 +285,32 @@ export class AudioManager {
return this.#running;
}
setInput(input: MediaDeviceInfo) {
if (this.#devices.includes(input)) {
this.#input = input;
AudioManagerInputChangedEvent.dispatch(input);
this.stop();
this.start();
} else {
console.error("Requested input device is not in devices list");
}
}
setOutput(output: MediaDeviceInfo) {
if (this.#devices.includes(output)) {
this.#output = output;
AudioManagerOutputChangedEvent.dispatch(output);
this.stop();
this.start();
} else {
console.error("Requested output device is not in devices list");
}
}
playText(text: string) {
this.#sources.find((source) => source instanceof TextToSpeechSource)?.playText(text);
}
#syncRadioSettings() {
/* Send the radio settings of each radio to the SRS backend */
let message = {
@@ -275,4 +330,30 @@ export class AudioManager {
if (this.#socket?.readyState == 1) this.#socket?.send(new Uint8Array([AudioMessageType.settings, ...Buffer.from(JSON.stringify(message), "utf-8")]));
}
#startSpeechRecognition() {
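/* Prototype voice input, closely following MDN's "speech color changer" demo: recognized color names are printed to a ".output" element and applied as the page background */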
const grammar =
"#JSGF V1.0; grammar colors; public <color> = aqua | azure | beige | bisque | black | blue | brown | chocolate | coral | crimson | cyan | fuchsia | ghostwhite | gold | goldenrod | gray | green | indigo | ivory | khaki | lavender | lime | linen | magenta | maroon | moccasin | navy | olive | orange | orchid | peru | pink | plum | purple | red | salmon | sienna | silver | snow | tan | teal | thistle | tomato | turquoise | violet | white | yellow ;";
//@ts-ignore
const recognition = new window.webkitSpeechRecognition();
//@ts-ignore
const speechRecognitionList = new window.webkitSpeechGrammarList();
speechRecognitionList.addFromString(grammar, 1);
recognition.grammars = speechRecognitionList;
recognition.continuous = true;
recognition.lang = "en-US";
recognition.interimResults = true;
//recognition.maxAlternatives = 1;
const diagnostic = document.querySelector(".output");
const bg = document.querySelector("html");
recognition.start();
recognition.onresult = (event) => {
/* With continuous recognition the results accumulate, so read the most recent one */
const color = event.results[event.results.length - 1][0].transcript;
if (diagnostic) diagnostic.textContent = `Result received: ${color}`;
if (bg) bg.style.backgroundColor = color;
};
}
}
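Taken together, the new AudioManager surface can be driven like this from the caller's side; a minimal sketch (not part of the diff), assuming the manager has been started and using an illustrative device label:

const audioManager = getApp().getAudioManager();

/* Pick a specific microphone once the device list has been enumerated; setInput() restarts the manager with the new device */
AudioManagerDevicesChangedEvent.on((devices) => {
  const mic = devices.find((device) => device.kind === "audioinput" && device.label.includes("Headset"));
  if (mic) audioManager.setInput(mic);
});

/* Generate speech server-side and play it back through the text to speech source */
audioManager.playText("Olympus, radio check");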

View File

@@ -4,16 +4,25 @@ import { AudioSource } from "./audiosource";
export class MicrophoneSource extends AudioSource {
#sourceNode: MediaStreamAudioSourceNode;
#device: MediaDeviceInfo;
constructor() {
constructor(device?: MediaDeviceInfo) {
super();
this.setName("Microphone");
if (device) this.#device = device;
}
/* Asynchronously initialize the microphone and connect it to the output node */
async initialize() {
const microphone = await navigator.mediaDevices.getUserMedia({ audio: true });
const microphone = await navigator.mediaDevices.getUserMedia({
audio: this.#device
? {
deviceId: this.#device.deviceId,
}
: true,
});
if (getApp().getAudioManager().getAudioContext()) {
this.#sourceNode = getApp().getAudioManager().getAudioContext().createMediaStreamSource(microphone);
this.#sourceNode.connect(this.getOutputNode());
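One caveat on the new constraint (an observation, not a change in this diff): getUserMedia treats a bare deviceId value as an ideal constraint, so the browser may silently fall back to another microphone if the requested one is unavailable. An exact constraint surfaces the failure instead:

const microphone = await navigator.mediaDevices.getUserMedia({
  audio: { deviceId: { exact: this.#device.deviceId } }, // rejects with OverconstrainedError rather than falling back silently
});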

View File

@@ -0,0 +1,136 @@
import { AudioSource } from "./audiosource";
import { getApp } from "../olympusapp";
import { AudioSourcesChangedEvent } from "../events";
export class TextToSpeechSource extends AudioSource {
#source: AudioBufferSourceNode;
#duration: number = 0;
#currentPosition: number = 0;
#updateInterval: any;
#lastUpdateTime: number = 0;
#playing = false;
#audioBuffer: AudioBuffer;
#restartTimeout: any;
#looping = false;
constructor() {
super();
this.setName("Text to speech")
}
playText(text: string) {
const requestOptions = {
method: "PUT", // Specify the request method
headers: { "Content-Type": "application/json" }, // Specify the content type
body: JSON.stringify({ text }), // Send the data in JSON format
};
fetch(getApp().getExpressAddress() + `/api/texttospeech/generate`, requestOptions)
.then((response) => {
if (response.status === 200) {
console.log(`Text to speech generated correctly`);
return response.blob();
} else {
throw new Error("Error generating text to speech");
}
}) // Parse the response
.then((blob) => blob.arrayBuffer())
.then((contents) => {
getApp()
.getAudioManager()
.getAudioContext()
/* Decode the audio file. This method takes care of codecs */
.decodeAudioData(contents, (audioBuffer) => {
this.#audioBuffer = audioBuffer;
this.#duration = audioBuffer.duration;
this.play();
});
})
.catch((error) => console.error(error)); // Handle errors
}
play() {
/* A new buffer source must be created every time the file is played */
this.#source = getApp().getAudioManager().getAudioContext().createBufferSource();
this.#source.buffer = this.#audioBuffer;
this.#source.connect(this.getOutputNode());
this.#source.loop = this.#looping;
/* Start playing the file at the selected position */
this.#source.start(0, this.#currentPosition);
this.#playing = true;
const now = Date.now() / 1000;
this.#lastUpdateTime = now;
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
this.#updateInterval = setInterval(() => {
/* Update the current position value every second */
const now = Date.now() / 1000;
this.#currentPosition += now - this.#lastUpdateTime;
this.#lastUpdateTime = now;
if (this.#currentPosition > this.#duration) {
this.#currentPosition = 0;
if (!this.#looping) this.pause();
}
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}, 1000);
}
pause() {
/* Disconnect the source and update the position to the current time (precisely) */
this.#source.stop();
this.#source.disconnect();
this.#playing = false;
const now = Date.now() / 1000;
this.#currentPosition += now - this.#lastUpdateTime;
clearInterval(this.#updateInterval);
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}
getPlaying() {
return this.#playing;
}
getCurrentPosition() {
return this.#currentPosition;
}
getDuration() {
return this.#duration;
}
setCurrentPosition(percentPosition: number) {
/* To change the current play position we must:
1) pause the current playback;
2) update the current position value;
3) after some time, restart playing. The delay is needed to avoid immediately restarting many times if the user drags the position slider;
*/
if (this.#playing) {
clearTimeout(this.#restartTimeout);
this.#restartTimeout = setTimeout(() => this.play(), 1000);
}
this.pause();
this.#currentPosition = (percentPosition / 100) * this.#duration;
}
setLooping(looping: boolean) {
this.#looping = looping;
if (this.#source) this.#source.loop = looping;
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}
getLooping() {
return this.#looping;
}
}
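For reference, the AWACS menu below drives this source through the manager in a single call (the phrase is illustrative):

/* node-gtts generates the audio server-side, decodeAudioData parses it, and play() routes it to the output node */
getApp().getAudioManager().playText("Bandit group, bullseye 270 for 35");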

View File

@@ -466,6 +466,45 @@ export class AudioManagerStateChangedEvent {
}
}
export class AudioManagerDevicesChangedEvent {
static on(callback: (devices: MediaDeviceInfo[]) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.devices);
});
}
static dispatch(devices: MediaDeviceInfo[]) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { devices } }));
console.log(`Event ${this.name} dispatched`);
}
}
export class AudioManagerInputChangedEvent {
static on(callback: (input: MediaDeviceInfo) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.input);
});
}
static dispatch(input: MediaDeviceInfo) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { input } }));
console.log(`Event ${this.name} dispatched`);
}
}
export class AudioManagerOutputChangedEvent {
static on(callback: (output: MediaDeviceInfo) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.output);
});
}
static dispatch(output: MediaDeviceInfo) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { output } }));
console.log(`Event ${this.name} dispatched`);
}
}
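All three new events follow this file's existing static CustomEvent wrapper pattern, so subscribing mirrors dispatching; a minimal sketch:

AudioManagerInputChangedEvent.on((input) => {
  console.log(`Input device changed to ${input.label}`);
});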
/************** Mission data events ***************/
export class BullseyesDataChanged {
static on(callback: (bullseyes: { [name: string]: Bullseye }) => void) {

View File

@@ -11,15 +11,27 @@ import { UnitSinkPanel } from "./components/unitsinkpanel";
import { UnitSink } from "../../audio/unitsink";
import { FaMinus, FaVolumeHigh } from "react-icons/fa6";
import { getRandomColor } from "../../other/utils";
import { AudioManagerStateChangedEvent, AudioSinksChangedEvent, AudioSourcesChangedEvent, ShortcutsChangedEvent } from "../../events";
import {
AudioManagerDevicesChangedEvent,
AudioManagerInputChangedEvent,
AudioManagerOutputChangedEvent,
AudioManagerStateChangedEvent,
AudioSinksChangedEvent,
AudioSourcesChangedEvent,
ShortcutsChangedEvent,
} from "../../events";
import { OlDropdown, OlDropdownItem } from "../components/oldropdown";
export function AudioMenu(props: { open: boolean; onClose: () => void; children?: JSX.Element | JSX.Element[] }) {
const [devices, setDevices] = useState([] as MediaDeviceInfo[]);
const [sinks, setSinks] = useState([] as AudioSink[]);
const [sources, setSources] = useState([] as AudioSource[]);
const [audioManagerEnabled, setAudioManagerEnabled] = useState(false);
const [activeSource, setActiveSource] = useState(null as AudioSource | null);
const [count, setCount] = useState(0);
const [shortcuts, setShortcuts] = useState({});
const [input, setInput] = useState(undefined as undefined | MediaDeviceInfo);
const [output, setOutput] = useState(undefined as undefined | MediaDeviceInfo);
/* Preallocate 128 references for the source and sink panels. If the number of references changes, React will give an error */
const sourceRefs = Array(128)
@@ -61,6 +73,10 @@ export function AudioMenu(props: { open: boolean; onClose: () => void; children?
});
ShortcutsChangedEvent.on((shortcuts) => setShortcuts(shortcuts));
AudioManagerDevicesChangedEvent.on((devices) => setDevices([...devices]));
AudioManagerInputChangedEvent.on((input) => setInput(input));
AudioManagerOutputChangedEvent.on((output) => setOutput(output));
}, []);
/* When the sinks or sources change, use the count state to force a rerender to update the connection lines */
@@ -127,6 +143,40 @@ export function AudioMenu(props: { open: boolean; onClose: () => void; children?
`}
style={{ paddingRight: `${paddingRight}px` }}
>
{audioManagerEnabled && (
<>
<span>Input</span>
<OlDropdown label={input ? input.label : "Default"}>
{devices
.filter((device) => device.kind === "audioinput")
.map((device, idx) => {
return (
<OlDropdownItem key={idx} onClick={() => getApp().getAudioManager().setInput(device)}>
<div className="w-full truncate">{device.label}</div>
</OlDropdownItem>
);
})}
</OlDropdown>
</>
)}
{audioManagerEnabled && (
<>
{" "}
<span>Output</span>
<OlDropdown label={output ? output.label : "Default"}>
{devices
.filter((device) => device.kind === "audiooutput")
.map((device, idx) => {
return (
<OlDropdownItem key={idx} onClick={() => getApp().getAudioManager().setOutput(device)}>
<div className="w-full truncate">{device.label}</div>
</OlDropdownItem>
);
})}
</OlDropdown>
</>
)}
{audioManagerEnabled && <span>Audio sources</span>}
<>
{sources.map((source, idx) => {

View File

@@ -151,6 +151,7 @@ export function AWACSMenu(props: { open: boolean; onClose: () => void; children?
{readout.map((line) => (
<span className="font-bold italic text-cyan-500">{line}</span>
))}
<button onClick={() => getApp().getAudioManager().playText(readout.join(" "))}>Play</button>
</>
)}
</div>

View File

@@ -7,14 +7,16 @@ import { FaChevronUp, FaVolumeHigh, FaXmark } from "react-icons/fa6";
import { OlRangeSlider } from "../../components/olrangeslider";
import { FileSource } from "../../../audio/filesource";
import { MicrophoneSource } from "../../../audio/microphonesource";
import { TextToSpeechSource } from "../../../audio/texttospeechsource";
export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpanded: () => void }, ref: ForwardedRef<HTMLDivElement>) => {
const [meterLevel, setMeterLevel] = useState(0);
const [expanded, setExpanded] = useState(false);
const [text, setText] = useState("");
useEffect(() => {
if (props.onExpanded) props.onExpanded();
}, [expanded])
}, [expanded]);
useEffect(() => {
setInterval(() => {
@@ -46,13 +48,9 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
/>
</div>
<div className="flex w-full overflow-hidden">
<span
className={`my-auto truncate`}
>
{props.source.getName()}
</span>
<span className={`my-auto truncate`}>{props.source.getName()}</span>
</div>
{!(props.source instanceof MicrophoneSource) && (
{!(props.source instanceof MicrophoneSource) && !(props.source instanceof TextToSpeechSource) && (
<div
className={`
mb-auto aspect-square cursor-pointer rounded-md p-2
@@ -68,21 +66,38 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
</div>
{expanded && (
<>
{props.source instanceof FileSource && (
{(props.source instanceof FileSource || props.source instanceof TextToSpeechSource) && (
<div className="flex flex-col gap-2 rounded-md bg-olympus-400 p-2">
{props.source instanceof TextToSpeechSource && (
<input
className={`
block h-10 w-full border-[2px] bg-gray-50 py-2.5 text-center
text-sm text-gray-900
dark:border-gray-700 dark:bg-olympus-600 dark:text-white
dark:placeholder-gray-400 dark:focus:border-blue-700
dark:focus:ring-blue-700
focus:border-blue-700 focus:ring-blue-500
`}
value={text}
onChange={(ev) => {
setText(ev.target.value);
}}
/>
)}
<div className="flex gap-4">
<OlStateButton
checked={false}
icon={props.source.getPlaying() ? faPause : faPlay}
onClick={() => {
if (props.source instanceof FileSource) props.source.getPlaying() ? props.source.pause() : props.source.play();
else if (props.source instanceof TextToSpeechSource) props.source.getPlaying() ? props.source.pause() : props.source.playText(text);
}}
tooltip="Play file"
tooltip="Play file / Text to speech"
></OlStateButton>
<OlRangeSlider
value={props.source.getDuration() > 0 ? (props.source.getCurrentPosition() / props.source.getDuration()) * 100 : 0}
onChange={(ev) => {
if (props.source instanceof FileSource) props.source.setCurrentPosition(parseFloat(ev.currentTarget.value));
if (props.source instanceof FileSource || props.source instanceof TextToSpeechSource) props.source.setCurrentPosition(parseFloat(ev.currentTarget.value));
}}
className="my-auto"
/>
@@ -90,7 +105,7 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
checked={props.source.getLooping()}
icon={faRepeat}
onClick={() => {
if (props.source instanceof FileSource) props.source.setLooping(!props.source.getLooping());
if (props.source instanceof FileSource || props.source instanceof TextToSpeechSource) props.source.setLooping(!props.source.getLooping());
}}
tooltip="Loop"
></OlStateButton>

View File

@@ -21,6 +21,7 @@
"express-basic-auth": "^1.2.1",
"http-proxy-middleware": "^2.0.6",
"morgan": "~1.9.1",
"node-gtts": "^2.0.2",
"open": "^10.0.0",
"regedit": "^5.1.2",
"save": "^2.9.0",

View File

@@ -27,6 +27,7 @@ module.exports = function (configLocation, viteProxy) {
"databases"
)
);
const textToSpeechRouter = require("./routes/api/texttospeech")();
/* Load the config and create the express app */
let config = {};
@@ -75,10 +76,12 @@ module.exports = function (configLocation, viteProxy) {
app.use("/api/airbases", airbasesRouter);
app.use("/api/elevation", elevationRouter);
app.use("/api/databases", databasesRouter);
app.use("/api/texttospeech", textToSpeechRouter);
app.use("/resources", resourcesRouter);
app.use("/express/api/airbases", airbasesRouter);
app.use("/express/api/elevation", elevationRouter);
app.use("/express/api/databases", databasesRouter);
app.use("/express/api/texttospeech", textToSpeechRouter);
app.use("/express/resources", resourcesRouter);
/* Set default index */

View File

@@ -0,0 +1,14 @@
import express = require("express");
const gtts = require("node-gtts")("en");
const router = express.Router();
module.exports = function () {
router.put("/generate", (req, res) => {
res.set({ "Content-Type": "audio/mpeg" });
gtts.stream(req.body.text).pipe(res);
});
return router;
};
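A quick way to exercise the new route end to end; a sketch assuming Node 18+ (for the global fetch) and a server listening on localhost:3000, both assumptions:

import fs = require("fs");

fetch("http://localhost:3000/api/texttospeech/generate", {
  method: "PUT",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ text: "Radio check" }),
})
  .then((res) => res.arrayBuffer())
  .then((buf) => fs.writeFileSync("out.mp3", Buffer.from(buf))); // node-gtts streams back audio/mpeg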