Initial tests on speech synthesis

This commit is contained in:
Davide Passoni 2024-11-28 17:49:09 +01:00
parent 8580c5c62b
commit dd641fc2aa
10 changed files with 368 additions and 19 deletions

View File

@@ -11,11 +11,25 @@ import { AudioSink } from "./audiosink";
import { Unit } from "../unit/unit";
import { UnitSink } from "./unitsink";
import { AudioPacket, MessageType } from "./audiopacket";
import { AudioManagerStateChangedEvent, AudioSinksChangedEvent, AudioSourcesChangedEvent, ConfigLoadedEvent, SRSClientsChangedEvent } from "../events";
import {
AudioManagerDevicesChangedEvent,
AudioManagerInputChangedEvent,
AudioManagerOutputChangedEvent,
AudioManagerStateChangedEvent,
AudioSinksChangedEvent,
AudioSourcesChangedEvent,
ConfigLoadedEvent,
SRSClientsChangedEvent,
} from "../events";
import { OlympusConfig } from "../interfaces";
import { TextToSpeechSource } from "./texttospeechsource";
export class AudioManager {
#audioContext: AudioContext;
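/* Handle to the browser's Web Speech API synthesizer; seemingly unused in this commit, since generation goes through the server-side gtts route instead */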
#synth = window.speechSynthesis;
#devices: MediaDeviceInfo[] = [];
#input: MediaDeviceInfo;
#output: MediaDeviceInfo;
/* The playback pipeline enables audio playback on the speakers/headphones */
#playbackPipeline: PlaybackPipeline;
@@ -53,7 +67,7 @@ export class AudioManager {
code: key,
shiftKey: true,
ctrlKey: false,
altKey: false
altKey: false,
});
});
}
@@ -65,6 +79,10 @@ export class AudioManager {
this.#running = true;
this.#audioContext = new AudioContext({ sampleRate: 16000 });
//@ts-ignore
if (this.#output) this.#audioContext.setSinkId(this.#output.deviceId);
this.#playbackPipeline = new PlaybackPipeline();
/* Connect the audio websocket */
@@ -118,7 +136,7 @@ export class AudioManager {
});
/* Add the microphone source and connect it directly to the radio */
const microphoneSource = new MicrophoneSource();
const microphoneSource = new MicrophoneSource(this.#input);
microphoneSource.initialize().then(() => {
this.#sinks.forEach((sink) => {
if (sink instanceof RadioSink) microphoneSource.connect(sink);
@@ -130,7 +148,18 @@ export class AudioManager {
this.addRadio();
this.addRadio();
});
const textToSpeechSource = new TextToSpeechSource();
this.#sources.push(textToSpeechSource);
AudioManagerStateChangedEvent.dispatch(this.#running);
navigator.mediaDevices.enumerateDevices().then((devices) => {
this.#devices = devices;
AudioManagerDevicesChangedEvent.dispatch(devices);
});
this.#startSpeechRecognition();
}
stop() {
@@ -141,7 +170,7 @@ export class AudioManager {
this.#sources = [];
this.#sinks = [];
this.#socket?.close();
window.clearInterval(this.#syncInterval);
AudioSourcesChangedEvent.dispatch(this.#sources);
@@ -207,7 +236,7 @@ export class AudioManager {
this.#sinks.push(newRadio);
/* By default, name the radio with an incrementing number */
newRadio.setName(`Radio ${this.#sinks.length}`);
this.#sources[0].connect(newRadio);
this.#sources.find((source) => source instanceof MicrophoneSource)?.connect(newRadio);
AudioSinksChangedEvent.dispatch(this.#sinks);
}
@@ -256,6 +285,32 @@ export class AudioManager {
return this.#running;
}
setInput(input: MediaDeviceInfo) {
if (this.#devices.includes(input)) {
this.#input = input;
AudioManagerInputChangedEvent.dispatch(input);
this.stop();
this.start();
} else {
console.error("Requested input device is not in devices list");
}
}
setOutput(output: MediaDeviceInfo) {
if (this.#devices.includes(output)) {
this.#output = output;
AudioManagerOutputChangedEvent.dispatch(output);
this.stop();
this.start();
} else {
console.error("Requested output device is not in devices list");
}
}
playText(text: string) {
this.#sources.find((source) => source instanceof TextToSpeechSource)?.playText(text);
}
#syncRadioSettings() {
/* Send the radio settings of each radio to the SRS backend */
let message = {
@@ -275,4 +330,30 @@ export class AudioManager {
if (this.#socket?.readyState == 1) this.#socket?.send(new Uint8Array([AudioMessageType.settings, ...Buffer.from(JSON.stringify(message), "utf-8")]));
}
#startSpeechRecognition() {
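/* Prototype voice input, closely following MDN's "speech color changer" demo: recognized color names are printed to a ".output" element and applied as the page background */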
const grammar =
"#JSGF V1.0; grammar colors; public <color> = aqua | azure | beige | bisque | black | blue | brown | chocolate | coral | crimson | cyan | fuchsia | ghostwhite | gold | goldenrod | gray | green | indigo | ivory | khaki | lavender | lime | linen | magenta | maroon | moccasin | navy | olive | orange | orchid | peru | pink | plum | purple | red | salmon | sienna | silver | snow | tan | teal | thistle | tomato | turquoise | violet | white | yellow ;";
//@ts-ignore
const recognition = new window.webkitSpeechRecognition();
//@ts-ignore
const speechRecognitionList = new window.webkitSpeechGrammarList();
speechRecognitionList.addFromString(grammar, 1);
recognition.grammars = speechRecognitionList;
recognition.continuous = true;
recognition.lang = "en-US";
recognition.interimResults = true;
//recognition.maxAlternatives = 1;
const diagnostic = document.querySelector(".output");
const bg = document.querySelector("html");
recognition.start();
recognition.onresult = (event) => {
/* With continuous recognition the results accumulate, so read the most recent one */
const color = event.results[event.results.length - 1][0].transcript;
if (diagnostic) diagnostic.textContent = `Result received: ${color}`;
if (bg) bg.style.backgroundColor = color;
};
}
}
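Taken together, the new AudioManager surface can be driven like this from the caller's side; a minimal sketch (not part of the diff), assuming the manager has been started and using an illustrative device label:

const audioManager = getApp().getAudioManager();

/* Pick a specific microphone once the device list has been enumerated; setInput() restarts the manager with the new device */
AudioManagerDevicesChangedEvent.on((devices) => {
  const mic = devices.find((device) => device.kind === "audioinput" && device.label.includes("Headset"));
  if (mic) audioManager.setInput(mic);
});

/* Generate speech server-side and play it back through the text to speech source */
audioManager.playText("Olympus, radio check");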

View File

@@ -4,16 +4,25 @@ import { AudioSource } from "./audiosource";
export class MicrophoneSource extends AudioSource {
#sourceNode: MediaStreamAudioSourceNode;
#device: MediaDeviceInfo;
constructor() {
constructor(device?: MediaDeviceInfo) {
super();
this.setName("Microphone");
if (device) this.#device = device;
}
/* Asynchronously initialize the microphone and connect it to the output node */
async initialize() {
const microphone = await navigator.mediaDevices.getUserMedia({ audio: true });
const microphone = await navigator.mediaDevices.getUserMedia({
audio: this.#device
? {
deviceId: this.#device.deviceId,
}
: true,
});
if (getApp().getAudioManager().getAudioContext()) {
this.#sourceNode = getApp().getAudioManager().getAudioContext().createMediaStreamSource(microphone);
this.#sourceNode.connect(this.getOutputNode());
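One caveat on the new constraint (an observation, not a change in this diff): getUserMedia treats a bare deviceId value as an ideal constraint, so the browser may silently fall back to another microphone if the requested one is unavailable. An exact constraint surfaces the failure instead:

const microphone = await navigator.mediaDevices.getUserMedia({
  audio: { deviceId: { exact: this.#device.deviceId } }, // rejects with OverconstrainedError rather than falling back silently
});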

View File

@@ -0,0 +1,136 @@
import { AudioSource } from "./audiosource";
import { getApp } from "../olympusapp";
import { AudioSourcesChangedEvent } from "../events";
export class TextToSpeechSource extends AudioSource {
#source: AudioBufferSourceNode;
#duration: number = 0;
#currentPosition: number = 0;
#updateInterval: any;
#lastUpdateTime: number = 0;
#playing = false;
#audioBuffer: AudioBuffer;
#restartTimeout: any;
#looping = false;
constructor() {
super();
this.setName("Text to speech")
}
playText(text: string) {
const requestOptions = {
method: "PUT", // Specify the request method
headers: { "Content-Type": "application/json" }, // Specify the content type
body: JSON.stringify({ text }), // Send the data in JSON format
};
fetch(getApp().getExpressAddress() + `/api/texttospeech/generate`, requestOptions)
.then((response) => {
if (response.status === 200) {
console.log(`Text to speech generated correctly`);
return response.blob();
} else {
throw new Error("Error generating text to speech");
}
}) // Parse the response
.then((blob) => blob.arrayBuffer())
.then((contents) => {
getApp()
.getAudioManager()
.getAudioContext()
/* Decode the audio file. This method takes care of codecs */
.decodeAudioData(contents, (audioBuffer) => {
this.#audioBuffer = audioBuffer;
this.#duration = audioBuffer.duration;
this.play();
});
})
.catch((error) => console.error(error)); // Handle errors
}
play() {
/* A new buffer source must be created every time the file is played */
this.#source = getApp().getAudioManager().getAudioContext().createBufferSource();
this.#source.buffer = this.#audioBuffer;
this.#source.connect(this.getOutputNode());
this.#source.loop = this.#looping;
/* Start playing the file at the selected position */
this.#source.start(0, this.#currentPosition);
this.#playing = true;
const now = Date.now() / 1000;
this.#lastUpdateTime = now;
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
this.#updateInterval = setInterval(() => {
/* Update the current position value every second */
const now = Date.now() / 1000;
this.#currentPosition += now - this.#lastUpdateTime;
this.#lastUpdateTime = now;
if (this.#currentPosition > this.#duration) {
this.#currentPosition = 0;
if (!this.#looping) this.pause();
}
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}, 1000);
}
pause() {
/* Disconnect the source and update the position to the current time (precisely) */
this.#source.stop();
this.#source.disconnect();
this.#playing = false;
const now = Date.now() / 1000;
this.#currentPosition += now - this.#lastUpdateTime;
clearInterval(this.#updateInterval);
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}
getPlaying() {
return this.#playing;
}
getCurrentPosition() {
return this.#currentPosition;
}
getDuration() {
return this.#duration;
}
setCurrentPosition(percentPosition: number) {
/* To change the current play position we must:
1) pause the current playback;
2) update the current position value;
3) after some time, restart playing. The delay is needed to avoid immediately restarting many times if the user drags the position slider;
*/
if (this.#playing) {
clearTimeout(this.#restartTimeout);
this.#restartTimeout = setTimeout(() => this.play(), 1000);
}
this.pause();
this.#currentPosition = (percentPosition / 100) * this.#duration;
}
setLooping(looping: boolean) {
this.#looping = looping;
if (this.#source) this.#source.loop = looping;
AudioSourcesChangedEvent.dispatch(getApp().getAudioManager().getSources());
}
getLooping() {
return this.#looping;
}
}
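For reference, the AWACS menu below drives this source through the manager in a single call (the phrase is illustrative):

/* node-gtts generates the audio server-side, decodeAudioData parses it, and play() routes it to the output node */
getApp().getAudioManager().playText("Bandit group, bullseye 270 for 35");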

View File

@@ -466,6 +466,45 @@ export class AudioManagerStateChangedEvent {
}
}
export class AudioManagerDevicesChangedEvent {
static on(callback: (devices: MediaDeviceInfo[]) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.devices);
});
}
static dispatch(devices: MediaDeviceInfo[]) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { devices } }));
console.log(`Event ${this.name} dispatched`);
}
}
export class AudioManagerInputChangedEvent {
static on(callback: (input: MediaDeviceInfo) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.input);
});
}
static dispatch(input: MediaDeviceInfo) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { input } }));
console.log(`Event ${this.name} dispatched`);
}
}
export class AudioManagerOutputChangedEvent {
static on(callback: (output: MediaDeviceInfo) => void) {
document.addEventListener(this.name, (ev: CustomEventInit) => {
callback(ev.detail.output);
});
}
static dispatch(output: MediaDeviceInfo) {
document.dispatchEvent(new CustomEvent(this.name, { detail: { output } }));
console.log(`Event ${this.name} dispatched`);
}
}
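All three new events follow this file's existing static CustomEvent wrapper pattern, so subscribing mirrors dispatching; a minimal sketch:

AudioManagerInputChangedEvent.on((input) => {
  console.log(`Input device changed to ${input.label}`);
});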
/************** Mission data events ***************/
export class BullseyesDataChanged {
static on(callback: (bullseyes: { [name: string]: Bullseye }) => void) {

View File

@@ -11,15 +11,27 @@ import { UnitSinkPanel } from "./components/unitsinkpanel";
import { UnitSink } from "../../audio/unitsink";
import { FaMinus, FaVolumeHigh } from "react-icons/fa6";
import { getRandomColor } from "../../other/utils";
import { AudioManagerStateChangedEvent, AudioSinksChangedEvent, AudioSourcesChangedEvent, ShortcutsChangedEvent } from "../../events";
import {
AudioManagerDevicesChangedEvent,
AudioManagerInputChangedEvent,
AudioManagerOutputChangedEvent,
AudioManagerStateChangedEvent,
AudioSinksChangedEvent,
AudioSourcesChangedEvent,
ShortcutsChangedEvent,
} from "../../events";
import { OlDropdown, OlDropdownItem } from "../components/oldropdown";
export function AudioMenu(props: { open: boolean; onClose: () => void; children?: JSX.Element | JSX.Element[] }) {
const [devices, setDevices] = useState([] as MediaDeviceInfo[]);
const [sinks, setSinks] = useState([] as AudioSink[]);
const [sources, setSources] = useState([] as AudioSource[]);
const [audioManagerEnabled, setAudioManagerEnabled] = useState(false);
const [activeSource, setActiveSource] = useState(null as AudioSource | null);
const [count, setCount] = useState(0);
const [shortcuts, setShortcuts] = useState({});
const [input, setInput] = useState(undefined as undefined | MediaDeviceInfo);
const [output, setOutput] = useState(undefined as undefined | MediaDeviceInfo);
/* Preallocate 128 references for the source and sink panels. If the number of references changes, React will give an error */
const sourceRefs = Array(128)
@@ -61,6 +73,10 @@ export function AudioMenu(props: { open: boolean; onClose: () => void; children?
});
ShortcutsChangedEvent.on((shortcuts) => setShortcuts(shortcuts));
AudioManagerDevicesChangedEvent.on((devices) => setDevices([...devices]));
AudioManagerInputChangedEvent.on((input) => setInput(input));
AudioManagerOutputChangedEvent.on((output) => setOutput(output));
}, []);
/* When the sinks or sources change, use the count state to force a rerender to update the connection lines */
@@ -127,6 +143,40 @@ export function AudioMenu(props: { open: boolean; onClose: () => void; children?
`}
style={{ paddingRight: `${paddingRight}px` }}
>
{audioManagerEnabled && (
<>
<span>Input</span>
<OlDropdown label={input ? input.label : "Default"}>
{devices
.filter((device) => device.kind === "audioinput")
.map((device, idx) => {
return (
<OlDropdownItem key={idx} onClick={() => getApp().getAudioManager().setInput(device)}>
<div className="w-full truncate">{device.label}</div>
</OlDropdownItem>
);
})}
</OlDropdown>
</>
)}
{audioManagerEnabled && (
<>
{" "}
<span>Output</span>
<OlDropdown label={output ? output.label : "Default"}>
{devices
.filter((device) => device.kind === "audiooutput")
.map((device, idx) => {
return (
<OlDropdownItem key={idx} onClick={() => getApp().getAudioManager().setOutput(device)}>
<div className="w-full truncate">{device.label}</div>
</OlDropdownItem>
);
})}
</OlDropdown>
</>
)}
{audioManagerEnabled && <span>Audio sources</span>}
<>
{sources.map((source, idx) => {

View File

@@ -151,6 +151,7 @@ export function AWACSMenu(props: { open: boolean; onClose: () => void; children?
{readout.map((line) => (
<span className="font-bold italic text-cyan-500">{line}</span>
))}
<button onClick={() => getApp().getAudioManager().playText(readout.join(" "))}>Play</button>
</>
)}
</div>

View File

@@ -7,14 +7,16 @@ import { FaChevronUp, FaVolumeHigh, FaXmark } from "react-icons/fa6";
import { OlRangeSlider } from "../../components/olrangeslider";
import { FileSource } from "../../../audio/filesource";
import { MicrophoneSource } from "../../../audio/microphonesource";
import { TextToSpeechSource } from "../../../audio/texttospeechsource";
export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpanded: () => void }, ref: ForwardedRef<HTMLDivElement>) => {
const [meterLevel, setMeterLevel] = useState(0);
const [expanded, setExpanded] = useState(false);
const [text, setText] = useState("");
useEffect(() => {
if (props.onExpanded) props.onExpanded();
}, [expanded])
}, [expanded]);
useEffect(() => {
setInterval(() => {
@@ -46,13 +48,9 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
/>
</div>
<div className="flex w-full overflow-hidden">
<span
className={`my-auto truncate`}
>
{props.source.getName()}
</span>
<span className={`my-auto truncate`}>{props.source.getName()}</span>
</div>
{!(props.source instanceof MicrophoneSource) && (
{!(props.source instanceof MicrophoneSource) && !(props.source instanceof TextToSpeechSource) && (
<div
className={`
mb-auto aspect-square cursor-pointer rounded-md p-2
@@ -68,21 +66,38 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
</div>
{expanded && (
<>
{props.source instanceof FileSource && (
{(props.source instanceof FileSource || props.source instanceof TextToSpeechSource) && (
<div className="flex flex-col gap-2 rounded-md bg-olympus-400 p-2">
{props.source instanceof TextToSpeechSource && (
<input
className={`
block h-10 w-full border-[2px] bg-gray-50 py-2.5 text-center
text-sm text-gray-900
dark:border-gray-700 dark:bg-olympus-600 dark:text-white
dark:placeholder-gray-400 dark:focus:border-blue-700
dark:focus:ring-blue-700
focus:border-blue-700 focus:ring-blue-500
`}
value={text}
onChange={(ev) => {
setText(ev.target.value);
}}
/>
)}
<div className="flex gap-4">
<OlStateButton
checked={false}
icon={props.source.getPlaying() ? faPause : faPlay}
onClick={() => {
if (props.source instanceof FileSource) props.source.getPlaying() ? props.source.pause() : props.source.play();
else if (props.source instanceof TextToSpeechSource) props.source.getPlaying() ? props.source.pause() : props.source.playText(text);
}}
tooltip="Play file"
tooltip="Play file / Text to speech"
></OlStateButton>
<OlRangeSlider
value={props.source.getDuration() > 0 ? (props.source.getCurrentPosition() / props.source.getDuration()) * 100 : 0}
onChange={(ev) => {
if (props.source instanceof FileSource) props.source.setCurrentPosition(parseFloat(ev.currentTarget.value));
if (props.source instanceof FileSource || props.source instanceof TextToSpeechSource) props.source.setCurrentPosition(parseFloat(ev.currentTarget.value));
}}
className="my-auto"
/>
@@ -90,7 +105,7 @@ export const AudioSourcePanel = forwardRef((props: { source: AudioSource; onExpa
checked={props.source.getLooping()}
icon={faRepeat}
onClick={() => {
if (props.source instanceof FileSource) props.source.setLooping(!props.source.getLooping());
if (props.source instanceof FileSource || props.source instanceof TextToSpeechSource) props.source.setLooping(!props.source.getLooping());
}}
tooltip="Loop"
></OlStateButton>

View File

@@ -21,6 +21,7 @@
"express-basic-auth": "^1.2.1",
"http-proxy-middleware": "^2.0.6",
"morgan": "~1.9.1",
"node-gtts": "^2.0.2",
"open": "^10.0.0",
"regedit": "^5.1.2",
"save": "^2.9.0",

View File

@@ -27,6 +27,7 @@ module.exports = function (configLocation, viteProxy) {
"databases"
)
);
const textToSpeechRouter = require("./routes/api/texttospeech")();
/* Load the config and create the express app */
let config = {};
@@ -75,10 +76,12 @@ module.exports = function (configLocation, viteProxy) {
app.use("/api/airbases", airbasesRouter);
app.use("/api/elevation", elevationRouter);
app.use("/api/databases", databasesRouter);
app.use("/api/texttospeech", textToSpeechRouter);
app.use("/resources", resourcesRouter);
app.use("/express/api/airbases", airbasesRouter);
app.use("/express/api/elevation", elevationRouter);
app.use("/express/api/databases", databasesRouter);
app.use("/express/api/texttospeech", textToSpeechRouter);
app.use("/express/resources", resourcesRouter);
/* Set default index */

View File

@@ -0,0 +1,14 @@
import express = require("express");
const gtts = require("node-gtts")("en");
const router = express.Router();
module.exports = function () {
router.put("/generate", (req, res) => {
res.set({ "Content-Type": "audio/mpeg" });
gtts.stream(req.body.text).pipe(res);
});
return router;
};
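A quick way to exercise the new route end to end; a sketch assuming Node 18+ (for the global fetch) and a server listening on localhost:3000, both assumptions:

import fs = require("fs");

fetch("http://localhost:3000/api/texttospeech/generate", {
  method: "PUT",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ text: "Radio check" }),
})
  .then((res) => res.arrayBuffer())
  .then((buf) => fs.writeFileSync("out.mp3", Buffer.from(buf))); // node-gtts streams back audio/mpeg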