import { Mic } from '@solaborate/calls/webrtc';
import { getAzureSpeechToken } from 'api/speechToText.js';
import useConferenceConfigurations from 'calls/hooks/useConferenceConfigurations.js';
import Avatar from 'calls/components/Avatar.jsx';
import Modal from 'calls/components/Modal.jsx';
import translate from 'i18n-translations/translate.jsx';
import { buildProfilePic } from 'infrastructure/helpers/thumbnailHelper.js';
import { SpeechRecognizer, SpeechConfig, AudioConfig } from 'microsoft-cognitiveservices-speech-sdk';
import { forwardRef, useRef, useEffect } from 'react';
import { isMobile } from 'react-device-detect';
import styled, { css } from 'styled-components';
import { v4 } from 'uuid';
import LocalParticipant from 'calls/LocalParticipant.js';
import { useIntl } from 'react-intl';
import ControlsActions from 'calls/enums/ControlsActions.js';
import DarkTheme from 'calls/styles/DarkTheme.js';
import { APP_CONFIG } from 'constants/global-variables.js';

/**
 * Scrollable flex column that hosts the caption list.
 * Consecutive children are separated by a 24px top margin.
 *
 * Transient props:
 * - `$isDarkMode`: paints nested paragraphs with the dark-theme gray.
 * - `$isMobile`: adds a narrow-viewport (max-width 480px) font-size and max-width override.
 */
const StyledSpeechToText = styled.div`
	display: flex;
	flex-direction: column;
	height: 100%;
	padding: 10px;
	margin-left: auto;
	margin-right: auto;
	border-radius: 6px;
	overflow: auto;

	& > * + * {
		margin-top: 24px;
	}

	p {
		font-size: 16px;
		padding: 0;
		margin: 0 0 0 16px;
		${({ $isDarkMode }) =>
			$isDarkMode &&
			css`
				color: ${DarkTheme.colors.grayFive};
			`}
	}

	${({ $isMobile }) =>
		$isMobile &&
		css`
			@media (max-width: 480px) {
				font-size: 18px;
				max-width: 300px;
			}
		`}
`;

/**
 * Row layout for a single caption entry: avatar on the left, participant details and
 * the recognized text stacked in the `.content` column.
 *
 * Transient props:
 * - `$isDarkMode`: switches the text color to the dark-theme gray and dims the
 *   `.participant-details` paragraphs.
 */
const StyledMessages = styled.div`
	display: flex;
	align-items: center;
	padding: 4px;
	${({ $isDarkMode }) =>
		$isDarkMode &&
		css`
			color: ${DarkTheme.colors.grayFive};
		`}

	.avatar {
		display: flex;
		align-self: start;
	}

	.participant-details {
		p {
			font-size: 14px !important;
			line-height: 16px !important;
			${({ $isDarkMode }) =>
				$isDarkMode &&
				css`
					color: ${DarkTheme.colors.grayFive};
					opacity: 0.75;
				`}
		}
	}

	.participant-message {
		font-size: 16px;
		font-weight: 500;
	}

	.content {
		display: flex;
		flex-direction: column;
		width: 100%;

		div {
			display: flex;
			align-items: center;
			justify-content: space-between;
			flex-direction: row;
		}
	}
`;

/**
 * Centered placeholder text shown while there are no caption messages yet.
 *
 * Transient props:
 * - `$isDarkMode`: switches the text color to the dark-theme gray.
 */
const StyledEmptyState = styled.div`
	font-weight: 500;
	text-align: center;
	margin-top: 32px;

	${({ $isDarkMode }) =>
		$isDarkMode &&
		css`
			color: ${DarkTheme.colors.grayFive};
		`}
`;

const ForwardedSpeechToText = forwardRef(
	(
		/**
		 * @type {{isMobile: boolean; isDarkMode: boolean} & React.DetailedHTMLProps<React.HTMLAttributes<HTMLDivElement>, HTMLDivElement>}
		 */
		{ children, isMobile, isDarkMode, ...rest },
		ref
	) => {
		return (
			<StyledSpeechToText $isMobile={isMobile} $isDarkMode={isDarkMode} ref={ref} {...rest}>
				{children}
			</StyledSpeechToText>
		);
	}
);

/**
 * @typedef {object} Message
 * @property {string} id
 * @property {string} participantName
 * @property {string} participantId
 * @property {string} participantPicture
 * @property {number} timestamp
 * @property {string} message
 * @param {object} props
 * @param {Message} props.message
 * @param {boolean} props.isGridView
 * @param {boolean} props.isDarkMode
 * @description Contains a single message that is recognized, or being recognized by the speech SDK
 */
const Message = ({ message, isGridView, isDarkMode }) => (
	<StyledMessages key={message.id} $isDarkMode={isDarkMode}>
		<div className='avatar'>
			<Avatar
				size={isGridView ? 60 : 48}
				src={message.participantPicture?.includes('user') ? null : buildProfilePic(message.participantPicture)}
				name={message.participantName}
			/>
		</div>
		<div className='content'>
			<div className='participant-details'>
				<p>{message.participantName}</p>
				<p>{new Date(message.timestamp).toLocaleTimeString()}</p>
			</div>
			<p className='participant-message'>{message.message}</p>
		</div>
	</StyledMessages>
);

/**
 * Opens a Microsoft Speech SDK recognition session for one participant's microphone track and
 * forwards the recognized text to the conference configuration store. Renders nothing.
 *
 * The session is created once the Azure token has been fetched and is stopped and closed when
 * the component unmounts or the participant's microphone track changes.
 *
 * @param {object} props
 * @param {import('calls/LocalParticipant.js').default | import('calls/RemoteParticipant.js').default} props.participant
 */
const Connection = ({ participant }) => {
	const conferenceConfigs = useConferenceConfigurations();
	const trackController =
		participant instanceof LocalParticipant ? participant.localTrackController : participant.remoteTrackController;
	const track = trackController.tracks[Mic]?.track ?? null;

	useEffect(() => {
		// Without a microphone track there is nothing to feed into the recognizer.
		if (!track) {
			return undefined;
		}

		// Set by the cleanup so that an in-flight token request cannot start a recognizer
		// after this effect instance has already been torn down.
		let isDisposed = false;
		/** @type {SpeechRecognizer | null} */
		let recognizer = null;
		/** @type {Message | null} Message currently being built from interim results. */
		let currentMessage = null;

		const startRecognition = async () => {
			const response = await getAzureSpeechToken();
			const { authToken } = await getTokenOrRefresh(response);
			if (isDisposed) {
				return;
			}
			if (!authToken) {
				throw new Error('Azure speech token is missing');
			}

			const speechConfig = SpeechConfig.fromAuthorizationToken(authToken, APP_CONFIG.speechToTextRegion);
			speechConfig.speechRecognitionLanguage = 'en-US';
			const audioConfig = AudioConfig.fromStreamInput(new MediaStream([track]));
			recognizer = new SpeechRecognizer(speechConfig, audioConfig);

			// Interim hypotheses: the first one creates the message, later ones update it in place.
			recognizer.recognizing = (_sender, event) => {
				if (isDisposed) {
					return;
				}
				if (!currentMessage) {
					currentMessage = {
						id: v4(),
						participantId: participant.id,
						participantName: participant.name,
						participantPicture: participant.picture || '',
						timestamp: new Date().getTime(),
						message: event.result.text,
					};
					// NOTE(review): when the first interim result is empty the message is created but
					// never pushed, so later updates to it are not shown — confirm this is intended.
					if (event.result.text) {
						conferenceConfigs.pushMessage(currentMessage);
					}
				} else {
					// The message object is mutated and given a fresh id; pushMessage(null) is then
					// called — presumably to notify the store of the in-place change (verify in the hook).
					currentMessage.id = v4();
					currentMessage.message = event.result.text;
					conferenceConfigs.pushMessage(null);
				}
			};

			// Final result for the utterance: write the final text and start a new message next time.
			recognizer.recognized = (_sender, event) => {
				if (isDisposed) {
					return;
				}
				if (currentMessage && event.result.text) {
					currentMessage.id = v4();
					currentMessage.message = event.result.text;
					conferenceConfigs.pushMessage(null);
					currentMessage = null;
				}
			};

			// Recognition was canceled by the service: clear captions, toggle them off and tell the user.
			recognizer.canceled = (_sender, event) => {
				if (isDisposed) {
					return;
				}
				conferenceConfigs.setSpeechToTextMessages([]);
				conferenceConfigs.onConfigurationToggleAction(ControlsActions.TOGGLE_LIVE_CAPTIONS);
				conferenceConfigs.setConferenceErrorMessages([{ id: v4(), message: translate('failedToGetCaptions') }]);
				console.error({
					error: {
						code: event.errorCode,
						details: event.errorDetails,
						reason: event.reason,
					},
				});
			};

			recognizer.startContinuousRecognitionAsync(undefined, error => {
				console.error({ error });
			});
		};

		// Token or SDK setup failures must not surface as unhandled promise rejections.
		startRecognition().catch(error => {
			console.error({ error });
			if (!isDisposed) {
				conferenceConfigs.setConferenceErrorMessages([{ id: v4(), message: translate('failedToGetCaptions') }]);
			}
		});

		return () => {
			isDisposed = true;
			if (!recognizer) {
				return;
			}
			const activeRecognizer = recognizer;
			recognizer = null;
			// Stop first, then close, so the SDK releases its connection and audio resources.
			activeRecognizer.stopContinuousRecognitionAsync(
				() => activeRecognizer.close(),
				error => {
					console.error({ error });
					activeRecognizer.close();
				}
			);
		};
		// NOTE(review): only `track` is a dependency; the handlers above keep using the
		// `participant` and `conferenceConfigs` values from the render that started the effect.
	}, [track]);

	return null;
};

/**
 * Converts the Azure speech token API response into the `{ authToken, region }` shape used
 * when building the speech configuration.
 *
 * Never rejects: when the token cannot be read from the response, it resolves with
 * `{ authToken: null, error }` instead.
 *
 * @param {{ data: string } | null | undefined} tokenResponse Response whose `data` field holds the token.
 * @returns {Promise<{ authToken: string, region: string } | { authToken: null, error: unknown }>}
 */
const getTokenOrRefresh = async tokenResponse => {
	try {
		return { authToken: tokenResponse.data, region: APP_CONFIG.speechToTextRegion };
	} catch (err) {
		// Errors raised here are normally plain TypeErrors without a `response`, so fall back
		// to the error itself instead of throwing again from inside the handler.
		return { authToken: null, error: err?.response?.data ?? err };
	}
};

const SpeechToTextView = () => {
	const intl = useIntl();
	const conferenceConfigs = useConferenceConfigurations();
	/** @type {import('react').MutableRefObject<HTMLDivElement>} */
	const speechContainerRef = useRef(null);

	useEffect(() => {
		if (speechContainerRef.current) {
			speechContainerRef.current.scrollIntoView({ behavior: 'smooth', block: 'end' });
		}
	}, [conferenceConfigs.speechToTextMessages]);

	return (
		<Modal
			position='right'
			onDismiss={() => {
				conferenceConfigs.onConfigurationToggleAction(ControlsActions.TOGGLE_LIVE_CAPTIONS);
				conferenceConfigs.setSpeechToTextMessages([]);
			}}
			title={intl.formatMessage({ id: 'liveCaptions' })}
			zIndex={1}>
			<Modal.Content>
				<ForwardedSpeechToText ref={speechContainerRef} isMobile={isMobile} isDarkMode={conferenceConfigs.isDarkMode}>
					{conferenceConfigs.speechToTextMessages.length === 0 && (
						<StyledEmptyState $isDarkMode={conferenceConfigs.isDarkMode}>
							{translate('liveCaptionsEmptyMessage')}
						</StyledEmptyState>
					)}
					{conferenceConfigs.speechToTextMessages.map(message => (
						<Message
							key={message.id}
							message={message}
							isGridView={conferenceConfigs.isGridView}
							isDarkMode={conferenceConfigs.isDarkMode}
						/>
					))}
					{conferenceConfigs.speechToTextParticipants.map(participant => {
						const localOrRemoteParticipant =
							participant instanceof LocalParticipant ? participant.localTrackController : participant.remoteTrackController;

						return <>{localOrRemoteParticipant.tracks[Mic] && <Connection participant={participant} />}</>;
					})}
				</ForwardedSpeechToText>
			</Modal.Content>
		</Modal>
	);
};

export default SpeechToTextView;
