// src/videoTranscriptionProcessor/utils/textUtils.ts

import {
  TranscriptPunctElement,
  TranscriptTextElement,
  TranscriptElement,
} from '../../types.ts/transcript';

import { FILLER_WORDS } from '../constants/karaokeConstants';

/**
 * Splits a multi-word `text` element into one element per word, interleaved
 * with single-space `punct` elements.
 *
 * Behavior by input:
 * - missing or empty `value`: the element is returned as-is;
 * - whitespace-only `value`: a copy whose `value` is the first whitespace
 *   character is returned (the input element is not modified);
 * - non-`text` elements, or text that is a single segment after trimming and
 *   splitting on `' '`: the element is returned as-is;
 * - otherwise the value is split on the space character. Empty segments
 *   (produced by leading, trailing or consecutive spaces) yield no word.
 *   The element's duration (`end_ts - ts`) is divided evenly between the
 *   words, and each word receives the slot matching its position among the
 *   words, so the slots stay contiguous and end at `end_ts` even when the
 *   value contains extra spaces.
 *
 * Karaoke markers on the source element are carried over: `karaoke_break` is
 * set on the last produced element, `karaoke_break_start_ts_diff` on the
 * first word and `karaoke_break_end_ts_diff` on the last word.
 *
 * @param transcriptEl Element to split.
 * @returns The resulting elements, in order. In the pass-through cases this
 *   is a one-element array holding the original object.
 */
export function getParts(transcriptEl: TranscriptElement): TranscriptElement[] {
  const text = transcriptEl.value;

  // Nothing to split when there is no value or it is empty.
  if (text == null || text.length === 0) {
    return [transcriptEl];
  }

  // Whitespace-only values collapse to their first character. A copy is
  // returned so the caller's element keeps its original value.
  if (text.trim().length === 0) {
    return [{ ...transcriptEl, value: text.charAt(0) } as TranscriptElement];
  }

  if (transcriptEl.type !== 'text' || text.trim().split(' ').length === 1) {
    return [transcriptEl];
  }

  const parts = text.split(' ');
  const wordCount = parts.filter((p) => p.length > 0).length;
  const startTs = transcriptEl.ts!;
  const partDuration = (transcriptEl.end_ts! - startTs) / wordCount;

  const newElements: TranscriptElement[] = [];
  const words: TranscriptElement[] = [];
  for (const [ind, part] of parts.entries()) {
    if (!part) {
      // Empty segment caused by an extra space: it is not a word and must not
      // consume a time slot.
      continue;
    }

    // Slots are indexed by word position, not by raw segment index.
    const slot = words.length;
    const word = {
      type: 'text',
      value: part,
      ts: startTs + slot * partDuration,
      end_ts: startTs + (slot + 1) * partDuration,
    } as TranscriptTextElement;
    words.push(word);
    newElements.push(word);

    // A separator follows every word that is not the final split segment
    // (so a trailing space in the source value is preserved).
    if (ind < parts.length - 1) {
      newElements.push({
        type: 'punct',
        value: ' ',
      } as TranscriptPunctElement);
    }
  }

  const lastElement = newElements[newElements.length - 1];
  if (lastElement && transcriptEl.karaoke_break) {
    lastElement.karaoke_break = true;
  }

  const firstWord = words[0];
  if (firstWord && transcriptEl.karaoke_break_start_ts_diff) {
    firstWord.karaoke_break_start_ts_diff =
      transcriptEl.karaoke_break_start_ts_diff;
  }

  const lastWord = words[words.length - 1];
  if (lastWord && transcriptEl.karaoke_break_end_ts_diff) {
    lastWord.karaoke_break_end_ts_diff = transcriptEl.karaoke_break_end_ts_diff;
  }

  return newElements;
}

/**
 * Replaces every occurrence of a filler word in `text` with a single space.
 *
 * The entries of `FILLER_WORDS` are joined into one case-insensitive, global
 * alternation. A match starts at a word boundary (or the start of the text),
 * may include a comma plus following whitespace on either side of the filler,
 * and must be followed by a word boundary or the end of the text.
 *
 * NOTE(review): entries are interpolated into the pattern without escaping,
 * so any regex metacharacters in `FILLER_WORDS` are interpreted as regex
 * syntax — confirm against the constant's contents.
 *
 * @param text Text to process.
 * @returns The text with each match replaced by `' '`; surrounding whitespace
 *   is not collapsed or trimmed here.
 */
export function removeFillerWords(text: string): string {
  const alternatives = FILLER_WORDS.join('|');
  const pattern = `(?:\\b|^)(?:,\\s*)?(?:${alternatives})(?:,\\s*)?(?=\\b|$)`;
  const matcher = new RegExp(pattern, 'gi');
  return text.replace(matcher, ' ');
}

/**
 * Applies the display clean-ups enabled in `config` to `rawText`.
 *
 * Steps run in this order, each only when its flag is truthy:
 * 1. `hideFillers` — the text is passed through `removeFillerWords`, trimmed,
 *    and then passed through `capitalizeAfterPeriods`;
 * 2. `hideComma` — every `,` is removed;
 * 3. `hidePeriod` — every `.` is removed.
 *
 * @param rawText Text to clean up.
 * @param config Object read for the `hideFillers`, `hideComma` and
 *   `hidePeriod` flags.
 * @returns The cleaned text; identical to `rawText` when no flag is set.
 */
export function sanitizeText(rawText: string, config: any): string {
  const afterFillers = config.hideFillers
    ? capitalizeAfterPeriods(removeFillerWords(rawText).trim())
    : rawText;

  // split/join on a literal string removes every occurrence of it.
  const afterCommas = config.hideComma
    ? afterFillers.split(',').join('')
    : afterFillers;

  return config.hidePeriod ? afterCommas.split('.').join('') : afterCommas;
}

/**
 * Upper-cases the first word character that follows sentence-ending
 * punctuation (`.`, `!` or `?`) and normalizes the whitespace between them to
 * a single space.
 *
 * Every sentence boundary in the text is processed. At least one whitespace
 * character is required between the punctuation and the next word character,
 * so tokens such as decimals (`3.14`) or domain names (`example.com`) are left
 * untouched. The first character of the text itself is never changed.
 *
 * @param text Text to process.
 * @returns The text with each sentence start after `.`, `!` or `?`
 *   capitalized.
 */
export function capitalizeAfterPeriods(text: string): string {
  return text.replace(
    /([.!?])\s+(\w)/g,
    (_match: string, punctuation: string, letter: string): string =>
      `${punctuation} ${letter.toUpperCase()}`,
  );
}
