// KaraokeSegmenter.ts
import {
  TranscriptElement,
  TranscriptTextElement,
  TranscriptPunctElement,
} from '../../types.ts/transcript';

import KaraokeWord from './KaraokeWord';

import {
  NATURAL_BEAT_DURATION_MS,
  PAUSE_DURATION_MS,
  MIN_SEGMENT_LENGTH,
  MAX_SEGMENT_LENGTH,
  PUNCT_ELEMENT_DURATION_MS,
  MINIMUM_WORD_OFFSET,
} from '../constants/karaokeConstants';

/**
 * A group of karaoke words together with the time span they cover.
 *
 * All three fields are public and mutable, so a segment can be filled in and
 * re-timed after it has been created.
 */
export class KaraokeSegment {
  /** Words that make up this segment. */
  public words: KaraokeWord[];

  /** Global timestamp at which the segment starts. */
  public startTs: number;

  /** Global timestamp at which the segment ends. */
  public endTs: number;

  /**
   * @param words Words that make up the segment (stored by reference).
   * @param startTs Global timestamp at which the segment starts.
   * @param endTs Global timestamp at which the segment ends.
   */
  constructor(words: KaraokeWord[], startTs: number, endTs: number) {
    this.words = words;
    this.startTs = startTs;
    this.endTs = endTs;
  }
}

/**
 * Groups transcript elements into `KaraokeSegment`s for a clip.
 *
 * Text elements become `KaraokeWord`s whose start/end times are expressed
 * relative to the start of the segment they belong to; punctuation elements
 * are attached to the preceding word. Segment boundaries are chosen either
 * from explicit `karaoke_break` markers on the elements or from heuristics
 * (word count, sentence-ending punctuation, gaps between words).
 */
export default class KaraokeSegmenter {
  private readonly elements: TranscriptElement[];
  private readonly clipStartTs: number;
  private readonly clipEndTs: number;
  private readonly onlyOnTranscriptBreaks: boolean;

  /**
   * @param elements Transcript elements to segment, in playback order.
   * @param startTs Clip start; a segment never starts before this timestamp.
   * @param endTs Clip end; text elements starting after it are ignored and
   *   segment/word end times are clamped to it.
   * @param onlyOnTranscriptBreaks When true, segments end only on elements
   *   flagged with `karaoke_break`; otherwise the heuristics are used.
   */
  constructor(
    elements: TranscriptElement[],
    startTs: number,
    endTs: number,
    onlyOnTranscriptBreaks: boolean = false,
  ) {
    this.elements = elements;
    this.clipStartTs = startTs;
    this.clipEndTs = endTs;
    this.onlyOnTranscriptBreaks = onlyOnTranscriptBreaks;
  }

  /**
   * Walks the elements once and builds the list of segments.
   *
   * @returns The segments that contain at least one word and have a positive
   *   duration, in the order they were encountered.
   */
  public splitTextIntoSegments(): KaraokeSegment[] {
    const segments: KaraokeSegment[] = [];
    let currentSegment: KaraokeSegment | null = null;

    for (let idx = 0; idx < this.elements.length; idx++) {
      const element = this.elements[idx];

      // Elements without a value carry nothing to display.
      if (!element.value) {
        continue;
      }

      // Skip elements that are 'removed' or 'cut'.
      if (['removed', 'cut'].includes(element.state ?? '')) {
        continue;
      }

      // Even if the element is muted, we include it for timing purposes.
      if (element.type === 'text') {
        const textElement = element as TranscriptTextElement;

        // Stop at the first text element that starts after the clip ends.
        if (textElement.ts > this.clipEndTs) {
          break;
        }

        // A missing end time (null OR undefined) falls back to the start time.
        // Checking only `!== null` would let `undefined` reach Math.min and
        // turn the word's end time into NaN.
        const clampedEndTs = Math.min(
          textElement.end_ts ?? textElement.ts,
          this.clipEndTs,
        );

        if (!currentSegment) {
          currentSegment = new KaraokeSegment(
            [],
            Math.max(textElement.ts, this.clipStartTs),
            clampedEndTs,
          );
        }

        // Word timing is relative to the start of its segment.
        const word = new KaraokeWord(
          element,
          textElement.ts - currentSegment.startTs,
          clampedEndTs - currentSegment.startTs,
          // `??` rather than `||`: 0 is a valid index and must not trigger the
          // fallback. Should never fall back to idx, but just in case.
          element.current_index ?? idx,
        );
        currentSegment.words.push(word);

        // The segment now ends where its latest word ends.
        currentSegment.endTs = clampedEndTs;
      } else if (element.type === 'punct' && currentSegment) {
        this.appendPunctuationToLastWord(currentSegment, element);
      }

      if (currentSegment) {
        // Account for timeline edits if we encounter any attached to element.
        if (element.karaoke_break_start_ts_diff) {
          currentSegment.startTs += element.karaoke_break_start_ts_diff;
        }

        if (element.karaoke_break_end_ts_diff) {
          currentSegment.endTs += element.karaoke_break_end_ts_diff;
        }

        if (this.shouldEndSegment(currentSegment, idx)) {
          this.addSegmentIfValid(segments, currentSegment);
          currentSegment = null;
        }
      }
    }

    // Flush the segment that was still open when the elements ran out.
    if (currentSegment) {
      this.addSegmentIfValid(segments, currentSegment);
    }

    return segments;
  }

  /**
   * Attaches a punctuation element to the last word of `segment` and extends
   * the segment end by `PUNCT_ELEMENT_DURATION_MS`, capped at the clip end.
   * Does nothing when the segment has no words yet.
   */
  private appendPunctuationToLastWord(
    segment: KaraokeSegment,
    element: TranscriptPunctElement,
  ): void {
    if (segment.words.length === 0) {
      return;
    }

    const lastWord = segment.words[segment.words.length - 1];
    lastWord.appendPunctuation(element);

    // Ensure endTs does not exceed clipEndTs.
    segment.endTs = Math.min(
      segment.endTs + PUNCT_ELEMENT_DURATION_MS,
      this.clipEndTs,
    );
  }

  /**
   * Pushes `segment` onto `segments` when it has at least one word, starts
   * before the clip end and, after clamping its end to the clip end, still has
   * a positive duration. The segment's `endTs` is clamped in place.
   */
  private addSegmentIfValid(
    segments: KaraokeSegment[],
    segment: KaraokeSegment,
  ): void {
    if (segment.words.length === 0 || segment.startTs >= this.clipEndTs) {
      return;
    }

    segment.endTs = Math.min(segment.endTs, this.clipEndTs);

    if (segment.endTs > segment.startTs) {
      segments.push(segment);
    }
  }

  /**
   * Decides whether the segment should be closed after the element at `idx`.
   * In break-only mode just the element's `karaoke_break` flag is consulted;
   * otherwise any one of the heuristics is enough.
   */
  private shouldEndSegment(segment: KaraokeSegment, idx: number): boolean {
    if (this.onlyOnTranscriptBreaks) {
      return this.isKaraokeBreak(idx);
    }

    return (
      this.isMaxLengthReached(segment) ||
      this.isSignificantPunctuation(idx) ||
      this.isPauseDetected(idx) ||
      this.isTimeGapExceeded(idx)
    );
  }

  /** True once the segment holds `MAX_SEGMENT_LENGTH` words or more. */
  private isMaxLengthReached(segment: KaraokeSegment): boolean {
    return segment.words.length >= MAX_SEGMENT_LENGTH;
  }

  /**
   * True when the element at `idx` is punctuation whose value ends with
   * `.`, `!` or `?`.
   */
  private isSignificantPunctuation(idx: number): boolean {
    const element = this.elements[idx];
    if (element.type !== 'punct') {
      return false;
    }
    const value = element.value ?? '';
    return ['.', '!', '?'].some((punc) => value.endsWith(punc));
  }

  /** True when the element at `idx` is explicitly flagged as a break. */
  private isKaraokeBreak(idx: number): boolean {
    return this.elements[idx].karaoke_break === true;
  }

  /**
   * True when the gap between the text element at `idx` and the next text
   * element is longer than `PAUSE_DURATION_MS`.
   */
  private isPauseDetected(idx: number): boolean {
    return this.isGapToNextTextExceeded(idx, PAUSE_DURATION_MS);
  }

  /**
   * True when the gap between the text element at `idx` and the next text
   * element is longer than `NATURAL_BEAT_DURATION_MS`.
   */
  private isTimeGapExceeded(idx: number): boolean {
    return this.isGapToNextTextExceeded(idx, NATURAL_BEAT_DURATION_MS);
  }

  /**
   * Shared gap check behind `isPauseDetected` and `isTimeGapExceeded`.
   *
   * @param idx Index of the current element.
   * @param threshold Gap above which the check is true, in the same unit as
   *   the element timestamps.
   * @returns False when the current element is not text or no text element
   *   follows; true when the next text element starts after the clip end;
   *   otherwise whether the gap from the current element's end (or its start
   *   when it has no end time) to the next element's start exceeds `threshold`.
   */
  private isGapToNextTextExceeded(idx: number, threshold: number): boolean {
    const currentElement = this.elements[idx];
    if (currentElement.type !== 'text') {
      return false;
    }

    const nextElement = this.findNextTextElement(idx);
    if (!nextElement) {
      return false;
    }

    // The next word lies outside the clip, so the current segment ends here.
    if (nextElement.ts > this.clipEndTs) {
      return true;
    }

    const currentEndTs =
      (currentElement as TranscriptTextElement).end_ts ??
      (currentElement as TranscriptTextElement).ts;

    return nextElement.ts - currentEndTs > threshold;
  }

  /**
   * Returns the first text element after `idx`, or null when there is none.
   * Element state and value are not considered here.
   */
  private findNextTextElement(idx: number): TranscriptTextElement | null {
    for (let i = idx + 1; i < this.elements.length; i++) {
      const el = this.elements[i];
      if (el.type === 'text') {
        return el as TranscriptTextElement;
      }
    }
    return null;
  }
}
