//
// Inspired by https://github.com/Jam3/voice-activity-detection
//
import { VueLogger } from 'vue-logger-plugin'

import analyserFrequency from '@/util/analyser-frequency-average'
import EWMA from '@/util/EWMA'

import workletUrl from './VoiceActivityDetectorWorklet.js?url'

// Period of the setInterval timer that samples the analyser while the detector
// is running (see VoiceActivityDetector.unpause).
const ANALYSIS_SAMPLE_PERIOD = 50 // ms
// When true, sampleAnalyser() emits verbose per-sample debug log lines through
// the configured logger.
const DEBUG = false

/**
 * Configuration for {@link VoiceActivityDetector}. Every field has a default in
 * `VoiceActivityDetector.defaults`; caller-supplied values are spread over those
 * defaults in the constructor.
 */
export interface VoiceActivityDetectorOptions {
  // NOTE(review): no code visible in this file reads bufferLen — confirm whether it is still needed
  bufferLen: number
  // copied to AnalyserNode.smoothingTimeConstant in the constructor
  smoothingTimeConstant: number
  minCaptureFreq: number // lower bound passed to analyserFrequency (in Hz)
  maxCaptureFreq: number // upper bound passed to analyserFrequency (in Hz)
  initialNoiseCaptureDuration: number // how long to sample background noise (in ms)
  // when true, the initial background-noise capture phase is skipped
  skipBackgroundSampling: boolean
  // initial value of the noise EWMA
  noiseStartingLevel: number
  // floor applied to the noise level when computing the trigger level and when
  // reporting the base level
  minNoiseLevel: number
  noiseEwmaHalfLife: number // EWMA half life in ms for noise detector
  soundEwmaHalfLife: number // EWMA half life in ms for sound detector, longer trend
  shortEwmaHalfLife: number // EWMA half life in ms for sound detector, short trend

  // Slow silence detection: while voice is active, if shortEwma drops below
  // soundEwma by at least descentPercentageTrigger (a fraction of the distance
  // between soundEwma and the trigger level), soundEwma is rebuilt with its half
  // life multiplied by slowdownEwmaMultiplier. The slowdown is undone once
  // shortEwma rises above that threshold again.
  slowSilenceDetection: boolean
  descentPercentageTrigger: number
  slowdownEwmaMultiplier: number

  noiseMultiplierToTrigger: number // factor applied to the (floored) noise level to obtain the trigger level
  // called with the current noise level and capture progress (0..100) during the
  // initial noise capture, and once with percent = 100 when the capture ends
  onBaseLevel: null | ((baseLevel: number, percent: number) => void)
  // called on every processed sample after the noise capture with the current trigger level
  onTriggerLevel: null | ((triggerLevel: number) => void)
  // called when the state flips from inactive to active
  onVoiceStart: null | (() => void)
  // called when slow silence detection starts slowing down soundEwma
  onStartSilence: null | (() => void)
  // called when the state flips from active to inactive; samples are ignored
  // until the returned promise resolves
  onVoiceStop: null | (() => Promise<void>)
  // called on every processed sample after the noise capture with the soundEwma value
  onSoundUpdate: null | ((n: number) => void)
  // NOTE(review): no code visible in this file reads disableAnalyzer — confirm whether it is still needed
  disableAnalyzer: boolean
  flipResistance: number // minimum time between two state transitions (in ms)
  // optional logger; when null nothing is logged
  logger: VueLogger | null
}

/**
 * Detects voice activity on a Web Audio stream.
 *
 * The detector wires `source -> AnalyserNode -> 'vad-processor' worklet -> dest`
 * and, every `ANALYSIS_SAMPLE_PERIOD` ms, feeds the level returned by
 * `analyserFrequency` for the configured frequency band into exponentially
 * weighted moving averages (EWMAs):
 *
 * - `noiseEwma` tracks the background level (updated during the initial noise
 *   capture and whenever voice is inactive);
 * - `soundEwma` tracks the longer sound trend and is compared against
 *   `noise * noiseMultiplierToTrigger` to decide whether voice is active;
 * - `shortEwma` tracks the short trend and drives slow silence detection.
 *
 * Set-up is asynchronous: the worklet module is loaded in the constructor and
 * sampling only starts once it is available.
 */
export class VoiceActivityDetector {
  private analyser: AnalyserNode

  // Created once the worklet module has loaded; null until then.
  private scriptProcessorNode: AudioWorkletNode | null = null

  private captureTimeout: ReturnType<typeof setTimeout> | null

  private isNoiseCapturing: boolean = false

  private startNoiseCapture: Date | null = null

  private options: VoiceActivityDetectorOptions

  private prevVoiceActive: boolean

  private source: AudioNode

  private dest: AudioNode

  private voiceActive: boolean

  private shortEwma: EWMA

  private soundEwma: EWMA

  // true while soundEwma runs with the slowed-down half life
  private inSoundEwmaSlowDown: boolean = false

  private noiseEwma: EWMA

  // true while an onVoiceStop handler is pending; samples are ignored meanwhile.
  // Independent of pause()/unpause(), which start and stop the sampling timer.
  private paused: boolean

  // timestamp (ms since epoch) of the most recent state transition
  private mostRecentFlip: number | null

  private interval: ReturnType<typeof setInterval> | null

  // set by destroy() so a late worklet load does not revive the detector
  private destroyed: boolean = false

  private static defaults: VoiceActivityDetectorOptions = {
    bufferLen: 4096,

    descentPercentageTrigger: 0.1,

    disableAnalyzer: false,

    flipResistance: 800, // ms

    initialNoiseCaptureDuration: 5500, // ms

    logger: null,

    // male: 60-180
    // female: 160-300
    maxCaptureFreq: 800,
    minCaptureFreq: 50,

    minNoiseLevel: 0.15,

    noiseEwmaHalfLife: 1500, // ms

    // factor
    // 1.2 is a TINY bit too slow, 1.18 still caused short words to not trigger, 1.1 was too aggressive
    noiseMultiplierToTrigger: 1.16,

    noiseStartingLevel: 0.5,

    onBaseLevel: null,

    onSoundUpdate: null,

    onStartSilence: null,

    onTriggerLevel: null,

    onVoiceStart: null,

    onVoiceStop: null,

    shortEwmaHalfLife: 50, // ms

    skipBackgroundSampling: false,

    // slow silence detection ensures that we still rapidly detect voice, but we
    // are slow to decide that the voice stopped; we do this by increasing the
    // half life of the EWMA when we believe the voice has stopped. how do we
    // detect that voice has stopped? the trigger is when the shortEwma has
    // dropped 10% below soundEwma on its way to the triggerValue. When that
    // occurs, we multiply the halfLife of soundEwma by {slowdownEwmaMultiplier}.
    // We also *undo* the slowdown, when the trend goes back up again.
    slowSilenceDetection: false,

    slowdownEwmaMultiplier: 2.0,
    smoothingTimeConstant: 0.2,

    // 300 was a TINY bit too slow (presumably refers to this half life)
    soundEwmaHalfLife: 250, // ms
  }

  /**
   * @param audioContext context used to create the analyser and the worklet node
   * @param source node whose signal is analysed
   * @param dest node the worklet node is connected to
   * @param opts overrides for `VoiceActivityDetector.defaults`; any subset may be given
   */
  constructor(audioContext: AudioContext, source: AudioNode, dest: AudioNode, opts?: Partial<VoiceActivityDetectorOptions>) {
    this.options = { ...VoiceActivityDetector.defaults, ...opts }
    this.prevVoiceActive = false
    this.voiceActive = false
    this.captureTimeout = null
    this.source = source
    this.dest = dest
    this.paused = false
    // seed the EWMAs from the merged options (not the defaults) so caller
    // overrides are honoured from the very first use
    this.shortEwma = new EWMA(this.options.shortEwmaHalfLife)
    this.soundEwma = new EWMA(this.options.soundEwmaHalfLife)
    this.noiseEwma = new EWMA(this.options.noiseEwmaHalfLife)
    this.interval = null

    this.analyser = audioContext.createAnalyser()
    this.analyser.smoothingTimeConstant = this.options.smoothingTimeConstant

    this.mostRecentFlip = null

    this.options.logger?.debug('VoiceActivityDetector.constructor, this.options =')
    this.options.logger?.debug(this.options)

    audioContext.audioWorklet
      .addModule(workletUrl)
      .then(() => {
        // destroy() may have been called while the module was loading
        if (this.destroyed) {
          return
        }
        this.scriptProcessorNode = new AudioWorkletNode(audioContext, 'vad-processor')

        this.connect()
        this.unpause()
        this.noiseEwma = new EWMA(this.options.noiseEwmaHalfLife, this.options.noiseStartingLevel)

        if (!this.options.skipBackgroundSampling) {
          // start with background noise capturing
          this.options.logger?.debug('VoiceActivityDetector.constructor will noiseCapture')
          this.isNoiseCapturing = true
          this.startNoiseCapture = new Date()
          this.captureTimeout = setTimeout(() => {
            this.captureTimeout = null
            this.stopDetectBaseLevel()
          }, this.options.initialNoiseCaptureDuration)
        } else {
          this.options.logger?.debug('VoiceActivityDetector.constructor will NOT noiseCapture')
          this.isNoiseCapturing = false
          this.startNoiseCapture = null
          this.captureTimeout = null
          this.stopDetectBaseLevel()
        }
      })
      .catch((err: unknown) => {
        // without this handler a failed worklet load is an unhandled rejection
        this.reportError('VoiceActivityDetector failed to initialise the audio worklet', err)
      })
  }

  /** Reports an error through the configured logger, or the console when there is none. */
  private reportError(message: string, err: unknown): void {
    if (this.options.logger) {
      this.options.logger.error(message, err)
    } else {
      console.error(message, err)
    }
  }

  /**
   * Invokes `onVoiceStop` and ignores samples until its promise settles.
   * A throwing or rejecting handler is reported and still lifts the pause, so
   * the detector cannot get stuck in the paused state.
   */
  private pauseDuringVoiceStop(): void {
    if (!this.options.onVoiceStop) {
      return
    }
    this.paused = true
    let pending: Promise<void>
    try {
      pending = Promise.resolve(this.options.onVoiceStop())
    } catch (err: unknown) {
      pending = Promise.reject(err)
    }
    pending
      .catch((err: unknown) => {
        this.reportError('VoiceActivityDetector: onVoiceStop handler failed', err)
      })
      .finally(() => {
        this.paused = false
      })
  }

  /**
   * One sampling step: reads the analyser, updates the EWMAs, decides whether
   * voice is active and fires the state-transition callbacks.
   */
  private sampleAnalyser(): void {
    if (DEBUG) {
      this.options.logger?.debug('sampleAnalyser')
    }

    // update the average frequency (the analyser is read even while paused)
    const frequencies = new Uint8Array(this.analyser.frequencyBinCount)
    this.analyser.getByteFrequencyData(frequencies)
    const soundLevel = analyserFrequency(this.analyser, frequencies, this.options.minCaptureFreq, this.options.maxCaptureFreq)
    if (DEBUG) {
      this.options.logger?.debug(`soundLevel = ${soundLevel}`)
    }

    // if the VAD is paused, don't update
    if (this.paused) {
      if (DEBUG) {
        this.options.logger?.debug('returning from paused VAD')
      }
      return
    }

    // update the noise EWMA using the soundLevel
    if (this.isNoiseCapturing) {
      this.updateNoiseLevel(soundLevel)
      return
    }

    // after the noise capture stage, also update the sound EWMA
    this.soundEwma.insert(soundLevel)
    this.shortEwma.insert(soundLevel)
    if (this.options.onSoundUpdate) {
      this.options.onSoundUpdate(this.soundEwma.value())
    }

    // trigger voice activation
    const noiseValue = Math.max(this.noiseEwma.value(), this.options.minNoiseLevel)
    if (DEBUG) {
      this.options.logger?.debug(`noiseValue = ${noiseValue}`)
    }

    const triggerValue = noiseValue * this.options.noiseMultiplierToTrigger
    if (this.options.onTriggerLevel) {
      this.options.onTriggerLevel(triggerValue)
    }

    this.voiceActive = this.soundEwma.value() >= triggerValue
    if (DEBUG) {
      this.options.logger?.debug(`soundEwma.value = ${this.soundEwma.value()}, noiseValue = ${noiseValue}`)
      this.options.logger?.debug(`voiceActive = ${this.voiceActive}`)
    }

    // prevent rapid voice (de)activation oscillation; before the first flip
    // the resistance never applies
    const msSinceMostRecentFlip =
      this.mostRecentFlip === null ? this.options.flipResistance + 1 : Date.now() - this.mostRecentFlip

    // flip state if it's been long enough since the last state transition
    if (this.prevVoiceActive !== this.voiceActive && msSinceMostRecentFlip > this.options.flipResistance) {
      if (this.voiceActive) {
        // voice went active
        if (this.options.onVoiceStart) {
          this.options.onVoiceStart()
        }
      } else if (this.options.onVoiceStop) {
        // voice went inactive
        this.pauseDuringVoiceStop()
        // reset the voice EWMA to half the background level (to avoid it
        // jumping straight over again)
        this.soundEwma = new EWMA(this.options.soundEwmaHalfLife, this.noiseEwma.value() / 2)
        // soundEwma is back on its base half life, so no slowdown is in effect
        this.inSoundEwmaSlowDown = false
      }
      this.mostRecentFlip = Date.now()
      this.prevVoiceActive = this.voiceActive
    }

    if (this.options.slowSilenceDetection && this.voiceActive) {
      // there is apparently a short silence; slow down soundEwma to avoid
      // setting the turn-off trigger too quickly
      const soundEwmaTriggerDiff = this.soundEwma.value() - triggerValue
      const slowDownThreshold = this.soundEwma.value() - soundEwmaTriggerDiff * this.options.descentPercentageTrigger
      if (this.shortEwma.value() < slowDownThreshold) {
        if (!this.inSoundEwmaSlowDown) {
          this.soundEwma = new EWMA(this.options.soundEwmaHalfLife * this.options.slowdownEwmaMultiplier, this.soundEwma.value())
          if (this.options.onStartSilence) {
            this.options.onStartSilence()
          }
        }
        this.inSoundEwmaSlowDown = true
      } else if (this.inSoundEwmaSlowDown) {
        // the trend went back up: undo the slowdown
        this.soundEwma = new EWMA(this.options.soundEwmaHalfLife, this.soundEwma.value())
        this.inSoundEwmaSlowDown = false
      }
    }

    // voice is inactive; update the noiseEwma
    if (!this.voiceActive) {
      this.updateNoiseLevel(soundLevel)
    }
  }

  /** Restarts the sound EWMAs at half the current noise level, on their base half lives. */
  public reset(): void {
    this.soundEwma = new EWMA(this.options.soundEwmaHalfLife, this.noiseEwma.value() / 2)
    this.shortEwma = new EWMA(this.options.shortEwmaHalfLife, this.noiseEwma.value() / 2)
    // soundEwma was rebuilt on its base half life, so no slowdown is in effect
    this.inSoundEwmaSlowDown = false
  }

  /** Resets the sound EWMAs and (re)starts the sampling timer. */
  public unpause(): void {
    // presumably, reactivating the VAD suggests we're newly interested in voice
    // detection; reset the EWMAs so we start from scratch
    this.reset()

    // just to be safe, first clear a possible running timer so we don't have parallel interval cycles
    this.pause()
    this.interval = setInterval(() => {
      this.sampleAnalyser()
    }, ANALYSIS_SAMPLE_PERIOD)
  }

  /** Stops the sampling timer; a no-op when it is not running. */
  public pause(): void {
    if (this.interval !== null) {
      clearInterval(this.interval)
      this.interval = null
    }
  }

  /** Enables or disables slow silence detection. */
  public setSlowSilenceDetection(onoff: boolean): void {
    this.options.slowSilenceDetection = onoff
  }

  /** Sets the factor applied to the soundEwma half life while slow silence detection is slowing it down. */
  public setSlowdownEwmaMultiplier(multiplier: number): void {
    this.options.slowdownEwmaMultiplier = multiplier
  }

  /** Rebuilds soundEwma at its current value with the base half life multiplied by `multiplier`. */
  public setSoundEwmaHalfLifeMultiplier(multiplier: number): void {
    this.soundEwma = new EWMA(this.options.soundEwmaHalfLife * multiplier, this.soundEwma.value())
  }

  /**
   * Maps a sensitivity onto `noiseMultiplierToTrigger`.
   *
   * @param value a number between 1 and 100 (inclusive): 1 maps to the largest
   *   multiplier (2.2, least sensitive), 100 to the smallest (1.0, most
   *   sensitive), linear in between. Out-of-range or non-finite values are
   *   logged and ignored.
   */
  public setSensitivity(value: number): void {
    const minval = 1
    const maxval = 100
    // the isFinite check rejects NaN, which would slip through both range comparisons
    if (!Number.isFinite(value) || value < minval || value > maxval) {
      this.options.logger?.debug(`VoiceActivityDetector.setSensitivity expects number between ${minval} and ${maxval}`)
      return
    }
    const valdiff = maxval - minval

    // uses sensitivity to set between these values
    const min = 1.0
    const max = 2.2
    const diff = (max - min) * 100
    this.options.noiseMultiplierToTrigger = max - (((value - minval) / valdiff) * diff) / 100
  }

  /**
   * Wires `source -> analyser -> worklet node -> dest`. Ignored (with a debug
   * log) until the worklet node exists; the constructor connects automatically
   * once it has been created.
   */
  public connect(): void {
    if (!this.scriptProcessorNode) {
      this.options.logger?.debug('VoiceActivityDetector.connect called before the worklet node is ready; ignoring')
      return
    }
    this.source.connect(this.analyser)
    this.analyser.connect(this.scriptProcessorNode)
    this.scriptProcessorNode.connect(this.dest)
  }

  /**
   * Removes the connections made by `connect()`. Only the source's connection
   * to the analyser is removed; other outputs of the caller-owned source node
   * are left alone. Safe to call repeatedly or before the worklet node exists.
   */
  public disconnect(): void {
    if (this.scriptProcessorNode) {
      this.scriptProcessorNode.disconnect()
    }
    this.analyser.disconnect()
    try {
      this.source.disconnect(this.analyser)
    } catch {
      // disconnect(destination) throws InvalidAccessError when the source is
      // not (or no longer) connected to the analyser; nothing to undo then
    }
  }

  /** Stops all timers and disconnects the audio graph; the instance must not be reused afterwards. */
  public destroy(): void {
    this.destroyed = true
    // stop sampling so the interval does not keep firing on disconnected nodes
    this.pause()
    if (this.captureTimeout !== null) {
      clearTimeout(this.captureTimeout)
      this.captureTimeout = null
    }
    this.disconnect()
  }

  /** Ends the noise capture phase, reports the final base level and arms voice detection. */
  private stopDetectBaseLevel(): void {
    this.isNoiseCapturing = false
    this.startNoiseCapture = null

    if (this.options.onBaseLevel) {
      this.options.onBaseLevel(this.noiseEwma.value(), 100)
    }

    // now switch the ewma to voice activity detection
    this.soundEwma = new EWMA(this.options.soundEwmaHalfLife, this.noiseEwma.value())
    this.shortEwma = new EWMA(this.options.shortEwmaHalfLife, this.noiseEwma.value())
    this.inSoundEwmaSlowDown = false
  }

  /**
   * Feeds a sample into the noise EWMA and, during the initial noise capture,
   * reports the (floored) noise level and capture progress via `onBaseLevel`.
   */
  private updateNoiseLevel(average: number): void {
    this.noiseEwma.insert(average)

    if (!this.isNoiseCapturing) {
      return
    }
    if (!this.startNoiseCapture) {
      this.options.logger?.debug('unexpected null this.startNoiseCapture')
      return
    }
    const elapsedMs = Date.now() - this.startNoiseCapture.getTime()
    const perc = Math.min((elapsedMs / this.options.initialNoiseCaptureDuration) * 100, 100)

    if (this.options.onBaseLevel) {
      // never report less than the configured noise floor
      const noiseValue = Math.max(this.noiseEwma.value(), this.options.minNoiseLevel)
      this.options.onBaseLevel(noiseValue, perc)
    }
  }
}
