import _ from 'lodash'
import { SentenceType } from 'types/types'
import { computed } from 'vue'
import { VueLogger } from 'vue-logger-plugin'

import { isSeparator } from '@/util/CharacterClasses'
import { SegmentationType } from '@/util/Segmentation'

/**
 * Identifies which phrasing rule set (split table) is used to break a verse
 * into phrases. The numeric values are spelled out so that they are stable
 * regardless of member order.
 */
enum PHRASERS {
  BMT = 0,
  DEFAULT = 1,
}

const useVersePhraser = (logger: VueLogger) => {
  // Selectable phrasers as { title, value } entries, wrapped in a computed ref
  // so consumers can bind to it directly.
  const phrasers = computed(() => [
    { title: 'BMT', value: PHRASERS.BMT },
    { title: 'default', value: PHRASERS.DEFAULT },
  ])

  // U+05BE HEBREW PUNCTUATION MAQAF: the hyphen-like mark that joins words.
  // wordCount treats it as part of a word rather than as a word boundary.
  const MAQAF = '\u05BE'

  // Disjunctive (pausal) accents, one constant per Unicode code point. These
  // are the characters the SPLITTERS tables are built from.
  const ETNACHTA = '\u0591'
  const SEGOL = '\u0592'
  const SHALSHELET = '\u0593'
  const ZAQEF_QATAN = '\u0594'
  const ZAQEF_GADOL = '\u0595'
  const TIPCHA = '\u0596'
  const REVIA = '\u0597'
  // NOTE(review): Unicode also defines U+05AE (HEBREW ACCENT ZINOR), which some
  // encoded texts use for zarqa instead of U+0598 — confirm which code point
  // the source texts actually carry.
  const ZARQA = '\u0598'
  const PASHTA = '\u0599'
  const YETIV = '\u059A'
  const TEVIR = '\u059B'
  const GERESH = '\u059C'
  const GERSHAYIM = '\u059E'

  const PAZER = '\u05A1'
  const PAZER_GADOL = '\u059F'
  // const KARNEI_FARAH = PAZER_GADOL // same thing (alternative name for U+059F)
  const TELISHA_GEDOLA = '\u05A0'
  // U+05C0 is HEBREW PUNCTUATION PASEQ (the vertical bar); it is used here as
  // the marker for munach legarmeh.
  // NOTE(review): the same bar also occurs as a plain paseq — presumably fine
  // for phrasing purposes; confirm.
  const MUNACH_LEGARME = '\u05C0'

  // conjunctives (none are used for splitting at the moment)
  // const GERESH_MUQDAM = '\u059D'

  // Split tables, one per phraser. parsePhrase indexes the rows by recursion
  // level: row 0 is applied first, and each later row is applied to the pieces
  // produced by the row before it.
  const SPLITTERS: Record<PHRASERS, string[][]> = {
    [PHRASERS.BMT]: [
      // row 0
      [ETNACHTA],
      // row 1
      [ZAQEF_GADOL, ZAQEF_QATAN, SEGOL, REVIA, GERESH, SHALSHELET],
      // row 2
      [TEVIR, PASHTA, YETIV, ZARQA],
      // row 3
      [PAZER, PAZER_GADOL, TELISHA_GEDOLA, GERSHAYIM, MUNACH_LEGARME],
    ],

    [PHRASERS.DEFAULT]: [
      // row 0: level 1 disjunctives
      [ETNACHTA],
      // row 1: level 2 disjunctives
      [TIPCHA, ZAQEF_GADOL, ZAQEF_QATAN, SEGOL, SHALSHELET],
      // row 2: level 3 disjunctives
      [TEVIR, REVIA, PASHTA, YETIV, ZARQA],
      // row 3
      [PAZER, PAZER_GADOL, TELISHA_GEDOLA, GERESH, GERSHAYIM, MUNACH_LEGARME],
    ],
  }

  // enum LEVELS {
  //   LEVEL_1,
  //   LEVEL_2,
  //   LEVEL_3,
  //   LEVEL_4,
  //   LEVEL_5,
  // }

  /**
   * Cut `text` into phrases, closing a phrase at every word that carries one of
   * the given accent characters.
   *
   * A phrase runs through the end of the accented word, i.e. up to (but not
   * including) the next character for which isSeparator returns true. That one
   * separator character is dropped and the next phrase starts right after it.
   *
   * @param text - accented text to split
   * @param splitters - accent characters that close a phrase
   * @returns the phrases in text order; whatever follows the last accented
   *   word is returned as a final phrase. Empty text yields an empty array.
   */
  const splitOnTrope = (text: string, splitters: Array<string>): Array<string> => {
    // Tolerate a missing or empty splitter list (e.g. an out-of-range level
    // lookup by the caller): there is nothing to split on.
    if (!splitters || splitters.length === 0) {
      return text.length > 0 ? [text] : []
    }

    const parts: Array<string> = []
    let lastIndex = 0

    for (let i = 0; i < text.length; i++) {
      if (!splitters.includes(text[i])) {
        continue
      }

      // the accent sits inside a word; the phrase ends where that word ends
      let breakIndex = i + 1
      while (breakIndex < text.length && !isSeparator(text[breakIndex])) {
        breakIndex++
      }
      parts.push(text.substring(lastIndex, breakIndex))
      lastIndex = breakIndex + 1

      // Resume scanning after this word. A word can carry a second splitter
      // (some accents are written twice on one word); rescanning the rest of
      // the word would cut again with lastIndex > breakIndex, and because
      // substring() swaps its arguments that would emit a bogus " " or ""
      // phrase.
      i = breakIndex
    }

    if (lastIndex < text.length) {
      parts.push(text.substring(lastIndex)) // Add the remaining part
    }

    return parts
  }

  /**
   * Count the words in `text`.
   *
   * A word is a maximal run of characters that are either not separators
   * (per isSeparator) or are a maqaf; a maqaf therefore never ends a word, so
   * maqaf-joined words count as one.
   *
   * @param text - text to count words in
   * @returns number of words; 0 for empty or separator-only text
   */
  const wordCount = (text: string) => {
    let words = 0
    let insideWord = false

    for (let pos = 0; pos < text.length; pos += 1) {
      const ch = text[pos]
      const isWordChar = !isSeparator(ch) || ch === MAQAF

      // every word is counted exactly once, at its first character
      if (isWordChar && !insideWord) {
        words += 1
      }
      insideWord = isWordChar
    }

    return words
  }

  /**
   * Recursively break `text` into phrases using the split table of `method`.
   *
   * At each level the text is split on that level's accents (SPLITTERS row
   * `level`) and every resulting piece is split again one level deeper.
   * Recursion stops when the table has no row for `level` or when `level`
   * reaches `maxLevel`.
   *
   * @param text - text to split
   * @param method - which split table to use
   * @param level - 0-based row of the split table to apply
   * @param maxLevel - exclusive upper bound on `level`; rows at or beyond it
   *   are not applied
   * @returns the phrases in text order; `[text]` when no splitting is done
   */
  function parsePhrase(text: string, method: PHRASERS, level: number, maxLevel: number): Array<string> {
    logger.debug(`parsePhrase @ ${level}, text = ${text}`)

    // max recursion reached. Rows are 0-based, so `level === length` is already
    // past the last row; `>=` keeps us from looking up a row that does not
    // exist and handing `undefined` to splitOnTrope.
    if (level >= SPLITTERS[method].length || level > maxLevel - 1) {
      logger.debug(`parsePhrase @ ${level}, returns early with [${text}]`)
      return [text] // Base case: no further splitting, return text as is
    }

    // at level 1, we only divide if there are more than 4 words; if not, we jump straight to level 2
    // if (level === LEVELS.LEVEL_1) {
    //   // "If the whole verse is 4 words or fewer, then there is never an ETNACHTA; the whole verse is the SOF PASUK segment."
    //   // Thomer says: we treat the whole text as one that needs to be split at level 2
    //   if (wordCount(text) <= 4) {
    //     return parsePhrase(text, LEVELS.LEVEL_2)
    //   }
    // } else if (level === LEVELS.LEVEL_2) {
    //   // Rule 1: Any time we see a segment of three or more words, we have to divide further.
    //   // Thomer says: therefore, fewer than three words: we're done
    //   if (wordCount(text) <= 2) {
    //     return [text]
    //   }
    // }

    logger.debug(`parsePhrase @ ${level}, SPLITTERS[${method}][${level}] = ${SPLITTERS[method][level]}`)
    const segments = splitOnTrope(text, SPLITTERS[method][level])

    logger.debug(`parsePhrase @ ${level}, segments = ${segments}`)
    // each piece is refined independently with the next row of the table
    const r = segments.flatMap((segment) => parsePhrase(segment, method, level + 1, maxLevel))
    logger.debug(`parsePhrase @ ${level}, returns ${r}`)
    return r
  }

  /**
   * Phrase a sentence and express the result as a segmentation: one number per
   * phrase, in text order.
   *
   * @param sentence - sentence whose `text` is phrased
   * @param method - which split table to use
   * @param maxLevel - passed through to parsePhrase as its level bound
   * @returns per phrase, its word count plus its maqaf count; empty when the
   *   sentence has no text
   */
  const sentenceToSegmentation = (sentence: SentenceType, method: PHRASERS = PHRASERS.BMT, maxLevel: number = 2): SegmentationType => {
    if (!sentence.text) {
      return []
    }

    // A sentence of 4 words or fewer has no etnachta, so the first row of the
    // split table is skipped for it; everything else starts at row 0.
    let firstLevel = 0
    if (wordCount(sentence.text) <= 4) {
      firstLevel = 1
    }
    const phrases = parsePhrase(sentence.text, method, firstLevel, maxLevel)

    // Convert phrases into segment sizes for color rendering. annotated-text
    // splits on maqaf, whereas phrasing counts a maqaf-joined group as a single
    // word, so each maqaf in a phrase adds one to that phrase's size.
    const segmentation: SegmentationType = []
    for (const phrase of phrases) {
      logger.debug(`count words in segment ${phrase}`)
      const words = wordCount(phrase)
      const maqafs = phrase.split(MAQAF).length - 1
      logger.debug(`wc = ${words}, maqafCount = ${maqafs}`)
      segmentation.push(words + maqafs)
    }
    logger.debug('segmentation =')
    logger.debug(segmentation)

    return segmentation
  }

  return { phrasers, sentenceToSegmentation }
}

export { PHRASERS, useVersePhraser }
