import _ from 'lodash'
import xregexp from 'xregexp'

import { AnnotationSetType, AnnotationType, SentenceType } from '../../types/types'
import { AnnotatedSegment } from './AnnotatedSegment'
import { isCantillation, isVowel, WordSplitCharRegEx } from './CharacterClasses'

/**
 * A sentence prepared for display.
 *
 * The raw sentence text is normalized (trailing markers removed, whitespace
 * collapsed), optionally stripped of vowels and/or cantillation marks, and
 * then cut into {@link AnnotatedSegment}s. Per-character annotations are kept
 * aligned with the text through every transformation.
 *
 * The segments are rebuilt on construction and every time the vowel or
 * cantillation setting changes.
 */
class AnnotatedSentence {
  /** The sentence as handed to the constructor. */
  private readonly xsentence: SentenceType

  /** Normalized text, before vowels/cantillations are stripped. */
  private internalNormalizedText: string

  /** When false, cantillation characters are removed from the text. */
  private cantillations: boolean

  /** When false, vowel characters are removed from the text. */
  private vowels: boolean

  /** Copy of the annotations of the annotation set passed to the constructor. */
  private readonly annotations: Array<AnnotationType>

  /** Segments produced by the most recent {@link generate} call. */
  private readonly segments: Array<AnnotatedSegment>

  /**
   * Boundary characters that follow each segment; same length as `segments`.
   * The entry for the last segment is always `null`.
   */
  private readonly segmentBoundaries: Array<string | null>

  /**
   * Word counts per segment. When empty, every run of split characters
   * starts a new segment.
   */
  private readonly segmentations: Array<number>

  /** When false, a trailing run of ':' / '׃' is removed from the text. */
  private readonly showEndOfSentenceCharacter: boolean

  private updating: boolean

  public style: CSSStyleDeclaration

  /**
   * @param sentence the sentence to wrap; a sentence without text is treated as empty text
   * @param annotationSet optional set whose `annotations` are copied
   * @param segmentation optional word counts per segment; entries are coerced to numbers
   * @param showEndOfSentenceCharacter keep a trailing ':' / '׃' when true
   */
  constructor(
    sentence: SentenceType,
    annotationSet: AnnotationSetType | null = null,
    segmentation = [] as Array<number>,
    showEndOfSentenceCharacter = false,
  ) {
    this.xsentence = sentence

    this.segmentations = segmentation ? [...segmentation].map((s) => +s) : []

    this.internalNormalizedText = ''
    this.showEndOfSentenceCharacter = showEndOfSentenceCharacter
    this.vowels = true
    this.cantillations = true
    this.updating = false

    // Copy so that later changes to the caller's array do not leak in. A set
    // without an annotations array counts as "no annotations" instead of
    // throwing on the spread.
    const providedAnnotations = annotationSet?.annotations
    this.annotations = providedAnnotations ? [...providedAnnotations] : []

    this.segments = []
    this.segmentBoundaries = []
    this.style = {} as CSSStyleDeclaration
    this.generate()
  }

  /** @returns the sentence passed to the constructor */
  sentence(): SentenceType {
    return this.xsentence
  }

  /** @returns the normalized text computed by the latest {@link transformedSentence} call */
  normalizedText(): string {
    return this.internalNormalizedText
  }

  isUpdating(): boolean {
    return this.updating
  }

  /** @returns the number of segments currently held */
  nSegments(): number {
    return this.segments.length
  }

  /** @returns the sum of `nWords()` over all segments */
  nWords(): number {
    return this.segments.reduce((total, segment) => total + segment.nWords(), 0)
  }

  /** Sets both display flags and regenerates the segments once. */
  set(args: { vowels: boolean; cantillations: boolean }): void {
    this.vowels = args.vowels
    this.cantillations = args.cantillations
    this.generate()
  }

  /** Sets the vowel flag and regenerates the segments. */
  setVowels(v: boolean): void {
    this.vowels = v
    this.generate()
  }

  /** Sets the cantillation flag and regenerates the segments. */
  setCantillations(v: boolean): void {
    this.cantillations = v
    this.generate()
  }

  setIsUpdating(u: boolean): void {
    this.updating = u
  }

  /** @returns true when the wrapped sentence is empty according to lodash's `isEmpty` */
  isEmpty(): boolean {
    // Test the stored sentence, not the `sentence` accessor: a function has no
    // own enumerable properties, so `_.isEmpty(this.sentence)` is always true.
    return _.isEmpty(this.xsentence)
  }

  /**
   * Returns a copy of `text` without the characters for which `shouldDrop`
   * returns a truthy value, together with the annotations of the characters
   * that were kept. `annotations` must be index-aligned with `text`.
   */
  private static withoutCharacters(
    text: string,
    annotations: Array<AnnotationType | null>,
    shouldDrop: (char: string) => unknown,
  ): { text: string; annotations: Array<AnnotationType | null> } {
    let keptText = ''
    const keptAnnotations: Array<AnnotationType | null> = []
    for (let i = 0; i < text.length; i += 1) {
      const char = text.charAt(i)
      if (!shouldDrop(char)) {
        keptText += char
        keptAnnotations.push(annotations[i] ?? null)
      }
    }
    return { text: keptText, annotations: keptAnnotations }
  }

  // this is also implemented in models, for Sentence; maybe this should go into an API call to prevent duplicate code?
  /**
   * Normalizes the sentence text and applies the vowel/cantillation settings.
   *
   * Side effect: stores the normalized text (before any stripping) so that
   * {@link normalizedText} returns it.
   *
   * @returns the text to display and an array of the same length holding, for
   *   each character, the annotation defined at that index or `null`
   */
  transformedSentence(): {
    text: string
    annotations: Array<AnnotationType | null>
  } {
    // A sentence without text is handled as empty text rather than crashing.
    let text: string = this.xsentence?.text ?? ''

    // remove (.) at end of sentence
    text = text.replace(/\s*\(.\)$/, '')

    // remove {.} at end of sentence
    text = text.replace(/\s*\{.\}$/, '')

    // remove : at end of sentence
    if (!this.showEndOfSentenceCharacter) {
      text = text.replace(/\s*[:׃]+\s*$/, '')
    }

    // remove , at end of sentence
    text = text.replace(/\s*,\s*$/, '')

    // attach the paseq (U+05C0) to the word that precedes it
    const spacedPaseq = xregexp('\\s*\u05c0\\s*', 'gu')
    text = xregexp.replace(text, spacedPaseq, '\u05c0 ')

    // map multiple occurrences of a split character onto one
    text = text.replace(/\s+/g, ' ')
    const maqafRun = xregexp('\u05be+', 'gu') // U+05BE, maqaf
    const rafeRun = xregexp('\u05bf+', 'gu') // U+05BF, rafe

    text = xregexp.replace(text, maqafRun, '\u05be')
    text = xregexp.replace(text, rafeRun, '\u05bf')
    text = text.trim()

    // replace 'foo [foobar]' with 'foobar'
    // for example Emor, Rishon, line 5.
    const bracketReplacement = xregexp('([^\u05be\u05bf\\s]+)\\s+\\[(\\S+)\\]', 'gu')
    text = xregexp.replace(text, bracketReplacement, '$2')

    this.internalNormalizedText = text

    // Index the annotations once instead of searching the list for every
    // character. The first annotation defined for an index wins.
    const annotationByIndex = new Map<unknown, AnnotationType>()
    for (const annotation of this.annotations) {
      if (!annotationByIndex.has(annotation.index)) {
        annotationByIndex.set(annotation.index, annotation)
      }
    }

    // annotations is an array of nulls, except for those locations where an
    // annotation has been defined by the teacher
    let annotations: Array<AnnotationType | null> = []
    for (let index = 0; index < text.length; index += 1) {
      annotations.push(annotationByIndex.get(index) ?? null)
    }

    // copy all non-cantillations; keep annotations in sync
    if (!this.cantillations) {
      const stripped = AnnotatedSentence.withoutCharacters(text, annotations, (char) => isCantillation(char))
      text = stripped.text
      annotations = stripped.annotations
    }

    // copy all non-vowels; keep annotations in sync
    if (!this.vowels) {
      const stripped = AnnotatedSentence.withoutCharacters(text, annotations, (char) => isVowel(char))
      text = stripped.text
      annotations = stripped.annotations
    }

    return { annotations, text }
  }

  /**
   * Rebuilds `segments` and `segmentBoundaries` from the transformed sentence.
   *
   * Characters matching `WordSplitCharRegEx` are collected as a pending
   * boundary. When the next non-split character arrives, the boundary either
   * closes the current segment or, when segmentations say the segment is not
   * complete yet, is copied into the segment together with its annotations.
   * Split characters at the very end of the text are not copied into any
   * segment.
   */
  generate(): void {
    const { annotations, text } = this.transformedSentence()
    this.segments.length = 0
    this.segmentBoundaries.length = 0

    // no segmentations, then life is simple: every boundary introduces a new segment
    const splitOnEveryBoundary = _.isEmpty(this.segmentations)

    let segmentChars = ''
    let pendingBoundary = ''
    let segmentStartIndex = 0
    let segmentationCursor = 0
    let wordsInSegment = 0
    let segmentAnnotations: Array<AnnotationType | null> = []

    for (let i = 0; i < text.length; i += 1) {
      const char = text.charAt(i)

      if (xregexp.match(char, WordSplitCharRegEx)) {
        pendingBoundary += char
        continue
      }

      if (pendingBoundary) {
        // a boundary followed by a regular character means a word just ended
        wordsInSegment += 1

        let startsNewSegment = false
        if (splitOnEveryBoundary) {
          startsNewSegment = true
        } else {
          // Once the segmentation list is exhausted nothing splits any more,
          // so the remaining words all end up in the last segment.
          const wordsWanted = this.segmentations[segmentationCursor]
          if (wordsWanted !== undefined && wordsInSegment >= wordsWanted) {
            startsNewSegment = true
            segmentationCursor += 1
            wordsInSegment = 0
          }
        }

        if (startsNewSegment) {
          this.segments.push(
            new AnnotatedSegment(this, this.segments.length, segmentStartIndex, segmentChars, segmentAnnotations),
          )
          this.segmentBoundaries.push(pendingBoundary)
          segmentChars = ''
          segmentStartIndex = i
          // the finished segment keeps the old array; start a fresh one
          segmentAnnotations = []
        } else {
          // The boundary does not end the segment, so its characters become
          // part of the segment. They sit directly before index i in the text.
          const boundaryStart = i - pendingBoundary.length
          for (let j = 0; j < pendingBoundary.length; j += 1) {
            segmentChars += pendingBoundary.charAt(j)
            segmentAnnotations.push(annotations[boundaryStart + j] ?? null)
          }
        }
        pendingBoundary = ''
      }

      segmentChars += char
      segmentAnnotations.push(annotations[i] ?? null)
    }

    // last segment; it has no boundary after it
    this.segments.push(
      new AnnotatedSegment(this, this.segments.length, segmentStartIndex, segmentChars, segmentAnnotations),
    )
    this.segmentBoundaries.push(null)
  }

  /**
   * Calls `callback` for every segment, in order, with the boundary that
   * follows the segment and the segment's index.
   *
   * Note that the boundary passed for the last segment is `null`, even though
   * the callback type declares it as `string`.
   */
  forEach(callback: (segment: AnnotatedSegment, segmentBoundary: string, index?: number) => void): void {
    this.segments.forEach((annotatedSegment, segmentIndex) => {
      callback(annotatedSegment, this.segmentBoundaries[segmentIndex], segmentIndex)
    })
  }
}
export { AnnotatedSentence }
