import {Caret} from 'app/fragment/caret';
import {HTMLNodeParser} from 'app/fragment/core/parser/html-node-parser';
import {HTMLTableParser} from 'app/fragment/core/parser/html-table-parser';
import {Lexer} from 'app/fragment/core/parser/lexer';
import {Token} from 'app/fragment/core/parser/token';
import {ClauseFragment, ClauseType, Fragment, FragmentType, SectionType} from 'app/fragment/types';
import {ClauseGroupFragment} from 'app/fragment/types/clause-group-fragment';

/**
 * An enumeration of the clipboard formats supported by FragmentParser.  The value of each
 * member is the MIME type string of that format; see
 * https://w3c.github.io/clipboard-apis/#reading-from-clipboard.
 */
export enum ClipboardFormat {
  // Plain text with no markup; FragmentParser.parseType() hands this to PlainTextFragmentParser
  TEXT_PLAIN = 'text/plain',
  // HTML markup; FragmentParser.parseType() hands this to HTMLFragmentParser
  TEXT_HTML = 'text/html',
}

/**
 * An abstract class representing a tokenising fragment parser.  This class delegates tokenising
 * to its deriving classes, which will depend on the format of the input source string.  It does
 * handle lexing and assembly of the fragment AST.
 *
 * Users should not instantiate either this class or its deriving classes.  Instead, they should
 * use the static method FragmentParser::parseType() to parse a string with a given format.
 */
export abstract class FragmentParser {
  // The type of the section being parsed into; assigned by the deriving classes' constructors
  protected sectionType: SectionType;

  protected lexer: Lexer = new Lexer();

  /**
   * Static helper to parse a source with a given type.  Throws if no parser matches the format.
   *
   * @param format      {ClipboardFormat} The MIME type, selecting both the parser and the source used
   * @param sectionType {SectionType}     The type of the section the result is parsed for
   * @param htmlSource  {string}          The source parsed when the format is TEXT_HTML
   * @param textSource  {string}          The source parsed when the format is TEXT_PLAIN
   * @param caret       {Caret}           The caret, handed through to the tokeniser
   * @returns           {Fragment[]}      The fragment AST
   * @throws            {Error}           If the format is not a known ClipboardFormat
   */
  public static parseType(
    format: ClipboardFormat,
    sectionType: SectionType,
    htmlSource: string,
    textSource: string,
    caret: Caret
  ): Fragment[] {
    switch (format) {
      case ClipboardFormat.TEXT_PLAIN:
        return new PlainTextFragmentParser(sectionType)._parse(textSource, caret);

      case ClipboardFormat.TEXT_HTML:
        return new HTMLFragmentParser(sectionType)._parse(htmlSource, caret);

      default: {
        // List every known MIME type in the error to make the failure self-explanatory
        const names: string = Object.keys(ClipboardFormat)
          .map((name: string) => ClipboardFormat[name])
          .join(`', '`);
        throw new Error(`Cannot find a FragmentParser for '${format}'; known formats are '${names}'.`);
      }
    }
  }

  // Protected constructor to prevent external instantiation
  protected constructor() {}

  /**
   * Parse the given string to a fragment AST.  A falsy source (null, undefined or empty) is
   * tokenised as the empty string.
   *
   * @param source {string}       The source string to parse
   * @param caret  {Caret}        The caret, handed through to the tokeniser
   * @returns      {Fragment[]}   The resulting fragment AST
   */
  protected _parse(source: string, caret: Caret): Fragment[] {
    const tokens: Token[] = this._tokenise(source || '', caret);

    return this._lex(tokens);
  }

  /**
   * Tokenise a source string to an array of tokens, ready for lexical analysis.  Overriding
   * classes must implement this method; it is assumed that a linearised stream of tokens
   * will be returned, in the order that a depth-first traversal of the resulting fragment
   * AST will visit their corresponding tree nodes.
   *
   * @param source {string}    The string to tokenise
   * @param caret  {Caret}     The caret describing where the result will be inserted
   * @returns      {Token[]}   The resulting tokens
   */
  protected abstract _tokenise(source: string, caret: Caret): Token[];

  /**
   * Perform lexical analysis on an array of tokens, converting them into the AST of
   * corresponding fragments.  The work is delegated to this parser's Lexer instance.
   *
   * @param tokens {Token[]}      The tokens to lex
   * @returns      {Fragment[]}   The fragment AST
   */
  protected _lex(tokens: Token[]): Fragment[] {
    return this.lexer.lex(tokens);
  }

  /**
   * Determines default clause type for the section we are in
   * if the parser cannot find an appropriate type.
   *
   * @returns {string} 'NORMAL' in introductory and appendix sections, otherwise 'REQUIREMENT'
   */
  protected _getDefaultClauseType(): string {
    return this._is(SectionType.INTRODUCTORY, SectionType.APPENDIX) ? 'NORMAL' : 'REQUIREMENT';
  }

  /**
   * Convenience method to check section type.
   *
   * @param sectionTypes {SectionType[]} Section types to check
   * @returns            {boolean}       True if this parser's section type is one of the given
   *                                     types, or if no types were given at all
   */
  protected _is(...sectionTypes: SectionType[]): boolean {
    return sectionTypes.length === 0 || sectionTypes.indexOf(this.sectionType) >= 0;
  }

  /**
   * Checking that the fragment isn't a child of its parent catches both the case where the caret is in an equation source
   * and in a caption.
   *
   * @param caret {Caret}   The caret to inspect
   * @returns     {boolean} True if the caret's fragment hangs off a table, figure or equation without
   *                        being one of its children; false if the caret has no fragment or the
   *                        fragment has no parent
   */
  protected _isInEquationOrCaption(caret: Caret): boolean {
    // Without a fragment or a parent there is nothing to be "inside" of; bail out rather than throw
    if (!caret.fragment?.parent) {
      return false;
    }

    return (
      caret.fragment.parent.is(FragmentType.TABLE, FragmentType.FIGURE, FragmentType.EQUATION) &&
      caret.fragment.parent.children.indexOf(caret.fragment) < 0
    );
  }

  /**
   * Checks if the caret is in a fragment with restricted inputs, either part of a clause group or a specifier instruction
   *
   * @param caret {Caret}   The caret to inspect
   * @returns     {boolean} True if the enclosing clause is a specifier instruction or has a clause
   *                        group ancestor
   */
  protected _isInClauseGroupOrSpecifierInstruction(caret: Caret): boolean {
    const clause: ClauseFragment = caret.fragment?.findAncestorWithType(FragmentType.CLAUSE) as ClauseFragment;
    const clauseGroup: ClauseGroupFragment = clause?.findAncestorWithType(
      FragmentType.CLAUSE_GROUP
    ) as ClauseGroupFragment;

    return (!!clause && clause.isClauseOfType(ClauseType.SPECIFIER_INSTRUCTION)) || !!clauseGroup;
  }
}

/**
 * A concrete implementation of FragmentParser, for parsing fragments from a string of plain
 * text with no additional markup.  Such input is generated by copying from PDF or plain text.
 * This class is not exported, and should be accessed via FragmentParser::parseType().
 */
class PlainTextFragmentParser extends FragmentParser {
  // Will match 'x.', 'x.y' or 'x.y.z' (the latter two with or without a trailing fullstop); or the
  // upper-case keyword 'NOTE', with or without a trailing fullstop.  A bare 'x' does not match.
  private static _CLAUSE_NORMATIVE_REGEXP: RegExp = /^\s*([0-9]+\.([0-9]+\.?){0,2}|NOTE\.?)\s*$/;

  // Will match upper-case letters followed by a number, e.g. 'A1', 'A1.2' or 'A1.2.3', with or
  // without a trailing fullstop.
  private static _CLAUSE_APPENDIX_REGEXP: RegExp = /^\s*([A-Z]+\d+(\.\d+){0,2}\.?)\s*$/;
  // Will match Roman numerals '(i)', numbers '(1)', or Latin numerals '(a)', or '*', '·', '•' or '-'
  // for bullet points; see https://stackoverflow.com/a/267405 for the Roman numerals regexp.
  private static _LIST_ITEM_REGEXP: RegExp =
    /^\s*(\(((M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3}))|[0-9]+|[a-z]+)\)|(\*|·|•|-))\s*$/i; // eslint-disable-line

  // Will match the 'NOTE' keyword exactly as _CLAUSE_NORMATIVE_REGEXP accepts it, so that 'NOTE.'
  // is recognised as a note as well as 'NOTE'.
  private static _NOTE_KEYWORD_REGEXP: RegExp = /^\s*NOTE\.?\s*$/;

  // Will match the number that may follow a 'NOTE' keyword, with or without a trailing fullstop.
  private static _NOTE_NUMBER_REGEXP: RegExp = /^\s*[0-9]+(\.)?\s*$/i;

  /**
   * Tokenise a plain text source without going through FragmentParser::parseType(); used by
   * HTMLFragmentParser as a fallback.
   *
   * @param sectionType {SectionType} The type of the section the result is parsed for
   * @param source      {string}      The string to tokenise
   * @param caret       {Caret}       The caret describing where the result will be inserted
   * @returns           {Token[]}     The resulting tokens
   */
  public static tokenise(sectionType: SectionType, source: string, caret: Caret): Token[] {
    return new PlainTextFragmentParser(sectionType)._tokenise(source, caret);
  }

  constructor(sectionType: SectionType) {
    super();
    this.sectionType = sectionType;
  }

  /**
   * @inheritdoc
   */
  protected _tokenise(source: string, caret: Caret): Token[] {
    const isInEquationOrCaption: boolean = this._isInEquationOrCaption(caret);

    // Replace all non-breaking spaces with actual spaces, as non-breaking spaces break our validation rules
    source = source.replace(/\u00A0/gm, ' ');

    if (isInEquationOrCaption || this._isInClauseGroupOrSpecifierInstruction(caret)) {
      // Remove new lines if pasting into an equation or input.
      source = source.replace(/(\r|\r\n\t|\n|\r\t)/gm, '');

      // Remove non ascii characters if pasting into an equation.
      if (caret.fragment.parent.is(FragmentType.EQUATION)) {
        source = source.replace(/[^ -~£]/gm, '');
      }

      // Return the source as a TEXT token so we don't try to parse any clauses
      return [new Token(FragmentType.TEXT, this._getWords(source).join(' '))];
    }

    const isInTable: boolean = !!caret.fragment.findAncestorWithType(FragmentType.TABLE_CELL);
    const words: string[] = this._getWords(source);

    const tokens: Token[] = [];
    let startOfLine: boolean = true;

    if (isInTable) {
      // Inside a table cell no clauses are created, so strip clause numbering that directly follows
      // a line break, and collapse consecutive line breaks into one.
      const numberRegexp: RegExp = /^\d+$/;
      for (let i = 0; i < words.length; i++) {
        if (this._isNormativeOrAppendix(words[i]) && words[i - 1] && words[i - 1] === '\n') {
          const removed: string = words.splice(i, 1)[0];
          i--;
          // A removed note keyword takes its following number with it
          if (this._isNoteKeyword(removed) && numberRegexp.test(words[i + 1])) {
            words.splice(i + 1, 1);
          }
        }
        if (words[i] === '\n' && words[i + 1] && words[i + 1] === '\n') {
          words.splice(i, 1);
          i--;
        }
      }
    }

    for (let i: number = 0; i < words.length; ++i) {
      const word: string = words[i];

      if (this._isNormativeOrAppendix(word) && startOfLine && !isInTable) {
        // Work out what kind of clause we're parsing
        let clauseType: string;

        if (this._isNormative(word)) {
          clauseType = 'REQUIREMENT';
          if (this._isNoteKeyword(word)) {
            clauseType = 'NOTE';
            // If we're parsing a note, skip the following number, if it exists & we're in a Normative section
            if (
              PlainTextFragmentParser._NOTE_NUMBER_REGEXP.test(words[i + 1]) &&
              this._is(SectionType.NORMATIVE)
            ) {
              words.splice(i + 1, 1);
            }
          } else {
            // Get the numbers in the clause word; advice should have exactly three (x.y.z.)
            const numbers: string[] = word.split('.').filter((n: string) => !!n);
            if (numbers.length === 3) {
              clauseType = 'ADVICE';
            }
          }
        } else {
          // Appendix numbering: the count of dot-separated parts gives the heading level
          clauseType = 'HEADING_1';
          const numbers: string[] = word.split('.').filter((n: string) => !!n);
          if (numbers.length === 2) {
            clauseType = 'HEADING_2';
          } else if (numbers.length === 3) {
            clauseType = 'HEADING_3';
          }
        }

        if (words[i + 1] === '\n') {
          // After a line break, if the next word is also a clause then insert a blank character
          // in order to create an empty text fragment
          if ((this._isNormativeOrAppendix(words[i + 2]) && !isInTable) || i + 1 === words.length - 1) {
            words.splice(i + 1, 0, '\u200B');
          } else {
            // Otherwise remove the unneeded line break at the start of the clause
            words.splice(i + 1, 1);
          }
          // Insert a blank character if this is the last word to avoid empty clause fragments
        } else if (i === words.length - 1) {
          words.splice(i + 1, 0, '\u200B');
        }

        if (!this._clauseTypeCanBeParsedInSection(clauseType)) {
          clauseType = this._getDefaultClauseType();
          --i; // Otherwise we ignore clause numbering
        }

        startOfLine = false;
        tokens.push(new Token(FragmentType.CLAUSE, clauseType));
      } else if (this._isListItem(word)) {
        const ordered: boolean = true;
        tokens.push(new Token(FragmentType.LIST_ITEM, ordered.toString()));
      } else if (word === '\n' && i !== words.length - 1) {
        // A line break that is not the final word starts a new clause of the default type
        const clauseType: string = this._getDefaultClauseType();
        tokens.push(new Token(FragmentType.CLAUSE, clauseType));
      } else {
        // Gather a run of plain words into a single TEXT token
        let end: number = i;
        while (
          end < words.length &&
          !(startOfLine && ((this._isNormativeOrAppendix(words[end]) && !isInTable) || this._isListItem(words[end])))
        ) {
          // If the next word is a newline, set the flag and remove the \n
          startOfLine = words[end + 1] === '\n';
          // If the next word is a newline and the line after that is a clause or list item then splice this from source array.
          const splice: boolean =
            startOfLine &&
            ((this._isNormativeOrAppendix(words[end + 2]) && !isInTable) || this._isListItem(words[end + 2]));
          words.splice(end + 1, splice ? 1 : 0);
          ++end;
          if (startOfLine && !splice) {
            break;
          }
        }

        // Slice out the words we want and join together as a sentence
        const value: string = words.slice(i, end).join(' ');
        tokens.push(new Token(FragmentType.TEXT, value));
        i = end - 1;
      }
    }
    return tokens;
  }

  /**
   * Split a source string into words, keeping each run of line breaks as a single '\n' word.
   *
   * @param source {string}   The string to split
   * @returns      {string[]} The non-empty words, without a leading '\n'
   */
  private _getWords(source: string): string[] {
    // Replace (>= 1) newlines (\n or \r\n) with a newline padded by spaces, so we retain the
    // \n in our word stream; then split on any non-newline whitespace.
    const words: string[] = source
      .replace(/((\r?\n)+\ *)+/g, ' \n ')
      .split(/[ \t\r]+/)
      .filter((word: string) => !!word);

    // If the first character is a newline, skip it
    words.splice(0, words[0] === '\n' ? 1 : 0);

    return words;
  }

  /**
   * Convenience method to check if given word is a normative clause.
   *
   * @param word {string}  Word to check
   * @returns    {boolean} True if normative clause
   */
  private _isNormative(word: string): boolean {
    return PlainTextFragmentParser._CLAUSE_NORMATIVE_REGEXP.test(word);
  }

  /**
   * Convenience method to check if given word is an appendix clause.
   *
   * @param word {string}  Word to check
   * @returns    {boolean} True if appendix clause
   */
  private _isAppendix(word: string): boolean {
    return PlainTextFragmentParser._CLAUSE_APPENDIX_REGEXP.test(word);
  }

  /**
   * Convenience method to check if given word is a list.
   *
   * @param word {string}  Word to check
   * @returns    {boolean} True if list item
   */
  private _isListItem(word: string): boolean {
    return PlainTextFragmentParser._LIST_ITEM_REGEXP.test(word);
  }

  /**
   * Convenience method to check if given word is the note keyword, 'NOTE' or 'NOTE.'.
   *
   * @param word {string}  Word to check
   * @returns    {boolean} True if note keyword
   */
  private _isNoteKeyword(word: string): boolean {
    return PlainTextFragmentParser._NOTE_KEYWORD_REGEXP.test(word);
  }

  /**
   * Convenience method for checking if given word is either kind of clause numbering.
   *
   * @param word {string}  Word to check
   * @returns    {boolean} True if normative or appendix clause
   */
  private _isNormativeOrAppendix(word: string): boolean {
    return this._isNormative(word) || this._isAppendix(word);
  }

  /**
   * Determines whether the parsed clause type applies to the class' section.
   * Clause type is determined by RegExp on clause indexes.
   * We cannot determine a HEADING in a Normative section by plain text => it has come from an Appendix.
   *
   * @param clauseType {string}  Clause type to check
   * @returns          {boolean} True if applies; false for a clause type this parser never produces
   */
  private _clauseTypeCanBeParsedInSection(clauseType: string): boolean {
    switch (clauseType) {
      case 'REQUIREMENT':
      case 'ADVICE':
      case 'NOTE': {
        return this._is(SectionType.NORMATIVE);
      }
      case 'HEADING_1':
      case 'HEADING_2':
      case 'HEADING_3': {
        return this._is(SectionType.APPENDIX);
      }
      default: {
        // Return an explicit boolean rather than falling off the end with undefined
        return false;
      }
    }
  }
}

/**
 * A concrete implementation of FragmentParser, for parsing fragments from a string of HTML
 * markup.  Tables are handed to HTMLTableParser and other nodes to HTMLNodeParser, falling back
 * to the plain text parser when the node parser yields no tokens.
 * This class is not exported, and should be accessed via FragmentParser::parseType().
 */
class HTMLFragmentParser extends FragmentParser {
  // The only node names that are tokenised; anything else in the pasted markup is dropped
  private static readonly interestingNodeNames: string[] = ['P', 'H2', 'H3', 'SPAN', 'TABLE'];

  private nodeParser: HTMLNodeParser = new HTMLNodeParser();
  private tableParser: HTMLTableParser = new HTMLTableParser();

  constructor(sectionType: SectionType) {
    super();
    this.sectionType = sectionType;
    this.nodeParser.sectionType = sectionType;
  }

  /**
   * @inheritdoc
   */
  protected _tokenise(source: string, caret: Caret): Token[] {
    const tokens: Token[] = [];
    const sourceAsElement: HTMLElement = document.createElement('DIV');
    // NOTE(review): source is pasted HTML and is parsed by assigning it to innerHTML of an element
    // created by the live document - confirm it is sanitised upstream, or consider an inert parser.
    sourceAsElement.innerHTML = source;

    // Strip out all the extra garbage that word adds to the HTML, keeping only the nodes we care about
    const interestedNodes: HTMLElement[] = this._collectInterestingNodes(sourceAsElement);

    // Check if the HTMLElements being pasted contain a table
    const textElements: boolean = !interestedNodes.some((node: HTMLElement) => node.nodeName === 'TABLE');

    // If pasting into an equation or input and not pasting a table, combine all of the text and reduce to one node.
    // With no nodes at all there is nothing to combine into, so skip rather than dereference a missing node.
    if (
      (this._isInClauseGroupOrSpecifierInstruction(caret) || this._isInEquationOrCaption(caret)) &&
      textElements &&
      interestedNodes.length > 0
    ) {
      const innerText: string = interestedNodes.map((node: HTMLElement) => ` ${node.innerText}`).join('');
      interestedNodes.splice(1, interestedNodes.length - 1);
      interestedNodes[0].innerText = innerText;
    }

    interestedNodes.forEach((node: HTMLElement) => {
      this._deleteCommentReferenceNodes(node);
      if ('TABLE' === node.nodeName) {
        tokens.push(...this.tableParser.parse(node));
      } else if (node.innerText.replace(/\s/g, '').length) {
        // Only nodes with some non-whitespace text are tokenised
        const parsed: Token[] = this.nodeParser.parse(node);
        if (parsed.length) {
          tokens.push(...parsed);
        } else {
          // Default to using the plain text parser.
          // Some Word documents are polluted with random new lines.
          const _source: string = node.innerText.replace(/\n|\u00A0+/g, ' ');
          const tokenised: Token[] = PlainTextFragmentParser.tokenise(this.sectionType, _source, caret);
          // Once earlier tokens exist, text with no clause of its own gets a default clause in front of it
          if (!!tokens.length && !tokenised.some((token: Token) => token.type === FragmentType.CLAUSE)) {
            tokenised.unshift(new Token(FragmentType.CLAUSE, this._getDefaultClauseType()));
          }
          tokens.push(...tokenised);
        }
      }
    });
    return tokens;
  }

  /**
   * Collects, in document order, the children of the given element that have an interesting node
   * name.  A DIV child is not collected itself, but is searched one level down for interesting
   * children of its own.
   *
   * @param root {HTMLElement}   The element holding the parsed source
   * @returns    {HTMLElement[]} The interesting nodes
   */
  private _collectInterestingNodes(root: HTMLElement): HTMLElement[] {
    const collected: HTMLElement[] = [];

    for (let i = 0; i < root.childNodes.length; i++) {
      const child: Node = root.childNodes[i];
      if (HTMLFragmentParser.interestingNodeNames.includes(child.nodeName)) {
        collected.push(child as HTMLElement);
      } else if (child.nodeName === 'DIV') {
        for (let j = 0; j < child.childNodes.length; j++) {
          const grandchild: Node = child.childNodes[j];
          if (HTMLFragmentParser.interestingNodeNames.includes(grandchild.nodeName)) {
            collected.push(grandchild as HTMLElement);
          }
        }
      }
    }

    return collected;
  }

  /**
   * This deletes all the descendants of the given node that have the classname MsoCommentReference, as we don't want to paste these.
   *
   * @param node {HTMLElement} The given node.
   */
  private _deleteCommentReferenceNodes(node: HTMLElement): void {
    if (node.childNodes.length > 0) {
      // Walk backwards so that removing a child does not shift the indices still to be visited
      for (let i = node.childNodes.length - 1; i > -1; i--) {
        if ((node.childNodes.item(i) as HTMLElement).className === 'MsoCommentReference') {
          node.removeChild(node.childNodes.item(i));
        } else {
          this._deleteCommentReferenceNodes(node.childNodes.item(i) as HTMLElement);
        }
      }
    }
  }
}
