import {HTMLFragmentMapper} from 'app/fragment/core/parser/html-fragment-mapper';
import {Token, TokenType} from 'app/fragment/core/parser/token';
import {FragmentType, SectionType} from 'app/fragment/types';

/**
 * Turns a single HTML node (as produced by Word, per the comments in this file) into parser tokens.
 *
 * Three kinds of node are recognised by {@link HTMLNodeParser.parse}: nodes that have a fragment mapping
 * for the current section type, Word list paragraphs, and captions. Everything else yields no tokens
 * and is left to the layer above.
 *
 * `sectionType` is not initialised here; it is read while parsing, so callers are expected to assign it
 * before calling `parse`.
 */
export class HTMLNodeParser {
  // Matches the clause indexes as printed in word to strip them before creating the correct fragments:
  // x., x.x, x.x.x, NOTE, NOTE X, NOTE XX (any number of digits), a leading bullet (· or •), etc
  protected static _CLAUSE_NORMATIVE_REGEXP: RegExp = /^\s*([0-9]+\.([0-9]+\.?){0,2}|NOTE(\s)?([0-9]+)?\.?|(·|•)?)\s*/;

  // Matches appendix clause indexes: Xx, Xx.x, Xx.x.x (upper-case letters followed by a number).
  protected static _CLAUSE_APPENDIX_REGEXP: RegExp = /^\s*([A-Z]+\d+(\.\d+){0,2}\.?)\s*/;

  // Matches list indexes as printed in word
  // (a) a) a. (1) 1) 1. (iv) iv) iv.
  protected static _LIST_INDEX_REGEXP: RegExp =
    /^\s*\(?((M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3}))|[0-9]+|([a-z])\6{0,})(\.|·|\))\s*/i; // eslint-disable-line

  protected static readonly CAPTION_CLASS: string = 'SupportingText-bold';
  protected static readonly MEMO_CLASS: string = 'Memo';
  protected static readonly LIST_STYLE: string = 'mso-list';

  // RegExp used to clean text input (collapses every run of whitespace)
  protected cleanText: RegExp = /\s+/g;

  public sectionType: SectionType;

  /**
   * Produces the tokens for a single node.
   *
   * @param node Element to inspect; its class name, node name, style attribute and inner text are read.
   * @returns The generated tokens. The array is empty when the node has no non-whitespace text or
   *          when it is neither mapped, a list, nor a caption.
   */
  public parse(node: HTMLElement): Token[] {
    const tokens: Token[] = [];

    if (node.innerText.replace(/\s/g, '').length) {
      const keys: string[] = [node.className, node.nodeName];
      if (HTMLFragmentMapper.hasMapping(this.sectionType, ...keys)) {
        // Mapped node: emit the mapped token followed by the node's cleansed text.
        const token: Token = HTMLFragmentMapper.getMapping(this.sectionType, ...keys);
        tokens.push(token);
        tokens.push(this.parseTextContent(node, tokens));
      } else if (this.isList(node) && !HTMLFragmentMapper.hasAnyMapping(...keys)) {
        // Needs to happen after checking for clauses, since clauses are also lists in word.
        tokens.push(...this.parseList(node));
      } else if (this.isCaption(node)) {
        const token: Token = this.parseTextContent(node, tokens);
        token.type = TokenType.CAPTION;
        tokens.push(token);
      }
      // Other cases are handled by the FragmentParser at the layer above.
    }

    return tokens;
  }

  /**
   * Tells whether the element is a Word list paragraph, i.e. its style attribute mentions `mso-list`.
   *
   * @param el Element to test; may be null/undefined.
   * @returns Always a real boolean (false for a missing element or a missing/empty style attribute).
   */
  protected isList(el: HTMLElement): boolean {
    if (!el) {
      return false;
    }
    const style = el.getAttribute('style');
    return !!style && style.includes(HTMLNodeParser.LIST_STYLE);
  }

  /**
   * Tells whether the element carries exactly the caption class name.
   *
   * @param el Element to test; may be null/undefined.
   * @returns Always a real boolean.
   */
  protected isCaption(el: HTMLElement): boolean {
    return !!el && el.className === HTMLNodeParser.CAPTION_CLASS;
  }

  /**
   * Builds the tokens for a list paragraph: a LIST_ITEM marker followed by the cleansed text.
   *
   * @param el List element; its style attribute determines whether the item is flagged as indented.
   * @returns A two-element array: the LIST_ITEM token, then the text token.
   */
  protected parseList(el: HTMLElement): Token[] {
    const tokens: Token[] = [];
    const token: Token = new Token(FragmentType.LIST_ITEM, 'true');
    // A missing style attribute is treated like a style without any list level.
    const level = this._findIndentationLevel(el.getAttribute('style') || '');
    token.indented = level !== null && level > 1;
    tokens.push(token);
    // The LIST_ITEM token must already be in `tokens` so the list index gets stripped from the text.
    tokens.push(this.parseTextContent(el, tokens));
    return tokens;
  }

  /**
   * Creates the text token for an element.
   *
   * @param el     Element whose inner text is used; the memo class name yields a MEMO token, anything else TEXT.
   * @param tokens Tokens generated so far for this node (used to decide whether a list index is stripped).
   * @returns A new token holding the cleansed text.
   */
  protected parseTextContent(el: HTMLElement, tokens: Token[]): Token {
    const type: FragmentType = el.className === HTMLNodeParser.MEMO_CLASS ? FragmentType.MEMO : FragmentType.TEXT;
    const value: string = this._cleanseNodeText(el.innerText, tokens);
    return new Token(type, value);
  }

  /**
   * Strips clause index, list indexes and collapses whitespace.
   *
   * The clause index is removed according to the current section type (normative or appendix); the list
   * index is removed only when the last token generated so far is a LIST_ITEM.
   *
   * @param text   Text to cleanse
   * @param tokens List of tokens previously generated
   * @returns      Text stripped of relevant clause indexes and white space
   */
  protected _cleanseNodeText(text: string, tokens: Token[]): string {
    let cleansed: string = text;

    if (this.sectionType === SectionType.NORMATIVE) {
      cleansed = cleansed.replace(HTMLNodeParser._CLAUSE_NORMATIVE_REGEXP, '');
    }
    if (this.sectionType === SectionType.APPENDIX) {
      cleansed = cleansed.replace(HTMLNodeParser._CLAUSE_APPENDIX_REGEXP, '');
    }

    const previous: Token = tokens[tokens.length - 1];
    if (previous && previous.type === FragmentType.LIST_ITEM) {
      cleansed = cleansed.replace(HTMLNodeParser._LIST_INDEX_REGEXP, '');
    }

    return cleansed.trim().replace(this.cleanText, ' ');
  }

  /**
   * Here we get the indentation level of the list from the given node style.
   *
   * Only the declaration starting with `mso-list` is inspected; within it, the argument of the form
   * `levelN` provides the level (the last one wins if several are present). Whitespace and line breaks
   * around declarations and between arguments are tolerated.
   *
   * @param source The node style attribute in string form.
   * @returns The level as a number, or null when the style is empty or carries no list level.
   */
  protected _findIndentationLevel(source: string): number | null {
    let indent: number | null = null;
    if (!source) {
      return indent;
    }
    // We can't just immediately split by whitespace as there are multiple mso style properties that use
    // leveln as an argument.
    source.split(';').forEach((property: string) => {
      const declaration: string = property.trim();
      if (declaration.startsWith(HTMLNodeParser.LIST_STYLE)) {
        declaration.split(/\s+/).forEach((arg: string) => {
          if (arg.startsWith('level')) {
            // 'level'.length === 5, the remainder is the numeric level.
            indent = Number(arg.slice(5));
          }
        });
      }
    });
    return indent;
  }
}
