/*
 * ===============================================================================
 *
 * DISTRIBUTION STATEMENT C. Distribution authorized to U.S. Government Agencies
 * and their contractors; 2022. Other request for this document shall be referred
 * to AF 517 TRG.
 *
 * WARNING: This document may contain technical data whose export is restricted by
 * the Arms Export Control Act (AECA) or the Export Administration Act
 * (EAA). Transfer of this data by any means to a non-US person who is not eligible
 * to obtain export-controlled data is prohibited. By accepting this data, the
 * consignee agrees to honor the requirements of the AECA and EAA. DESTRUCTION
 * NOTICE: For unclassified, limited distribution documents, destroy by any method
 * that will prevent disclosure of the contents or reconstruction of the document.
 *
 * This material is based upon work supported under Air Force Contract
 * No. FA8721-05-C-0002 and/or FA8702-15-D-0001. Any opinions, findings,
 * conclusions or recommendations expressed in this material are those of the
 * author(s) and do not necessarily reflect the views of the U.S. Air Force.
 *
 * © 2023 Massachusetts Institute of Technology.
 *
 * The software/firmware is provided to you on an As-Is basis
 *
 * Delivered to the US Government with Unlimited Rights, as defined in DFARS Part
 * 252.227-7013 or 7014 (Feb 2014). Notwithstanding any copyright notice,
 * U.S. Government rights in this work are defined by DFARS 252.227-7013 or DFARS
 * 252.227-7014 as detailed above. Use of this work other than as specifically
 * authorized by the U.S. Government may violate any copyrights that exist in this
 * work.
 * ===============================================================================
 */
/**
 * A file for functions related to text handling. For example: tilesToString - turning ALEF tiles into a string.
 * @module
 * @author Raymond Budd <a href="mailto:raymond.budd@steelcutsoftware.com">raymond.budd@steelcutsoftware.com</a>
 * @since v0.2.1, December 11, 2023
 * @copyright Copyright &copy; 2023 Massachusetts Institute of Technology, Lincoln Laboratory
 */
import { Tile, Token } from '../model/alef_model';
//import { getLogger } from '../config/LogConfig';

//const log = getLogger('core.text');

// Matches an opening double quote followed by one or more spaces, then captures
// the run of non-quote characters up to the closing double quote. Used to strip
// the padding that token spacing leaves right after an opening quote.
const dquoteRe = /" +([^"]*)"/g;

// Single-quote counterpart of the pattern above.
const squoteRe = /' +([^']*)'/g;

/**
 * The pair of strings produced from a single list of tiles.
 */
interface TileStrings {
  /** String built from the English tokens of the tiles. */
  eStr: string;
  /** String built from the target-language tokens of the tiles. */
  tlStr: string;
}

/**
 * Build both spaced strings for a list of tiles in one call.
 *
 * @param tiles The list of tiles to build the strings from.
 * @param removeDiamonds When true remove null/diamond markers, when false leave them in.
 * @returns An object whose `eStr` is the English string and whose `tlStr` is the
 *          target-language string.
 */
function tilesToStrings(tiles: Tile[], removeDiamonds: boolean): TileStrings {
  // English is built first, then the target language.
  const eStr = tilesToString(tiles, removeDiamonds, false);
  const tlStr = tilesToString(tiles, removeDiamonds, true);
  return { eStr, tlStr };
}

/**
 * Convert a list of tiles to a spaced string either target language, or English
 * depending on retTl. Ported from: alef/src/alef/service/document.py
 *
 * Tiles that contribute no text are dropped before spacing is computed: tiles
 * whose token for the selected language is null/undefined, and (when
 * removeDiamonds is true) tiles whose term is the diamond marker '◇'. Spacing for
 * each remaining tile is therefore decided against the tile that was actually
 * emitted before it, not against a skipped one.
 *
 * @param tiles The list of tiles to build a string from.
 * @param removeDiamonds When true remove null/diamond markers, when false leave them in.
 * @param retTl When true build a string from the target language, build a string for the english when false.
 * @returns The assembled string; empty when tiles is null/empty or no tile contributes text.
 */
function tilesToString(tiles: Tile[], removeDiamonds: boolean, retTl: boolean): string {
  // This is all a bit of a hack to generate reasonable spacing (at least in English, and hopefully LTR languages)
  // RTL needs additional verification.
  if (tiles == null || tiles.length === 0) {
    return '';
  }

  const tokenOf = (tile: Tile) => (retTl ? tile.tl_token : tile.e_token);

  // Keep only the tiles that will produce output. Skipping token-less tiles here
  // avoids appending the literal text "undefined" for a leading tile without a token.
  const visible = tiles.filter(tile => {
    const tok = tokenOf(tile);
    return tok != null && (!removeDiamonds || tok.term !== '◇');
  });

  const [head, ...rest] = visible;
  // Nothing to emit (e.g. a string made up entirely of diamonds).
  if (head === undefined) {
    return '';
  }

  // Do not add space in front of the first token.
  let sent: string = tokenOf(head)?.term ?? '';

  // rest[i] is visible[i + 1], so visible[i] is the tile emitted just before it.
  sent += rest.map((tile, i) => tileToString(tile, removeDiamonds, retTl, visible[i])).join('');

  // Remove the padding that token spacing leaves right after an opening quote.
  sent = sent.replaceAll(dquoteRe, '"$1"');
  sent = sent.replaceAll(squoteRe, '\'$1\'');
  return sent;
}

/**
 * Render one tile as its term, prefixed with a single space when spacing rules
 * call for one. Either the English or the target-language token is used,
 * depending on retTl. Ported from: alef/src/alef/service/document.py
 *
 * Spacing rules, in order of precedence:
 *  - no space when the predecessor's term is an opening punctuation mark;
 *  - no space before punctuation that is not itself an opening mark, unless the
 *    predecessor is also punctuation and the term is not '.';
 *  - a space in every other case.
 *
 * @param t The tile to convert
 * @param removeDiamonds When true remove null/diamond markers, when false leave them in.
 * @param retTl When true build a string from the target language, build a string for the english when false.
 * @param predecessor The tile that appears directly in front of t.
 * @returns The (possibly space-prefixed) term, or '' when the tile has no token
 *          for the selected language or is a diamond being removed.
 */
function tileToString(t: Tile, removeDiamonds: boolean, retTl: boolean, predecessor?: Tile): string {
  const pick = (tile: Tile): Token | null => (retTl ? tile.tl_token : tile.e_token);

  const current = pick(t);
  if (current == null) {
    return '';
  }
  if (removeDiamonds && current.term === '◇') {
    return '';
  }

  const previous = predecessor != null ? pick(predecessor) : null;
  const openers = ['¿', '¡', '(', '[', '【', '《', '〈', '﴾', '“', '‘', '「'];

  let glue = ' ';
  if (previous != null && openers.includes(previous.term)) {
    // Text hugs the opening mark in front of it.
    glue = '';
  } else if (current.is_punct && !openers.includes(current.term)) {
    // Closing/inline punctuation normally attaches to what precedes it; the
    // exception is punctuation following punctuation, other than a period.
    const afterPunct = previous != null && previous.is_punct;
    const forced = afterPunct && current.term !== '.';
    if (!forced) {
      glue = '';
    }
  }

  return `${glue}${current.term}`;
}

export { tilesToStrings, tilesToString };



