All files / src xml-utils.ts

96.66% Statements 319/330
90.9% Branches 60/66
100% Functions 15/15
96.66% Lines 319/330

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 3321x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1131x 1131x 1131x 1131x 1131x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 3324x 1758x 3324x 1566x 1566x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 60x 60x 60x 60x 60x 5301x 1807x 55x 55x 1752x 1752x 1752x 1752x 3642x 1647x 1647x 3642x 1995x 1995x 1752x 1807x 1629x 1629x 1752x 5301x 3494x 3494x 5301x 60x 60x 60x 60x 60x 60x 3080x 1198x 1198x 1198x 82x 942x 1x 942x 941x 941x 82x 82x 1116x 1116x 1116x 2106x 1882x 2106x 224x 2x 224x 222x 222x 224x 1116x 1116x 3080x 1882x 1882x 3080x 60x 60x 60x 60x 60x 60x 60x 60x 
60x 60x 60x 60x 60x 60x 2x 2x 2x     2x       2x 2x 2x 2x 2x 2x 2x 60x 60x 60x 32x 32x 32x 32x 32x 31x 31x 32x     32x 32x 6x 6x 6x 6x 6x 30x 30x 30x 30x 30x 30x 6x 32x 32x 60x 60x 60x 60x 60x 13x 60x 35x 45x 2x 2x 58x 58x 58x 60x 60x 60x 60x 1x 1x   60x 60x     60x 60x 60x 1x 1x 1x 1x 1x 1x 1x 1x 7x 7x 2370x 812x 29x 29x 29x 783x 783x 783x 1610x 733x 733x 741x 733x 1610x 877x 877x 783x 783x 783x 2370x 1558x 1558x 2370x 7x 7x 7x 7x 7x 7x 7x 7x 7x 7x 7x 7x 7x 1x 1x   7x 7x 7x    
/*
 * Keyman is copyright (C) SIL Global. MIT License.
 *
 * Created by srl on 2024-09-27
 *
 * Abstraction for XML reading and writing
 */
 
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
 
/**
 * Identifies the flavor of Keyman XML document being read or written.
 * The value is passed to the reader/writer constructors and is used as the
 * key into the per-type parser and generator option maps.
 */
export type KeymanXMLType =
  'keyboard3'           // LDML <keyboard3>
  | 'keyboardTest3'       // LDML <keyboardTest3>
  | 'kps'                 // <Package>
  | 'kvks'                // <visualkeyboard>
  | 'kpj'                 // <KeymanDeveloperProject>
  ;
 
/**
 * Option holder keyed by document type: each KeymanXMLType may appear at most
 * once as a key, and any of them may be left out.
 */
type KeymanXMLOptionsBag = Partial<Record<KeymanXMLType, any>>;
 
/**
 * Number-parsing settings shared by the kps, kpj and kvks parsers.
 * Returns a fresh object on every call so the option sets never share state.
 */
function stringsOnlyNumberParseOptions() {
  return {
    skipLike: /(?:)/, // parse numbers as strings
    hex: null,
    leadingZeros: null,
    eNotation: null,
  };
}

/**
 * Map of options for the XML parser, keyed by document type.
 *
 * The attribute prefix chosen here also decides which post-processing step the
 * reader applies to the parsed result (`$`, `@__`, or preserve-order handling).
 */
const PARSER_OPTIONS: KeymanXMLOptionsBag = {
  'keyboard3': {
    ignoreAttributes: false, // We'd like attributes, please
    attributeNamePrefix: '@__', // We'll use this to convert attributes to strings and subobjects to arrays, when empty.
    trimValues: false, // preserve spaces, but:
    htmlEntities: true,
    tagValueProcessor: (tagName: string, tagValue: string /*, jPath, hasAttributes, isLeafNode*/) => {
      // since trimValues: false, we need to zap any element values that would be trimmed.
      // currently, the LDML spec doesn't have any element values, but this
      // future-proofs us a little in that element values are allowed, just trimmed.
      // if we do need elements in the future, we'd check the preserve-space attribute here.
      return tagValue?.trim();
    },
  },
  'keyboardTest3': {
    ignorePiTags: true,
    htmlEntities: true,
    ignoreAttributes: false, // We'd like attributes, please
    attributeNamePrefix: '', // avoid @_
    preserveOrder: true,     // Gives us a 'special' format
  },
  'kps': {
    ignorePiTags: true,
    ignoreAttributes: false,
    htmlEntities: true,
    attributeNamePrefix: '$', // causes remapping into $: { … } objects
    textNodeName: '_',
    numberParseOptions: stringsOnlyNumberParseOptions(),
  },
  'kpj': {
    ignorePiTags: true,
    textNodeName: '_',
    htmlEntities: true,
    ignoreAttributes: false, // We'd like attributes, please
    attributeNamePrefix: '', // to avoid '@_' prefixes
    numberParseOptions: stringsOnlyNumberParseOptions(),
  },
  'kvks': {
    ignorePiTags: true,
    textNodeName: '_',
    htmlEntities: true,
    ignoreAttributes: false, // We'd like attributes, please
    attributeNamePrefix: '$', // causes remapping into $: { … } objects
    numberParseOptions: stringsOnlyNumberParseOptions(),
    trimValues: false, // preserve spaces, but:
    tagValueProcessor: (
      tagName: string,
      tagValue: string,
      jPath: string,
      hasAttributes: boolean,
      isLeafNode: boolean,
    ): string | null | undefined => {
      if (!isLeafNode) {
        // text sitting between child elements: trimmed value
        return tagValue?.trim();
      }
      // null leaves the leaf node's value unchanged, so its spaces survive
      return null;
    },
  },
};
 
/**
 * Options handed to the XML builder, keyed by document type.
 * All four writable types use the same settings; each entry is its own object.
 */
const GENERATOR_OPTIONS: KeymanXMLOptionsBag = (() => {
  // one fresh settings object per call, so entries are never aliased
  const settings = () => ({
    attributeNamePrefix: '$',
    ignoreAttributes: false,
    format: true,
    textNodeName: '_',
    suppressEmptyNode: true,
  });
  return {
    kvks: settings(),
    kpj: settings(),
    kps: settings(),
    keyboard3: settings(),
  };
})();
 
/**
 * Wrapper for XML parsing support.
 *
 * Parses a document with the options registered for its type, then reshapes the
 * raw parser output depending on those options: `$`-prefixed attributes are
 * gathered under `$`, `@__`-prefixed attributes are unprefixed, and
 * preserve-order output is converted to the `$` / `#name` / `$$` layout.
 */
export class KeymanXMLReader {
  /**
   * @param type which kind of Keyman XML document this reader parses
   */
  public constructor(public type: KeymanXMLType) {
  }

  /**
   * move `{ $abc: 4 }` into `{ $: { abc: 4 } }`
   *
   * Applied recursively to arrays and nested objects; primitives and `null`
   * are returned unchanged.
   */
  private static fixupDollarAttributes(data: any) : any {
    // typeof null is 'object', so null must be excluded explicitly
    if (data === null || typeof data !== 'object') {
      return data;
    }
    if (Array.isArray(data)) {
      return data.map(v => KeymanXMLReader.fixupDollarAttributes(v));
    }
    // object: split the entries into plain members and `$`-prefixed attributes
    const members: [string, any][] = [];
    const attrs: [string, any][] = [];
    for (const [k, v] of Object.entries(data)) {
      const fixed = KeymanXMLReader.fixupDollarAttributes(v);
      if (k[0] === '$') {
        attrs.push([k.slice(1), fixed]);
      } else {
        members.push([k, fixed]);
      }
    }
    if (attrs.length) {
      // the attribute bag goes last, after all plain members
      members.push(['$', Object.fromEntries(attrs)]);
    }
    return Object.fromEntries(members);
  }

  /**
   * Requires attribute prefix @__ (double underscore)
   * For attributes, just remove @__ and continue.
   * For objects, replace any empty string "" with an empty object {}
   *
   * Primitives and `null` are returned unchanged.
   */
  private static fixupEmptyStringToEmptyObject(data: any) : any {
    // typeof null is 'object', so null must be excluded explicitly
    if (data === null || typeof data !== 'object') {
      return data;
    }
    // For arrays of objects, we map "" to {}
    // "" means an empty object
    if (Array.isArray(data)) {
      return data.map(v => (v === '') ? {} : KeymanXMLReader.fixupEmptyStringToEmptyObject(v));
    }
    // otherwise: remove @__ for attributes, remap objects
    const e: [string, any][] = [];
    for (const [k, v] of Object.entries(data)) {
      if (k.startsWith('@__')) {
        // attribute: an empty string stays an empty string
        e.push([k.substring(3), KeymanXMLReader.fixupEmptyStringToEmptyObject(v)]);
      } else if (v === '') {
        e.push([k, {}]);
      } else {
        e.push([k, KeymanXMLReader.fixupEmptyStringToEmptyObject(v)]);
      }
    }
    return Object.fromEntries(e);
  }

  /**
   * Replace:
   * ```json
   * [ { "info": [], ":@": { "abc": "def" } }]
   * ```
   * with:
   * ```json
   * [{"$": { "abc": "def" }, "#name": "info" }]
   * ```
   * see https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/docs/v4/2.XMLparseOptions.md#preserveorder
   * @param data input data
   * @returns an object with a single property, named after the root element
   * @throws Error if `data` is not an array holding exactly one element
   */
  private static fixupPreserveOrder(data: any): any {

    // we need to extract the root name specially
    if (!Array.isArray(data)) {
      throw Error(`Internal Error: XML parser preserveOrder did not yield an array.`);
    }
    if (data.length !== 1) {
      // we ignore comments, so should only have one element
      throw Error(`Internal Error: XML parser preserveOrder did not yield an array of size 1.`);
    }
    // the root element is special, we copy it into a property
    const rootElement = KeymanXMLReader.fixupPreserveOrderObject(data[0]);
    const rootElementName = rootElement['#name'];
    const out: any = {};
    out[rootElementName] = rootElement;
    return out;
  }

  /**
   * takes an 'object' with a property `:@` containing attrs, and one other property with the object name
   *
   * @returns `{ $, '#name', $$, <child name>: [...] }`, where `$` is present only
   *          when there are attributes and `$$` only when there are sub-items
   * @throws Error if no property other than `:@` supplies an element name
   */
  private static fixupPreserveOrderObject(data: any): any {
    const attrs = data[':@'];
    const mainEntry = Object.entries(data).find(([k]) => k !== ':@');
    // Validate before destructuring: with no entry besides `:@` there is nothing
    // to destructure, and the intended error below would never be reached.
    if (!mainEntry || !mainEntry[0]) {
      throw Error(`could not find elementName in ${JSON.stringify(mainEntry ?? data)}`);
    }
    const [elementName, subItems]: [string, any] = mainEntry;
    const out : any = {};
    if ( attrs ) {
      out['$'] = attrs;
    }
    out['#name'] = elementName;
    if (subItems && subItems.length) {
      out['$$'] = subItems.map((subObject: any) => KeymanXMLReader.fixupPreserveOrderObject(subObject));
      // xml2js duplicated data here, including elements in their 'non-preserved-order' form.
      // we don't read this data, but we're maintaining compatibility here with the read format.
      // example:  emit: […], keystroke:[…]
      for (const o of out['$$']) {
        const subElementName = o['#name'];
        const nonPreservedElements = out[subElementName] = out[subElementName] ?? [];
        const oWithoutName = {...o};
        delete oWithoutName['#name']; // #name is only there in the preserved-order form.
        nonPreservedElements.push(oWithoutName);
      }
    }
    return out;
  }

  /**
   * Parse an XML document of this reader's type.
   *
   * @param data XML source text
   * @returns the parsed document, post-processed according to the type's parser
   *          options, with any `?xml` entry removed
   */
  public parse(data: string): any {
    const parser = this.parser(); // throws for an unhandled type
    const options = PARSER_OPTIONS[this.type];
    let result = parser.parse(data, true);
    // the attribute prefix (or preserveOrder) tells us which fixup the type needs
    if (options.attributeNamePrefix === '$') {
      result = KeymanXMLReader.fixupDollarAttributes(result);
    } else if (options.attributeNamePrefix === '@__') {
      result = KeymanXMLReader.fixupEmptyStringToEmptyObject(result);
    } else if (options.preserveOrder) {
      result = KeymanXMLReader.fixupPreserveOrder(result);
    }
    delete result['?xml'];
    return result;
  }

  /**
   * Create a parser configured for this reader's type.
   *
   * @returns a new XMLParser built from a shallow copy of the type's options
   * @throws Error if no parser options are registered for the type
   */
  public parser() {
    const options = PARSER_OPTIONS[this.type];
    if (!options) {
      /* c8 ignore next 1 */
      throw Error(`Internal error: unhandled XML type ${this.type}`);
    }
    // Shallow clone so the shared PARSER_OPTIONS entry is never handed out directly.
    return new XMLParser(Object.assign({}, options));
  }
}
 
/**
 * Fixed prologue for writing XML: the `?xml` entry that the writer places
 * ahead of the document data. Keys use the `$` prefix, like the other
 * attributes handed to the builder.
 */
const PROLOGUE = {
  '?xml': {
    $version: '1.0',
    $encoding: 'utf-8',
  },
};
 
/**
 * Wrapper for XML generation support.
 *
 * Flattens `$` attribute bags into `$`-prefixed properties and hands the
 * result, preceded by the fixed prologue, to an XMLBuilder configured for the
 * writer's document type.
 */
export class KeymanXMLWriter {

  /**
   * @param type which kind of Keyman XML document this writer generates
   */
  constructor(public type: KeymanXMLType) {
  }

  /**
   * Convert `{ $: { a: 1, b: 2 } }` to `{ $a: 1, $b: 2 }`, recursively.
   *
   * Arrays are processed item by item. Primitives, `null` and `undefined` are
   * returned unchanged, and a `null`/`undefined` attribute bag adds nothing.
   */
  private static fixDataForWrite(data: any) : any {
    // typeof null is 'object', so null must be excluded explicitly
    if (data === null || typeof data !== 'object') {
      return data; // string or something else
    }
    if (Array.isArray(data)) {
      // just fixup each item of the array
      return data.map(d => KeymanXMLWriter.fixDataForWrite(d));
    }
    // else object
    const e: [string, any][] = [];
    for (const [k, v] of Object.entries(data)) {
      if (k === '$') {
        /* convert $: { a: 1, b: 2 } to { $a: 1, $b: 2} */
        for (const [attrName, attrValue] of Object.entries(v ?? {})) {
          e.push(['$' + attrName, KeymanXMLWriter.fixDataForWrite(attrValue)]);
        }
      } else {
        e.push([k, KeymanXMLWriter.fixDataForWrite(v)]);
      }
    }
    // reconstitute with $ elements fixed
    return Object.fromEntries(e);
  }

  /**
   * Serialize `data` to XML text.
   *
   * @param data document object; attributes may be given in `$: { … }` bags
   * @returns the built XML, starting with the fixed prologue
   * @throws Error if no generator options are registered for the writer's type
   */
  write(data: any): string {
    const builder = this.builder();
    data = KeymanXMLWriter.fixDataForWrite(data);
    return builder.build({ ...PROLOGUE, ...data });
  }

  /**
   * Create a builder configured for this writer's type.
   *
   * @returns a new XMLBuilder built from a shallow copy of the type's options
   * @throws Error if no generator options are registered for the type
   */
  public builder() {
    const options = GENERATOR_OPTIONS[this.type];
    if (!options) {
      /* c8 ignore next 1 */
      throw Error(`Internal error: unhandled XML type ${this.type}`);
    }
    return new XMLBuilder(Object.assign({}, options)); // Shallow clone in case the options are mutated.
  }
}