/**
* Public API
* @module gcapp
*/
"use strict";
const yaml = require("js-yaml");
const fs = require("fs");
const p = require("path");
const gc = require("./gcapp_config.js");
const Gclog = require("../gclog/gclog.js");
const Gcstd_eval = require("../gcstd/gcstd_eval.js");
const gctax = require("../gctax/gctax.js");
const gcstd = require("../gcstd/gcstd.js");
const Gcapp_tp_res = require("./gcapp_tp_res.js");
const Gctax_tent = require("../gctax/gctax_tent.js");
const Gctax_group = require("../gctax/gctax_group.js");
const Gctax_vec_map = require("../gctax/gctax_vec_map.js");
const Gcntree = require("../gctypes/gcntree/gcntree.js");
const gctax_group_schema = require("../gctax/schemas/gctax_group_schema.js");
const gctax_tent_schema = require("../gctax/schemas/gctax_tent_schema.js");
const Validator = require("jsonschema").Validator;
/**
 * Gcapp is the entry point to Ground Control's high level API.
 * Each instance stores the data modules it was configured with and an id
 * derived from the time it was constructed.
 * @constructor
 * @param {Object} config - configuration
 * @param {Array.<Gcstore>} config.data_modules - an array of data I/O modules to use
 */
function Gcapp({data_modules = []} = {}) {
    // The id is the systemwide default hash applied to the construction timestamp
    const kernel_id = gc.DEFAULT_HASH(Date.now());
    this.data_modules = data_modules;
    this.id = kernel_id;
}
/**
 * Hash a value with the systemwide default hash function
 * @static
 * @param {any} data - the value to hash
 * @returns {string} whatever the configured default hash function returns for data
 */
Gcapp.dhash = (data) => gc.DEFAULT_HASH(data);
/**
 * List the keys of a standard schema that correspond to nonscalar values.
 * Delegates to gcstd.get_nonscalar_keys.
 * @static
 * @param {Object} sch - a standard schema in jsonschema format
 * @returns {Array.<string>}
 */
Gcapp.get_nonscalar_keys = (sch) => gcstd.get_nonscalar_keys(sch);
/**
 * Get the systemwide vocabulary of vector names.
 * Delegates to gctax.get_vector_names.
 * @static
 * @returns {Array.<string>}
 */
Gcapp.get_vector_names = () => gctax.get_vector_names();
/**
 * Compute the set of vectors shared by every testable entity in a list.
 * Delegates to gctax.get_common_vecs.
 * @static
 * @param {Array.<module:gctax_tent~Gctax_tent>} tent_list - the testable entities to compute over
 * @returns {Array.<string>} an array representing the set of common vectors
 */
Gcapp.get_common_vecs = (tent_list) => gctax.get_common_vecs(tent_list);
/**
 * Hash one node of a standard, chosen by absolute node number, using the default hash function.
 * Node numbers are assigned by counting the nodes in the order std.dfs visits them, starting at 0.
 * @static
 * @param {Gcntree} std - a standard
 * @param {number} n - node number
 * @returns {string} the node's hash; undefined when no visited node has number n
 */
Gcapp.get_node_hash = function(std, n) {
    let visited = 0;
    // Relies on dfs returning the accumulator array it hands to the callback
    const hashes = std.dfs((tree_node, acc) => {
        const is_target = visited === n;
        visited += 1;
        if (is_target) {
            acc.push(Gcapp.dhash(tree_node.data));
        }
    });
    return hashes[0];
};
/**
 * Find the nodes of a standard that contain a text string (case sensitive).
 * The search runs against the JSON serialization of each node's data, so property
 * names inside node data can match as well as values.
 * @static
 * @param {Gcntree} std - a standard
 * @param {string} str - text string to search for
 * @returns {Array.<Array>} nodes where the string was found, as [node number, node data]
 */
Gcapp.text_search_nodes = function(std, str) {
    let node_num = -1;
    // TODO: this grossly assumes that node.data are constructed using
    // the to_obj transformer
    return std.dfs((tree_node, matches) => {
        node_num += 1;
        const serialized = JSON.stringify(tree_node.data);
        if (serialized.includes(str)) {
            matches.push([node_num, tree_node.data]);
        }
    });
};
/**
 * Compute the asymmetric difference between Gcntree A and Gcntree B: the data of every
 * node in A whose default hash does not occur among the node hashes of B.
 * B is traversed and hashed once (on first need), so the cost is one pass over each tree
 * rather than one pass over B per node of A.
 * @static
 * @param {Gcntree} a - Gcntree A
 * @param {Gcntree} b - Gcntree B
 * @returns {Array} set of node data representing the asymmetric difference
 */
Gcapp.asymdif = function(a, b) {
    // Built lazily inside the traversal of A so that B is never walked when A has no nodes
    let b_hashes = null;
    return a.dfs((node, bad_nodes) => {
        if (b_hashes === null) {
            const hashes = new Set();
            b.dfs((b_node) => {
                hashes.add(Gcapp.dhash(b_node.data));
            });
            b_hashes = hashes;
        }
        if (!b_hashes.has(Gcapp.dhash(node.data))) {
            bad_nodes.push(node.data);
        }
    });
};
/**
 * Create a vector map from a standard
 * @static
 * @param {Gcntree} std - the standard to derive mappings from
 * @param {Array.<Array>} nums - a 2D array of absolute node numbers, where the array at nums[i] corresponds to the ith
 * vector name returned by {@link module:gcapp~Gcapp.get_vector_names}
 * @param {string=} name - name for the vector map
 * @returns {module:gctax_vec_map~Gctax_vec_map}
 * @throws {Error} if nums does not have exactly one entry per system vector name
 */
Gcapp.make_vec_map = function(std, nums = [], name = "") {
    const vecs = Gcapp.get_vector_names();
    // Very basic validation - are there as many columns as we have vectors?
    if (nums.length !== vecs.length) {
        throw new Error("Vectors length mismatch");
    }
    // Resolve every requested node number in a single traversal of the standard instead of
    // running one full traversal per number. Only requested nodes are hashed.
    const wanted = new Set(nums.flat());
    const hash_by_num = new Map();
    if (wanted.size > 0) {
        let count = 0;
        std.dfs((node) => {
            if (wanted.has(count)) {
                hash_by_num.set(count, Gcapp.dhash(node.data));
            }
            count += 1;
        });
    }
    const vec_map = new Gctax_vec_map({name: name});
    nums.forEach((num_arr, i) => {
        // A number that matches no node links undefined, exactly as a failed per-node lookup would
        num_arr.forEach(num => vec_map.add_link(vecs[i], hash_by_num.get(num)));
    });
    return vec_map;
};
/**
 * Serialize a vector map to YML and write it to disk.
 * The file is written to ../../out relative to the current working directory, with a
 * timestamped file name. Errors raised by the write are not caught here.
 * @static
 * @param {module:gctax_vec_map~Gctax_vec_map} vec_map - the vector map to write
 * @param {string} notes - data for the notes field
 * @returns {string} output path
 */
Gcapp.write_vec_map_ext = function(vec_map, notes = "") {
    // One record per vector: its name plus its hashes, each wrapped as {hash: ...}
    const vecs = [];
    for (const [vec_name, hashes] of vec_map.data.entries()) {
        vecs.push({
            vec_name: vec_name,
            hash_list: hashes.map(hash => ({hash: hash}))
        });
    }
    const output_path = `${process.cwd()}/../../out/vec_map_${Date.now()}.yml`;
    const serialized = yaml.dump({vec_map: vec_map.name, notes: notes, vecs: vecs});
    // TODO: what happens on error?
    fs.writeFileSync(output_path, serialized);
    return output_path;
};
/**
 * Load a vector map from a YML file.
 * The file must pass Gctax_vec_map.is_valid and must list each system vector name exactly
 * once: no unknown names, no missing names and no duplicates.
 * @static
 * @param {string} path - path to a vector map in YML format
 * @returns {module:gctax_vec_map~Gctax_vec_map}
 * @throws {Error} if the file is not a valid vector map or its vector names do not match the system vocabulary
 */
Gcapp.load_vec_map_ext = function(path) {
    // TODO: what happens if errors?
    const doc = fs.readFileSync(path, {encoding: "utf8"});
    const json = yaml.safeLoad(doc, "utf8");
    if (!Gctax_vec_map.is_valid(json)) {
        throw new Error(`${path} is not a valid vector map`);
    }
    // TODO: We enforce a simple rule: any vector map file must have exactly the same vector vocabulary
    // as the currently running version of GC - however, it may instead be preferable to allow
    // files which have a valid subset of our vector vocabulary...
    const system_vecs = new Set(Gcapp.get_vector_names());
    const present_vecs = new Set(json.vecs.map(vec => vec.vec_name));
    // Comparing the raw entry count alone is not enough: a file that repeats one name and omits
    // another has the right count. Requiring the distinct names to match as well closes that gap.
    const is_exact_match = json.vecs.length === system_vecs.size &&
        present_vecs.size === system_vecs.size &&
        Array.from(present_vecs).every(vec => system_vecs.has(vec));
    if (!is_exact_match) {
        throw new Error(`${path} is a mismatch for system vectors`);
    }
    const vec_map = new Gctax_vec_map({name: json.vec_map});
    json.vecs.forEach(vec => vec.hash_list.forEach(hash => vec_map.add_link(vec.vec_name, hash.hash)));
    return vec_map;
};
/**
 * Build an evaluation set from selected nodes of a standard.
 * Delegates to Gcstd_eval.from_nodes once the node list is known to be non-empty.
 * @static
 * @param {Gcntree} std - the standard to derive evaluations from
 * @param {Array.<number>} nums - nodes to include, as absolute node numbers
 * @param {string=} name - name for the evaluation set
 * @returns {module:gcstd_eval~Gcstd_eval}
 * @throws {Error} if nums is empty
 */
Gcapp.make_eval_set = function(std, nums = [], name = "") {
    if (nums.length !== 0) {
        return Gcstd_eval.from_nodes({std, nums, name});
    }
    throw new Error("You must specify at least one node to create an evaluation set");
};
/**
 * Serialize an evaluation set to YML and write it to disk.
 * The file is written to ../../out relative to the current working directory, with a
 * timestamped file name. Errors raised by the write are not caught here.
 * @static
 * @param {module:gcstd_eval~Gcstd_eval} es - the evaluation set to write
 * @param {string} notes - data for the notes field
 * @returns {string} output path
 */
Gcapp.write_eval_set_ext = function(es, notes = "") {
    // Each hash in the set is wrapped as {hash: ...} for the file format
    const hash_entries = Array.from(es.set.values(), hash => ({hash: hash}));
    const output_path = `${process.cwd()}/../../out/eval_set_${Date.now()}.yml`;
    const serialized = yaml.dump({eval: es.name, notes: notes, set: hash_entries});
    // TODO: what happens on error?
    fs.writeFileSync(output_path, serialized);
    return output_path;
};
/**
 * Read an evaluation set from a YML file.
 * The parsed document must pass Gcstd_eval.is_valid; each entry's hash is then added to a
 * new Gcstd_eval named after the document's eval field.
 * @static
 * @param {string} path - path to an evaluation set in YML format
 * @returns {module:gcstd_eval~Gcstd_eval}
 * @throws {Error} if the file is not a valid evaluation set
 */
Gcapp.load_eval_set_ext = function(path) {
    // TODO: what happens if errors?
    const json = yaml.safeLoad(fs.readFileSync(path, {encoding: "utf8"}), "utf8");
    if (Gcstd_eval.is_valid(json)) {
        // TODO: makes you wonder if Gcstd_eval objects should have a from_json constructor, and if the transformation
        // in write_eval_set_ext should become a to_json method...
        const es = new Gcstd_eval({name: json.eval});
        json.set.forEach((entry) => {
            es.set.add(entry.hash);
        });
        return es;
    }
    throw new Error(`${path} is not a valid evaluation set`);
};
/**
 * Read a testable entity from a YML file.
 * The parsed document must pass Gctax_tent.is_valid and must not reference any vector
 * that Gctax_tent.get_unknown_vecs reports as unknown.
 * @static
 * @param {string} path - path to a testable entity in YML format
 * @returns {module:gctax_tent~Gctax_tent}
 * @throws {Error} if the file is not a valid testable entity or names unknown vectors
 */
Gcapp.load_tent_ext = function(path) {
    // TODO: what happens if errors happen during file I/O or deserialization?
    const json = yaml.safeLoad(fs.readFileSync(path, {encoding: "utf8"}), "utf8");
    const is_tent = Gctax_tent.is_valid(json);
    if (!is_tent) {
        throw new Error(`${path} is not a valid testable entity`);
    }
    const bad_vecs = Gctax_tent.get_unknown_vecs(json);
    const {tent, notes, vecs} = json;
    if (bad_vecs.length > 0) {
        throw new Error(`Testable entity '${tent}' has unknown vector(s): ${bad_vecs.join(", ")}`);
    }
    // The file stores vectors as {vec: name} records; the entity takes the bare names
    const vec_names = vecs.map(entry => entry.vec);
    return new Gctax_tent({name: tent, notes: notes, vecs: vec_names});
};
/**
 * Read a group from a YML file and load every testable entity it references.
 * Each tent path listed in the group file is resolved against the directory that
 * contains the group file, then loaded with Gcapp.load_tent_ext.
 * @static
 * @param {string} path - path to a group in YML format (assumes relative pathnames)
 * @returns {module:gctax_group~Gctax_group}
 * @throws {Error} if the file is not a valid group
 */
Gcapp.load_group_ext = function(path) {
    // TODO: what happens if errors happen during file I/O or deserialization?
    const json = yaml.safeLoad(fs.readFileSync(path, {encoding: "utf8"}), "utf8");
    if (!Gctax_group.is_valid(json)) {
        throw new Error(`${path} is not a valid group`);
    }
    const tents = json.tent_paths.map((entry) => {
        const tent_file = `${p.dirname(path)}/${entry.tent_path}`;
        return Gcapp.load_tent_ext(tent_file);
    });
    return new Gctax_group({name: json.group, notes: json.notes, tents: tents});
};
/**
 * Read a standard from a YML file and convert it to a Gcntree.
 * When a schema path is given, the schema module is imported dynamically and the parsed
 * document is validated against it before conversion.
 * @static
 * @param {string} std_path - path to a standard in YML format
 * @param {string=} schema_path - path to a standard schema for validation (if the schema module exports multiple objects, the 0th is considered the parent and the following are registered as children)
 * @returns {Gcntree}
 * @throws {Error} if a schema is given and the standard does not validate against it
 */
Gcapp.load_std_ext = async function(std_path, schema_path = null) {
    // TODO: what happens if errors?
    const ymldoc = yaml.safeLoad(fs.readFileSync(std_path, {encoding: "utf8"}), "utf8");
    if (schema_path === null) {
        return Gcntree.from_json_doc(ymldoc, Gcntree.trans.to_obj);
    }
    // ES2020 dynamic import, time to get weird!
    const sch = await import(schema_path);
    // First exported object is the parent schema; any others are registered under their own ids
    const [parent_schema, ...child_schemas] = Object.values(sch.default);
    const v = new Validator();
    child_schemas.forEach(obj => v.addSchema(obj, obj.id));
    const res = v.validate(ymldoc, parent_schema, {nestedErrors: true});
    if (res.errors.length > 0) {
        throw new Error(`${std_path} is not a valid instance of standard ${schema_path}`);
    }
    return Gcntree.from_json_doc(ymldoc, Gcntree.trans.to_obj);
};
/**
 * Import a standard schema (in jsonschema format) from a JavaScript module on disk
 * and hand back the module's default export.
 * @static
 * @param {string} path - path to an importable JavaScript module
 * @returns {Object} a jsonschema object
 */
Gcapp.load_schema_ext = async function(path) {
    const {default: schema} = await import(path);
    return schema;
};
/**
 * Generate a testplan from YML files.
 * Loads the vector map, the optional evaluation set, the test subject (group or testable
 * entity) and the standard, then reports which linked parts of the standard are selected
 * for evaluation and where they occur in the standard.
 * @static
 * @param {string} subj_path - path to a testable entity OR group in YML format
 * @param {string} std_path - path to a standard in YML format
 * @param {string} vec_map_path - path to a vector map in YML format
 * @param {string=} eval_path - path to an evaluation set in YML format; when omitted, every hash linked in the vector map counts as selected
 * @returns {Gcapp_tp_res}
 * @throws {Error} if subj_path, std_path or vec_map_path is missing, or if the subject file is neither a group nor a testable entity
 */
Gcapp.testplan_ext = function(subj_path, std_path, vec_map_path, eval_path) {
    if (!subj_path || !std_path || !vec_map_path) {
        throw new Error("Missing path");
    }
    const vec_map = Gcapp.load_vec_map_ext(vec_map_path);
    const eval_set = eval_path ? Gcapp.load_eval_set_ext(eval_path) : null;
    // With no evaluation set, every linked hash is selected
    const is_selected = hash => eval_set === null || eval_set.set.has(hash);
    // Deserialize the file for the test subject and determine if it's a tent or a group
    // TODO: this duplicates the validation that occurs in the group and tent loaders, do we care?
    const subj_doc = fs.readFileSync(subj_path, {encoding: "utf8"});
    const subj_obj = yaml.safeLoad(subj_doc, "utf8");
    const v = new Validator();
    let is_group = true;
    let subj = null;
    if (v.validate(subj_obj, gctax_group_schema).errors.length === 0) {
        subj = Gcapp.load_group_ext(subj_path);
    } else if (v.validate(subj_obj, gctax_tent_schema).errors.length === 0) {
        is_group = false;
        subj = Gcapp.load_tent_ext(subj_path);
    } else {
        throw new Error(`${subj_path} doesn't seem to be a group or a testable entity`);
    }
    // Load the standard file and transform to a Gcntree
    const doc = fs.readFileSync(std_path, {encoding: "utf8"});
    const ymldoc = yaml.safeLoad(doc, "utf8");
    const doc_tree = Gcntree.from_json_doc(ymldoc, Gcntree.trans.to_obj);
    // Called through Gcapp rather than `this`, so the function also works when invoked
    // detached from Gcapp (e.g. passed as a callback). A group is evaluated over the
    // vectors common to its tents; a single tent over its own vectors.
    const vecs_to_evaluate = is_group ? Gcapp.get_common_vecs(subj.tents) : subj.vecs;
    // One boolean per link of each vector: is that link selected for evaluation?
    const vec_coverage = vecs_to_evaluate.map((vec) => {
        return vec_map.get_links(vec).map(node_hash => is_selected(node_hash));
    });
    const total_evals = vec_coverage.reduce((acc, bool_list) => acc + bool_list.length, 0);
    const selected_evals = vec_coverage.reduce((acc, bool_list) => {
        return acc + bool_list.filter(Boolean).length;
    }, 0);
    // Associate selected hashes with their vec names (a hash linked from several vectors
    // keeps the last vector it was seen under)
    const vec_by_hash = new Map(vecs_to_evaluate.flatMap((vec) => {
        return vec_map.get_links(vec)
            .filter(hash => is_selected(hash))
            .map(hash => [hash, vec]);
    }));
    // Prep a hashmap that associates vec names with tree search results
    const hits_by_vec = new Map(Array.from(vec_by_hash.values()).map(val => [val, []]));
    // Walk the standard; on a hash match, record the text of the standard part and its node
    // number under the matching vec name, and remember the hash as found
    let n = 0;
    const found_hashes = new Set();
    doc_tree.dfs((node) => {
        const node_hash = Gcapp.dhash(node.data);
        const vec_name = vec_by_hash.get(node_hash);
        if (vec_name !== undefined) {
            hits_by_vec.get(vec_name).push({std_txt: node.data, node_num: n});
            found_hashes.add(node_hash);
        }
        n += 1;
    });
    // Selected hashes that never matched a node are the ones that weren't found in the standard
    const unfound = Array.from(vec_by_hash.keys()).filter(hash => !found_hashes.has(hash));
    return new Gcapp_tp_res({
        map: hits_by_vec,
        subj: subj,
        is_group: is_group,
        eval_set: eval_set,
        std_path: std_path,
        vecs_to_evaluate: vecs_to_evaluate,
        selected_evals: selected_evals,
        total_evals: total_evals,
        vec_coverage: vec_coverage,
        num_links: vec_by_hash.size,
        num_unfound: unfound.length
    });
};
/**
 * Compute the absolute node numbers for one part of a standard file (in YML format).
 * To get part IDs for a standard, see {@link module:gcapp~Gcapp.get_nonscalar_keys}
 * @static
 * @see module:gcapp~Gcapp.get_nonscalar_keys
 * @param {string} std_path - path to a standard in YML format
 * @param {string} sch_path - path to the standard schema for standard std_path
 * @param {number} part_id - part ID, an index into the schema's nonscalar keys
 * @returns {Object} {prop, part_id, total_nodes, nodes} where nodes holds one
 * [node number, node data, ancestor path string] triple per matching node
 * @throws {Error} if part_id is outside the range of the schema's nonscalar keys
 */
Gcapp.num_ext = async function(std_path, sch_path, part_id) {
    // Get the property name for the part code we're interested in
    const schema = await Gcapp.load_schema_ext(sch_path);
    const keys = Gcapp.get_nonscalar_keys(schema);
    if (part_id < 0 || part_id > keys.length - 1) {
        throw new Error(`Part ID out of range for standard schema ${sch_path}`);
    }
    const prop = keys[part_id];
    // Load the standard
    const doc_tree = await Gcapp.load_std_ext(std_path);
    let total_nodes = 0;
    const res = doc_tree.dfs((node, data) => {
        // TODO: this is a brittle and bad way to discern between diff kinds of nodes
        // we prob need a wrapper class for tree nodes which lets them reflect their type
        // and the set of node types for a given standard is specified by the standard schema
        const is_part = node.parent && node.parent.data === prop && typeof node.data !== "string";
        if (is_part) {
            // Climb to the root, keeping each ancestor that sits directly under a nonscalar key.
            // Prepending while climbing leaves the labels in root-to-leaf order.
            // TODO: this grossly assumes that the standard was transformed to a Gcntree using the
            // to_obj transformer... that's gonna break real quick
            const labels = [];
            for (let anc = node.parent; anc !== null; anc = anc.parent) {
                if (anc.parent && keys.includes(anc.parent.data)) {
                    labels.unshift(`${Object.values(anc.data)[0]}`);
                }
            }
            data.push([total_nodes, node.data, labels.join(" / ")]);
        }
        total_nodes += 1;
    });
    return {
        prop: prop,
        part_id: part_id,
        total_nodes: total_nodes,
        nodes: res
    };
};
/**
 * Find the symmetric difference between the sets of nodes in two external standard files, expressed as reciprocal asymmetric differences.
 * Both files are loaded (and validated against sch_path) one after the other, A first.
 * Note that identical standards and permuted standards will return the same result
 * @static
 * @param {string} path1 - path to standard A in YML format
 * @param {string} path2 - path to standard B in YML format
 * @param {string} sch_path - path to the standard schema for both standards
 * @returns {Object} {a, b} where a holds the node data found only in A and b the node data found only in B
 */
Gcapp.cmp_ext = async function(path1, path2, sch_path) {
    const trees = [];
    for (const std_path of [path1, path2]) {
        trees.push(await Gcapp.load_std_ext(std_path, sch_path));
    }
    const [tree_a, tree_b] = trees;
    // TODO: This is asymptotically stupid, we should write a function to compare both trees simultaneously
    return {
        a: Gcapp.asymdif(tree_a, tree_b),
        b: Gcapp.asymdif(tree_b, tree_a)
    };
};
/**
 * Check an external standard file against a standard schema.
 * Any error raised while loading or validating is reported as "not valid"; the error
 * itself is discarded.
 * @static
 * @param {string} std_path - path to a standard in YML format
 * @param {string} sch_path - path to the standard schema for standard std_path
 * @returns {boolean} true if the standard is a valid instance of schema sch_path
 */
Gcapp.valid_ext = async function(std_path, sch_path) {
    // TODO: it'd be nice to emit the errors found in invalid standards, but we gotta refactor load_std_ext to return em
    // also TODO: this doesn't discern between an invalid path and an invalid schema
    let is_valid = true;
    try {
        await Gcapp.load_std_ext(std_path, sch_path);
    } catch (err) {
        is_valid = false;
    }
    return is_valid;
};
/**
 * Find which elements of an external evaluation set are resolved for a given external standard.
 * A hash in the evaluation set is resolved when some node of the standard has that default hash.
 * All resolved nodes must share the same first property name, otherwise the set is rejected.
 * @static
 * @param {string} eval_path - path to an evaluation set in YML format
 * @param {string} std_path - path to a standard in YML format
 * @returns {Object} {resolved, unresolved, total_evals, total_nodes}: the data of resolved nodes, the
 * hashes that matched no node, the size of the evaluation set and the number of nodes visited
 * @throws {Error} if the resolved nodes do not all share the same first property name
 */
Gcapp.checkset_ext = async function(eval_path, std_path) {
    const ev = Gcapp.load_eval_set_ext(eval_path);
    const doctree = await Gcapp.load_std_ext(std_path);
    // hash -> node data, for every node whose hash is in the evaluation set
    const parts = new Map();
    let n = 0;
    doctree.dfs((node) => {
        const node_hash = Gcapp.dhash(node.data);
        if (ev.set.has(node_hash)) {
            parts.set(node_hash, node.data);
        }
        n += 1;
    });
    const resolved = Array.from(parts.values());
    // Homogeneity: every resolved node must lead with the same property name as the first one
    const h = resolved.every(obj => Object.keys(obj)[0] === Object.keys(resolved[0])[0]);
    if (!h) {
        throw new Error(`Illegal evaluation set - ${eval_path} is non-homogeneous`);
    }
    // Membership is tested directly on the map rather than by scanning an array of its keys
    const unresolved = Array.from(ev.set.values()).filter(node_hash => !parts.has(node_hash));
    return {
        resolved: resolved,
        unresolved: unresolved,
        total_evals: ev.set.size,
        total_nodes: n
    };
};
/**
 * Initialize this instance of Gcapp. Must be executed before a new Gcapp object is ready for use.
 * Logs the instance id, starts init() on every data module, and resolves once all of them
 * have finished; a failure in any module's init() rejects the returned promise.
 * @returns {Promise.<undefined>}
 */
Gcapp.prototype.init = async function() {
    Gclog.log(`[GCAPP] Initializing Ground Control kernel ${this.id}...`);
    const pending = this.data_modules.map(module => module.init());
    await Promise.all(pending);
};
/**
 * Get the data modules associated with this instance of Gcapp.
 * The instance's own array is returned, not a copy.
 * @returns {Array.<Gcstore>} a list of data modules
 */
Gcapp.prototype.get_data_modules = function() {
    const {data_modules} = this;
    return data_modules;
};
module.exports = Gcapp;