/**
 * The MIT License (MIT)
 * Copyright (c) 2017-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 */

'use strict';
|
||
|
|
||
|
/**
|
||
|
* A regexp-tree plugin to replace standard character classes with
|
||
|
* their meta symbols equivalents.
|
||
|
*/
|
||
|
|
||
|
module.exports = {
|
||
|
_hasIFlag: false,
|
||
|
_hasUFlag: false,
|
||
|
init: function init(ast) {
|
||
|
this._hasIFlag = ast.flags.includes('i');
|
||
|
this._hasUFlag = ast.flags.includes('u');
|
||
|
},
|
||
|
CharacterClass: function CharacterClass(path) {
|
||
|
|
||
|
// [0-9] -> \d
|
||
|
rewriteNumberRanges(path);
|
||
|
|
||
|
// [a-zA-Z_0-9] -> \w
|
||
|
rewriteWordRanges(path, this._hasIFlag, this._hasUFlag);
|
||
|
|
||
|
// [ \t\r\n\f] -> \s
|
||
|
rewriteWhitespaceRanges(path);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
/**
 * Rewrites number ranges: [0-9] -> \d
 *
 * Every expression of the class that is a full 0-9 range is replaced in
 * place by the \d meta char; all other expressions are left untouched.
 *
 * @param {Object} path - path of a character class; `path.node.expressions`
 *   is scanned and matching children are replaced through `path.getChild`.
 */
function rewriteNumberRanges(path) {
  var node = path.node;

  node.expressions.forEach(function (expression, i) {
    if (isFullNumberRange(expression)) {
      path.getChild(i).replace({
        type: 'Char',
        value: '\\d',
        kind: 'meta'
      });
    }
  });
}

/**
 * Rewrites word ranges: [a-zA-Z_0-9] -> \w
 *
 * The parts may appear in any order, and other symbols/ranges are kept
 * untouched, e.g. [a-z_\dA-Z$] -> [\w$]. The digits must already be the
 * \d meta char.
 *
 * @param {Object} path - path of a character class.
 * @param {boolean} hasIFlag - the regexp has the `i` flag; one of a-z / A-Z
 *   is then enough to stand for the letters.
 * @param {boolean} hasUFlag - the regexp has the `u` flag; together with `i`
 *   the class must also list U+017F and U+212A before it is rewritten.
 */
function rewriteWordRanges(path, hasIFlag, hasUFlag) {
  var node = path.node;

  var numberPath = null;
  var lowerCasePath = null;
  var upperCasePath = null;
  var underscorePath = null;
  var u017fPath = null;
  var u212aPath = null;

  node.expressions.forEach(function (expression, i) {
    if (isMetaChar(expression, '\\d')) {
      // \d
      numberPath = path.getChild(i);
    } else if (isLowerCaseRange(expression)) {
      // a-z
      lowerCasePath = path.getChild(i);
    } else if (isUpperCaseRange(expression)) {
      // A-Z
      upperCasePath = path.getChild(i);
    } else if (isUnderscore(expression)) {
      // _
      underscorePath = path.getChild(i);
    } else if (hasIFlag && hasUFlag && isU017fPath(expression)) {
      u017fPath = path.getChild(i);
    } else if (hasIFlag && hasUFlag && isU212aPath(expression)) {
      u212aPath = path.getChild(i);
    }
  });

  // Both letter ranges, or just one of them when matching ignores case.
  var hasLetters = lowerCasePath && upperCasePath || hasIFlag && (lowerCasePath || upperCasePath);

  // U+017F and U+212A are only required when both `i` and `u` are set.
  var hasFoldedLetters = !hasUFlag || !hasIFlag || u017fPath && u212aPath;

  // Only rewrite when the whole pattern was found.
  if (!numberPath || !hasLetters || !underscorePath || !hasFoldedLetters) {
    return;
  }

  // Put \w in place of \d.
  numberPath.replace({
    type: 'Char',
    value: '\\w',
    kind: 'meta'
  });

  // Other paths are removed.
  if (lowerCasePath) {
    lowerCasePath.remove();
  }
  if (upperCasePath) {
    upperCasePath.remove();
  }
  underscorePath.remove();
  if (u017fPath) {
    u017fPath.remove();
  }
  if (u212aPath) {
    u212aPath.remove();
  }
}

/**
 * Escapes matched by \s that are looked up as `meta` chars.
 * NOTE(review): \v is assumed to be reported as a meta char like
 * \t, \n, \r and \f — confirm against the parser.
 */
var WHITESPACE_META_VALUES = ['\\t', '\\n', '\\v', '\\f', '\\r'];

/**
 * Single code points matched by \s that have no short escape; they are
 * looked up as `unicode` chars (\uXXXX). U+2000..U+200A is handled
 * separately as one class range.
 */
var WHITESPACE_CODE_POINTS = [0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, 0xfeff];

/**
 * Number of distinct members a class must list to equal \s:
 * the space, the meta escapes, the single code points, and the range.
 */
var WHITESPACE_MEMBER_COUNT = 1 + WHITESPACE_META_VALUES.length + WHITESPACE_CODE_POINTS.length + 1;

/**
 * Whether the node is a `unicode` char with the given code point.
 */
function isUnicodeCodePoint(node, codePoint) {
  return node.type === 'Char' && node.kind === 'unicode' && node.codePoint === codePoint;
}

/**
 * Identifies which member of the \s set a class expression is.
 *
 * @param {Object} node - a class expression (char or class range).
 * @returns {string|null} a key unique per member, or null when the
 *   expression is not part of the \s set.
 */
function getWhitespaceMemberKey(node) {
  if (node.type === 'ClassRange') {
    return isUnicodeCodePoint(node.from, 0x2000) && isUnicodeCodePoint(node.to, 0x200a) ? 'U+2000-U+200a' : null;
  }

  if (isChar(node, ' ')) {
    return 'space';
  }

  for (var i = 0; i < WHITESPACE_META_VALUES.length; i++) {
    if (isMetaChar(node, WHITESPACE_META_VALUES[i])) {
      return WHITESPACE_META_VALUES[i];
    }
  }

  if (node.type === 'Char' && node.kind === 'unicode' && WHITESPACE_CODE_POINTS.indexOf(node.codePoint) !== -1) {
    return 'U+' + node.codePoint.toString(16);
  }

  return null;
}

/**
 * Rewrites whitespace ranges:
 * [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] -> \s
 *
 * \s matches every character listed above, so the class is rewritten only
 * when all of them are present; collapsing a subset such as [ \t\r\n]
 * would make the optimized regexp match more than the original did.
 * Members may appear in any order and other expressions are kept.
 *
 * @param {Object} path - path of a character class.
 */
function rewriteWhitespaceRanges(path) {
  var keys = path.node.expressions.map(getWhitespaceMemberKey);

  var distinctKeys = keys.filter(function (key, i) {
    return key !== null && keys.indexOf(key) === i;
  });

  if (distinctKeys.length < WHITESPACE_MEMBER_COUNT) {
    return;
  }

  // Resolve every path before mutating, while positions still match `keys`.
  var newlineIndex = keys.indexOf('\\n');
  var newlinePath = path.getChild(newlineIndex);
  var redundantPaths = [];

  keys.forEach(function (key, i) {
    if (key !== null && i !== newlineIndex) {
      redundantPaths.push(path.getChild(i));
    }
  });

  // Put \s in place of \n. A fresh node is used (as the \d and \w rewrites
  // do) so nothing specific to \n is carried over onto the \s node.
  newlinePath.replace({
    type: 'Char',
    value: '\\s',
    kind: 'meta'
  });

  // Other members are removed.
  redundantPaths.forEach(function (memberPath) {
    memberPath.remove();
  });
}

/**
 * Whether the node is the class range 0-9.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isFullNumberRange(node) {
  return node.type === 'ClassRange' && node.from.value === '0' && node.to.value === '9';
}

/**
 * Whether the node is a char with the given value and kind.
 *
 * @param {Object} node - a class expression.
 * @param {string} value - expected `node.value`.
 * An optional third argument gives the expected `node.kind`; it defaults
 * to 'simple' when omitted or undefined.
 * @returns {boolean}
 */
function isChar(node, value) {
  var kind = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'simple';

  return node.type === 'Char' && node.value === value && node.kind === kind;
}

/**
 * Whether the node is a char of kind 'meta' with the given value,
 * e.g. `\\d` or `\\n`.
 *
 * @param {Object} node - a class expression.
 * @param {string} value - expected `node.value`.
 * @returns {boolean}
 */
function isMetaChar(node, value) {
  return isChar(node, value, 'meta');
}

/**
 * Whether the node is the class range a-z.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isLowerCaseRange(node) {
  return node.type === 'ClassRange' && node.from.value === 'a' && node.to.value === 'z';
}

/**
 * Whether the node is the class range A-Z.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isUpperCaseRange(node) {
  return node.type === 'ClassRange' && node.from.value === 'A' && node.to.value === 'Z';
}

/**
 * Whether the node is the simple char `_`.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isUnderscore(node) {
  return node.type === 'Char' && node.value === '_' && node.kind === 'simple';
}

/**
 * Whether the node is a `unicode` char with code point U+017F.
 * Despite the name, the argument is a node, not a path.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isU017fPath(node) {
  return node.type === 'Char' && node.kind === 'unicode' && node.codePoint === 0x017f;
}

/**
 * Whether the node is a `unicode` char with code point U+212A.
 * Despite the name, the argument is a node, not a path.
 *
 * @param {Object} node - a class expression.
 * @returns {boolean}
 */
function isU212aPath(node) {
  return node.type === 'Char' && node.kind === 'unicode' && node.codePoint === 0x212a;
}