I have a JavaScript application which retrieves shortcode strings from a WordPress database. So I may end up with a variable like this:
// Example shortcode string, as it might come back from the WordPress database.
var shortcode = '[wp-form id="1946" title="My Test Form"]';
I am looking to use pure JavaScript to access the attributes so I can extract the title, etc. I imagine this will be some form of regex and split(), but so far my efforts have been frustrated by splitting on whitespace (the title value itself contains spaces).
Any ideas greatly appreciated.
I have a JavaScript application which retrieves shortcode strings from a WordPress database. So I may end up with a variable like this:
// Example shortcode string, as it might come back from the WordPress database.
var shortcode = '[wp-form id="1946" title="My Test Form"]';
I am looking to use pure JavaScript to access the attributes so I can extract the title, etc. I imagine this will be some form of regex and split(), but so far my efforts have been frustrated by splitting on whitespace (the title value itself contains spaces).
Any ideas greatly appreciated.
Share · Improve this question · asked Jun 1, 2016 at 19:05 by lukemcd (1,501 · 2 gold badges · 16 silver badges · 24 bronze badges)
Comment (1): Are attributes always the same? – Sergey Khalitov, commented Jun 1, 2016 at 19:15
4 Answers
Reset to default
6
Try to use this code:
// Collect every name="value" pair found in the shortcode into a plain object.
const shortcode = '[wp-form id="1946" title="My Test Form" empty=""]';
let attributes = {};
// First pass: grab each complete name="value" substring.
const pairs = shortcode.match(/[\w-]+=".*?"/g);
for (const pair of pairs) {
  // Second pass: split the substring into its name and (possibly empty) value.
  const [, name, value] = pair.match(/([\w-]+)="(.*?)"/);
  attributes[name] = value;
}
console.log(attributes);
Output:
Object {id: "1946", title: "My Test Form", empty: ""}
Okay, even though I'm late to the party I'm going to throw an answer in. I'm surprised nobody complained "you can't parse with just a regular expression!" I guess this used to be a much more fashionable comment to make. Anyway, I think it's perfectly reasonable to use just a regex, and I see some reasonable attempts already given.
However, if you want to really parse the tag, here's a quick parser I whipped up.
/**
 * Parse a single shortcode tag such as
 * `[wp-form id="1946" title="My Test Form"]` with a small token-driven
 * state machine.
 *
 * Supported attribute forms:
 *   - quoted values, single or double quotes:  title="My Form"  title='My Form'
 *     (inside quotes, `\"`, `\'` and `\\` are unescaped; any other backslash
 *     sequence is kept verbatim, backslash included)
 *   - unquoted values, read up to the next whitespace or `]`:  width=100%
 *   - valueless attributes, stored as the empty string:  [foo disabled]
 *
 * Everything before the first `[` and after the closing `]` is ignored.
 *
 * @param {string} shortCode - Text containing the shortcode tag.
 * @returns {{name: string, attributes: Object<string, string>}} The tag name
 *   and a map of attribute names to their string values.
 * @throws {string} A message of the form
 *   `invalid token: "=" encountered at <index>` when `=` appears where an
 *   attribute name is expected. A plain string (not an Error) is thrown on
 *   purpose: the caller in this file JSON-serializes whatever it catches.
 */
function parseShortCode(shortCode) {
  // A token is a run of whitespace, a single non-word character, or a run of
  // word characters. Whitespace is consumed as a whole run, so two
  // consecutive tokens are never both whitespace.
  const tokenPattern = /(\s+|\W)|(\w+)/g;
  const parsedValue = {
    name: '',
    attributes: {},
  };
  const attributes = parsedValue.attributes;

  let mode = 'NOT STARTED';
  let curAttribute = '';
  let quoteChar = '';
  let match;

  // Record `name` as a valueless attribute without clobbering an earlier value.
  const addValueless = (name) => {
    if (!Object.prototype.hasOwnProperty.call(attributes, name)) {
      attributes[name] = '';
    }
  };

  while (mode !== 'COMPLETE' && (match = tokenPattern.exec(shortCode)) !== null) {
    const token = match[0];
    const isSpace = /\s/.test(token);

    switch (mode) {
      case 'NOT STARTED':
        if (token === '[') {
          mode = 'GETNAME';
        }
        break;

      case 'GETNAME':
        if (token === ']') {
          // Tag without attributes, e.g. `[gallery]`.
          mode = 'COMPLETE';
        } else if (!isSpace) {
          parsedValue.name += token;
        } else if (parsedValue.name) {
          mode = 'PARSING';
        }
        break;

      case 'PARSING':
        if (token === ']') {
          mode = 'COMPLETE';
        } else if (token === '=') {
          if (!curAttribute) {
            throw `invalid token: "${token}" encountered at ${match.index}`;
          }
          mode = 'GET ATTRIBUTE VALUE';
        } else if (!isSpace) {
          curAttribute += token;
        } else if (curAttribute) {
          // Whitespace after a name: a value may still follow (`name = "x"`).
          mode = 'SET ATTRIBUTE';
        }
        break;

      case 'SET ATTRIBUTE':
        // The previous token was whitespace, so this one is not.
        if (token === '=') {
          mode = 'GET ATTRIBUTE VALUE';
        } else {
          // No `=` followed: the pending name is a valueless attribute and
          // the current token already belongs to whatever comes next.
          addValueless(curAttribute);
          if (token === ']') {
            curAttribute = '';
            mode = 'COMPLETE';
          } else {
            curAttribute = token;
            mode = 'PARSING';
          }
        }
        break;

      case 'GET ATTRIBUTE VALUE':
        if (isSpace) {
          break; // allow `name = "value"`
        }
        if (token === '"' || token === "'") {
          quoteChar = token;
          attributes[curAttribute] = '';
          mode = 'GET QUOTED ATTRIBUTE VALUE';
        } else if (token === ']') {
          // `[foo a=]`: empty value, and the tag ends here.
          attributes[curAttribute] = '';
          curAttribute = '';
          mode = 'COMPLETE';
        } else {
          attributes[curAttribute] = token;
          mode = 'GET UNQUOTED ATTRIBUTE VALUE';
        }
        break;

      case 'GET UNQUOTED ATTRIBUTE VALUE':
        // An unquoted value may span several tokens (`c-d`, `100%`).
        if (isSpace) {
          curAttribute = '';
          mode = 'PARSING';
        } else if (token === ']') {
          curAttribute = '';
          mode = 'COMPLETE';
        } else {
          attributes[curAttribute] += token;
        }
        break;

      case 'GET QUOTED ATTRIBUTE VALUE':
        if (token === '\\') {
          mode = 'ESCAPE VALUE';
        } else if (token === quoteChar) {
          curAttribute = '';
          mode = 'PARSING';
        } else {
          attributes[curAttribute] += token;
        }
        break;

      case 'ESCAPE VALUE':
        if (/^[\\'"]$/.test(token)) {
          // Escaped quote or backslash: keep only the escaped character.
          attributes[curAttribute] += token;
        } else {
          // Unknown escape: keep it verbatim, backslash included.
          attributes[curAttribute] += '\\' + token;
        }
        mode = 'GET QUOTED ATTRIBUTE VALUE';
        break;
    }
  }

  // Input ended while an attribute was still pending (valueless trailing
  // attribute, dangling `=`, or unterminated quote): make sure it is present.
  if (curAttribute) {
    addValueless(curAttribute);
  }

  return parsedValue;
}
/**
 * Read the shortcode text from the #shortcode field, parse it with
 * parseShortCode, and show the JSON-serialized result (or the error raised by
 * the parser) in the #result element.
 */
function doUpdate() {
  const text = document.getElementById('shortcode').value;
  let output;
  try {
    output = parseShortCode(text);
  } catch (err) {
    // Error objects serialize to "{}" under JSON.stringify, so show their
    // message instead; string throws pass through unchanged.
    output = err instanceof Error ? err.message : err;
  }
  // textContent rather than innerHTML: the output echoes user-typed attribute
  // values, which must be displayed literally and never interpreted as markup.
  document.getElementById('result').textContent = JSON.stringify(output);
}
// Re-run the parser on every click of the Update button, and render once
// immediately for the text that is already in the field.
const updateButton = document.getElementById('updateBtn');
updateButton.addEventListener("click", doUpdate);
doUpdate();
<!-- Input: the shortcode text to parse, pre-filled with the example from the question. -->
<label for="shortcode">Short Code:</label>
<textarea id="shortcode" style="width:100%; height:60px">[wp-form id="1946" title="My Test Form"]</textarea>
<div>
<!-- type="button": this control only triggers the script and never submits a form. -->
<button type="button" id="updateBtn">Update</button>
</div>
<div>
<!-- Output: doUpdate() writes the JSON-serialized parse result (or error) here. -->
<pre id="result"></pre>
</div>
I'm sure this has bugs, but I got it to work with your case, and some cases the other answers couldn't handle. Unless shortcodes get really intense, I'd just stick with a regex. But if you encounter stuff like unquoted attribute values and empty attributes this might work for you.
Don't try to use String.prototype.split in this case. Instead, describe an attribute together with its value, and build a pattern to match them using RegExp.prototype.exec:
// Group 1 captures the attribute name, group 2 its double-quoted value.
var re = /([\w-]+)="([^"]*)"/g;
var str = '[wp-form id="1946" title="My Test Form"]';
var m;
// With the g flag, each exec() call resumes from re.lastIndex, so repeated
// calls step through every attribute until exec() returns null.
for (m = re.exec(str); m !== null; m = re.exec(str)) {
  console.log(`${m[1]}\n${m[2]}\n`);
}
This can be done simply with a single regex:
var shortcode = '[wp-form id="1946" title="My Test Form"]';
// One regex for both values: group 1 is the id, group 2 the title.
// The pattern expects id to appear before title in the tag.
var arr = /id\=\"(.*?)\".*title=\"(.*?)\"/.exec(shortcode);
// Skip arr[0] (the whole match) and unpack the two capture groups.
var [, id, title] = arr;