BigQuery uses Javascript for its user-defined functions. Input and outputs that are BYTES
in BigQuery are mapped to and from base64-encoded strings in Javascript.
BigQuery doesn't have the browser window
object, so atob
and btoa
are missing. Is there an easy way to encode and decode in the Bigquery JS environment, or do you have to include a library for doing the mapping?
BigQuery uses Javascript for its user-defined functions. Input and outputs that are BYTES
in BigQuery are mapped to and from base64-encoded strings in Javascript.
BigQuery doesn't have the browser window
object, so atob
and btoa
are missing. Is there an easy way to encode and decode in the Bigquery JS environment, or do you have to include a library for doing the mapping?
1 Answer
Reset to default 8You'll need to include a library, but it's fairly straightforward once you get the JavaScript onto Cloud Storage, and you can use this approach for other mon libraries that you want to import. I found an implementation in a StackOverflow post, and I put these contents in a file named btoa_atob.js
:
(function () {
var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';
function InvalidCharacterError(message) {
this.message = message;
}
InvalidCharacterError.prototype = new Error;
InvalidCharacterError.prototype.name = 'InvalidCharacterError';
// encoder
// [https://gist.github./999166] by [https://github./nignag]
btoa = function (input) {
var str = String(input);
for (
// initialize result and counter
var block, charCode, idx = 0, map = chars, output = '';
// if the next str index does not exist:
// change the mapping table to "="
// check if d has no fractional digits
str.charAt(idx | 0) || (map = '=', idx % 1);
// "8 - idx % 1 * 8" generates the sequence 2, 4, 6, 8
output += map.charAt(63 & block >> 8 - idx % 1 * 8)
) {
charCode = str.charCodeAt(idx += 3/4);
if (charCode > 0xFF) {
throw new InvalidCharacterError("'btoa' failed: The string to be encoded contains characters outside of the Latin1 range.");
}
block = block << 8 | charCode;
}
return output;
};
// decoder
// [https://gist.github./1020396] by [https://github./atk]
atob = function (input) {
var str = String(input).replace(/[=]+$/, ''); // #31: ExtendScript bad parse of /=
if (str.length % 4 == 1) {
throw new InvalidCharacterError("'atob' failed: The string to be decoded is not correctly encoded.");
}
for (
// initialize result and counters
var bc = 0, bs, buffer, idx = 0, output = '';
// get next character
buffer = str.charAt(idx++);
// character found in table? initialize bit storage and add its ascii value;
~buffer && (bs = bc % 4 ? bs * 64 + buffer : buffer,
// and if not first of each 4 characters,
// convert the first 8 bits to one ascii character
bc++ % 4) ? output += String.fromCharCode(255 & bs >> (-2 * bc & 6)) : 0
) {
// try to find character in table (0-63, not found => -1)
buffer = chars.indexOf(buffer);
}
return output;
};
}());
Then I copied the file to my Cloud Storage:
gsutil cp btoa_atob.js gs://my-bucket/
Then I wrote a dummy function that uses it:
#standardSQL
CREATE TEMP FUNCTION Foo(b BYTES) RETURNS STRING LANGUAGE js AS """
var result = atob(b);
// ... process result of atob.
return result;
"""
OPTIONS (library='gs://my-bucket/btoa_atob.js');
SELECT Foo(b'\xa0b1\xff\xee');