JavaScript: create a string or char from an UTF-8 value

JavaScript: create a string or char from an UTF-8 value - javascript

Same question as this, but with UTF-8 instead of ASCII
In JavaScript, how can you get a string representation of a UTF-8 value?
e.g. how to turn "c385" into "Å" ?
or how to turn "E28093" into "—" (m dash) ?
or how to turn "E282AC" into "€" (euro sign) ?
My question is NOT a duplicate of Hex2Asc. You can see for yourself: hex2a("E282AC") will transform the string into "â¬" instead of transforming it into "€" (euro sign) !!

I think this will do what you want:
function convertHexToString(input) {
// split input into groups of two
var hex = input.match(/[\s\S]{2}/g) || [];
var output = '';
// build a hex-encoded representation of your string
for (var i = 0, j = hex.length; i < j; i++) {
output += '%' + ('0' + hex[i]).slice(-2);
}
// decode it using this trick
output = decodeURIComponent(output);
return output;
}
console.log("'" + convertHexToString('c385') + "'"); // => 'Å'
console.log("'" + convertHexToString('E28093') + "'"); // => '–'
console.log("'" + convertHexToString('E282AC') + "'"); // => '€'
DEMO
Credits:
Javascript elegant way to split string into segments n characters long
Convert integer array to string at javascript
https://stackoverflow.com/a/14028246/74757

var hex = "c5";
String.fromCharCode(parseInt(hex, 16));
you have to use c5, not c3 85 ref: http://rishida.net/tools/conversion/
Lear more about code point and code unit
http://en.wikipedia.org/wiki/Code_point
http://www.coderanch.com/t/416952/java/java/Unicode-code-unit-Unicode-code

Related

Make a utf-8 string shorter with a utf-32 encoding in Javascript?

I'm trying to find a way to compress/decompress a string in Javascript. By compress I mean to make the string look shorter (less char). That's my goal.
Here's an example of how things should work:
// The string that I want to make shorter
// It will only contain [a-zA-Z0-9] chars and some ponctuations like ()[]{}.,;'"!
var string = "I like bananas !";
// The compressed string, maybe something like "䐓㐛꯱字",
// which is shorter than the original
var shortString = compress(string);
// The original string, "I like banana !"
var originalString = decompress(shortString);
Here's my first idea (maybe there's a better way to get to my goal, and if so I'm interested in it).
I know that my original string will be in utf-8. So I'm thinking of using utf-32 for the encoding, which should divide by 4 the length of the string.
But I don't know how to do these 2 functions that construct new strings with different encoding. Here's the code I have so far that doesn't work...
function compress(string) {
string = unescape(encodeURIComponent(string));
var newString = '';
for (var i = 0; i < string.length; i++) {
var char = string.charCodeAt(i);
newString += parseInt(char, 8).toString(32);
}
return newString;
}

Since you're using a set of less than 100 characters and that javascript strings are encoded in UTF-16 (which mean you have 65536 possible characters), what you can do is concatenate the character codes so as to have one "compressed" character per two basic character. This allows you to compress strings to half the length.
Like this for example:
document.getElementById('compressBtn').addEventListener('click', function() {
var stringToCompress = document.getElementById('tocompress').value;
var compressedString = compress(stringToCompress);
var decompressedString = decompress(compressedString);
if (stringToCompress === decompressedString) {
document.getElementById('display').innerHTML = stringToCompress + ", length of " + stringToCompress.length  + " characters compressed to " + compressedString + ", length of " + compressedString.length + " characters back to " + decompressedString;
} else {
document.getElementById('display').innerHTML = "This string cannot be compressed"
}
})
function compress(string) {
string = unescape(encodeURIComponent(string));
var newString = '',
char, nextChar, combinedCharCode;
for (var i = 0; i < string.length; i += 2) {
char = string.charCodeAt(i);
if ((i + 1) < string.length) {
// You need to make sure that you don't have 3 digits second character else you might go over 65536.
// But in UTF-16 the 32 characters aren't in your basic character set. But it's a limitation, anything
// under charCode 32 will cause an error
nextChar = string.charCodeAt(i + 1) - 31;
// this is to pad the result, because you could have a code that is single digit, which would make
// decompression a bit harder
combinedCharCode = char + "" + nextChar.toLocaleString('en', {
minimumIntegerDigits: 2
});
// You take the concanated code string and convert it back to a number, then a character
newString += String.fromCharCode(parseInt(combinedCharCode, 10));
} else {
// Here because you won't always have pair number length
newString += string.charAt(i);
}
}
return newString;
}
function decompress(string) {
var newString = '',
char, codeStr, firstCharCode, lastCharCode;
for (var i = 0; i < string.length; i++) {
char = string.charCodeAt(i);
if (char > 132) {
codeStr = char.toString(10);
// You take the first part of the compressed char code, it's your first letter
firstCharCode = parseInt(codeStr.substring(0, codeStr.length - 2), 10);
// For the second one you need to add 31 back.
lastCharCode = parseInt(codeStr.substring(codeStr.length - 2, codeStr.length), 10) + 31;
// You put back the 2 characters you had originally
newString += String.fromCharCode(firstCharCode) + String.fromCharCode(lastCharCode);
} else {
newString += string.charAt(i);
}
}
return newString;
}
var stringToCompress = 'I like bananas!';
var compressedString = compress(stringToCompress);
var decompressedString = decompress(compressedString);
document.getElementById('display').innerHTML = stringToCompress + ", length of " + stringToCompress.length  + " characters compressed to " + compressedString + ", length of " + compressedString.length + " characters back to " + decompressedString;
body {
padding: 10px;
}
#tocompress {
width: 200px;
}
<input id="tocompress" placeholder="enter string to compress" />
<button id="compressBtn">
Compress input
</button>
<div id="display">
</div>
Regarding the possible use of UTF-32 to further compress, I'm not sure it's possible, I might be wrong on that, but from my understanding it's not feasible. Here's why:
The approach above is basically concatenating two 1 byte values in one 2 bytes value. This is possible because javascript strings are encoded in 2 bytes (or 16 bits) (note that from what I understand the engine could decide to store differently making this compression unnecessary from a purely memory space point of view - that being said, in the end, one character is considered being 16 bits). A cleaner way to make the compression above would in fact to user the binary numbers instead of the decimal, it would make much more sense. Like this for example:
document.getElementById('compressBtn').addEventListener('click', function() {
var stringToCompress = document.getElementById('tocompress').value;
var compressedString = compress(stringToCompress);
var decompressedString = decompress(compressedString);
if (stringToCompress === decompressedString) {
document.getElementById('display').innerHTML = stringToCompress + ", length of " + stringToCompress.length + " characters compressed to " + compressedString + ", length of " + compressedString.length + " characters back to " + decompressedString;
} else {
document.getElementById('display').innerHTML = "This string cannot be compressed"
}
})
function compress(string) {
string = unescape(encodeURIComponent(string));
var newString = '',
char, nextChar, combinedCharCode;
for (var i = 0; i < string.length; i += 2) {
// convert to binary instead of keeping the decimal
char = string.charCodeAt(i).toString(2);
if ((i + 1) < string.length) {
nextChar = string.charCodeAt(i + 1).toString(2) ;
// you still need padding, see this answer https://stackoverflow.com/questions/27641812/way-to-add-leading-zeroes-to-binary-string-in-javascript
combinedCharCode = "0000000".substr(char.length) + char + "" + "0000000".substr(nextChar.length) + nextChar;
// You take the concanated code string and convert it back to a binary number, then a character
newString += String.fromCharCode(parseInt(combinedCharCode, 2));
} else {
// Here because you won't always have pair number length
newString += string.charAt(i);
}
}
return newString;
}
function decompress(string) {
var newString = '',
char, codeStr, firstCharCode, lastCharCode;
for (var i = 0; i < string.length; i++) {
char = string.charCodeAt(i);
if (char > 132) {
codeStr = char.toString(2);
// You take the first part (the first byte) of the compressed char code, it's your first letter
firstCharCode = parseInt(codeStr.substring(0, codeStr.length - 7), 2);
// then the second byte
lastCharCode = parseInt(codeStr.substring(codeStr.length - 7, codeStr.length), 2);
// You put back the 2 characters you had originally
newString += String.fromCharCode(firstCharCode) + String.fromCharCode(lastCharCode);
} else {
newString += string.charAt(i);
}
}
return newString;
}
var stringToCompress = 'I like bananas!';
var compressedString = compress(stringToCompress);
var decompressedString = decompress(compressedString);
document.getElementById('display').innerHTML = stringToCompress + ", length of " + stringToCompress.length + " characters compressed to " + compressedString + ", length of " + compressedString.length + " characters back to " + decompressedString;
<input id="tocompress" placeholder="enter string to compress" />
<button id="compressBtn">
Compress input
</button>
<div id="display">
</div>
So why not push the logic and use utf-32, which should be 4 bytes, meaning four 1 byte characters. One problem is that javascript has 2 bytes string. It's true that you can use pairs of 16 bits characters to represent utf-32 characters. Like this:
document.getElementById('test').innerHTML = "\uD834\uDD1E";
<div id="test"></div>
But if you test the length of the resulting string, you'll see that it's 2, even if there's only one "character". So from a javascript perspective, you're not reducing the actual string length.
The other thing is that UTF-32 has in fact 221 characters. See here: https://en.wikipedia.org/wiki/UTF-32
It is a protocol to encode Unicode code points that uses exactly 32
bits per Unicode code point (but a number of leading bits must be zero
as there are fewer than 221 Unicode code points)
So you don't really have 4 bytes, in fact you don't even have 3, which would be needed to encode 3. So UTF-32 doesn't seem to be a way to compress even more. And since javascript has native 2 bytes strings, it seems to me to be the most efficient - using that approach at least.

If your strings only contain ASCII characters [0, 127] you can "compress" the string using a custom 6 or 7-bit code page.
You can do this several ways, but I think one of the simpler methods is to define an array holding all allowed characters - a LUT, lookup-table if you like, then use its index value as the encoded value. You would of course have to manually mask and shift the encoded value into a typed array.
If your LUT looked like this:
var lut = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,:;!(){}";
you would in this case deal with a LUT of length 71 which means we would need to use a 7-bit range or [0, 127] (if length were 64 we could've reduced the it to 6-bit [0, 63] values).
Then you would take each characters in the string and convert to index values (you would normally do all the following steps in a single operation but I have separated them for simplicity):
var lut = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,:;!(){}";
var str = "I like bananas !";
var page = [];
Array.prototype.forEach.call(str, function(ch) {
var i = lut.indexOf(ch);
if (i < 0) throw "Invalid character - can't encode";
page.push(i);
});
console.log("Intermediate page:", page);
You can always tweak the LUT so that the most used characters are in the beginning, then support variable encoding bit-range, find max value and use that to determine what range you want to encode in. You can add an initial bit as a flag as to which range the encoding uses (for example bit 0 set if 6-bit fits, otherwise use 7-bit range).
Now that you know the indices we can start to encode the binary output itself using a 7-bit approach. Since JavaScript only support byte values, i.e. 8-bit width, we have to do all the split, shift and merge operations manually.
This means we need to keep track of remainder and position on a bit-level.
Say first index value was the following 7-bit value (full 7-bit range for readability - all in pseudo format):
&b01111111
The first step would be to shift it over to bit position 0 and keep track of a remainder:
&b01111111 << 1
Resulting in:
&b11111110
^
new bit position: 7
new remainder : 1
Then the next index value, for example:
&b01010101
would be encoded like this - first convert to 7-bit value in its own byte representation:
&b01010101 << 1 => &b10101010
Then get the reminder part first. To obtain this will shift everything right-wise using 8-bit minus the current remainder (within modulo of 8):
remainderValue = &b10101010 >>> (8 - remainder)
leaving us with the following representation:
&b00000001
(Note that we use triple >>> to shift right to avoid issues with sign.)
Next step now is to merge this value with our previous value that has already been encoded and stored into our destination byte array - for this we'll use an OR operation:
Index 0 New value Result in index 0 (index of dst. array)
&b11111110 | &b00000001 => &b11111111
then go to next index in our destination array and store the rest of the current value, then update the remainder and position.
The "leftover" of the byte is calculated like this using the original (after shifting it) 7-bit byte value:
leftover = &b10101010 << remainder => &b01010100
which we now put into the next position:
Index 0 Index 1 (destination array index, not page index)
&b11111111 01010100
^
new bit position: 14
new remainder : 2
And so on with the remaining index values. See this answer for actual code on how you can do this in JavaScript - the code in this answer doesn't deal with string encoding per-se, but it shows how you can shift byte buffers bit-wise which is essentially the same you need for this task.
To calculate the remainder step, use 8-bits minus your custom bit-range:
step = 8 - newRange (here 7) => 1
This will also be the start remainder. For each character, you'll add the step to remainder after it has been processed, but remember to use modulo 8 (byte width) when you use it for shifting:
remainder += step;
numOfBitsToShift = remainder % 8;
Bit-position uses of course the bit-range, in this case 7:
bitPosition += 7;
Then to find which indices you're dealing with you divide the bitPosition on 8, if any decimal you have to deal with two indexes (old and new), if no decimal the current position represents new index only (only shift is needed for current index value).
You can also use modulo and when modulo of remainder = step you know you that you are dealing with a single index in the destination.
To calculate the final length you would use the bit-length and length of string, then ceil the result so that all characters will fit into a 8-byte byte array which is the only array we can get in JavaScript:
dstLength = Math.ceil(7 * str.length / 8);
To decode you just reverse all the steps.
An alternative, if you use long strings or have to move forward fast, is to use an established compressor such as zlib which has a very compact header as well as good performance in JavaScript in the case of the linked solution. This will also deal with "patterns" in the string to further optimize the resulting size.
Disclaimer: as this is mostly a theoretical answer there might be some errors. Feel free to comment if any are found. Refer to linked answer for actual code example.

for full code see here: https://repl.it/NyMl/1
using the Uint8Array you can work with the bytes.
let msg = "This is some message";
let data = []
for(let i = 0; i < msg.length; ++i){
data[i] = msg.charCodeAt(i);
}
let i8 = new Uint8Array(data);
let i16 = new Uint16Array(i8.buffer);
you could also think of a compression like this: http://pieroxy.net/blog/pages/lz-string/demo.html
if you don't want to use a 3rd party library, the lz based compression should be fairly simple. see here (wikipedia)

I use the same library mentioned above, lz-string https://github.com/pieroxy/lz-string, and it creates file sizes that are smaller than most of the binary formats like Protocol Buffers.
I compress via Node.js like this:
var compressedString = LZString.compressToUTF16(str);
And I decompress client side like this:
var decompressedString = LZString.decompressFromUTF16(str);

JavaScript split and join

I have a string, 15.Prototypal-Inheritance-and-Refactoring-the-Slider.txt, I'd like to make it looks like 15.Prototypal...-Slider.txt
The length of the text is 56, how can I keep the first 12 letters and 10 last letters (incuding punctuation marks) and replace the others to ...
I don't really know how to commence the code, I made something like
var str="15.Prototypal-Inheritance-and-Refactoring-the-Slider.txt";
str.split("// ",1);
although this gives me what I need, how do I have the results base on letters not words.

You can use str.slice().
function middleEllipsis(str, a, b) {
if (str.length > a + b)
return str.slice(0, a) + '...' + str.slice(-b);
else
return str;
}
middleEllipsis("15.Prototypal-Inheritance-and-Refactoring-the-Slider.txt", 12, 10);
// "15.Prototypa...Slider.txt"
middleEllipsis("mpchc64.mov", 12, 10);
// "mpchc64.mov"

This function will do what you ask for:
function fixString(str) {
var LEN_PREFIX = 12;
var LEN_SUFFIX = 10;
if (str.length < LEN_PREFIX + LEN_SUFFIX) { return str; }
return str.substr(0, LEN_PREFIX) + '...' + str.substr(str.length - LEN_SUFFIX - 1);
}
You can adjust the LEN_PREFIX and LEN_SUFFIX as needed, but I've the values you specified in your post. You could also make the function more generic by making the prefix and suffix length input arguments to your function:
function fixString(str, prefixLength, suffixLength) {
if (str.length < prefixLength + suffixLength) { return str; }
return str.substr(0, prefixLength) + '...' + str.substr(str.length - suffixLength - 1);
}

I'd like to make it looks like 15.Prototypal...-Slider.txt
LIVE DEMO
No matter how long are the suffixed and prefixed texts, this will get the desired:
var str = "15.Prototypal-Inheritance-and-Refactoring-the-Slider.txt",
sp = str.split('-'),
newStr = str;
if(sp.length>1) newStr = sp[0]+'...-'+ sp.pop() ;
alert( newStr ); //15.Prototypal...-Slider.txt
Splitting the string at - and using .pop() method to retrieve the last Array value from the splitted String.
Instead of splitting the string at some defined positions it'll also handle strings like:
11.jQuery-infinite-loop-a-Gallery.txt returning: 11.jQuery...-Gallery.txt

Here's another option. Note that this keeps the first 13 characters and last 11 because that's what you gave in your example.:
var shortenedStr = str.substr(0, 13) + '...' + str.substring(str.length - 11);

You could use the javascript substring command to find out what you want.
If you string is always 56 characters you could do something like this:
var str="15.Prototypal-Inheritance-and-Refactoring-the-Slider.txt";
var newstr = str.substring(0,11) + "..." + str.substring(45,55)
if your string varies in length I would highly recommend finding the length of the string first, and then doing the substring.
have a look at: http://www.w3schools.com/jsref/jsref_substring.asp

How to change date value to Unicode with Javascript?

How can I convert a month and year value into Unicode using Javascript? For instance, "6, 2013" would become "\u1046, \u1042\u1040\u1041\u1043" (Myanmar text).
Patrick pointed me to Codepen where I got this:
<script type="text/javascript">
var d = new Date();
var mn = (d.getMonth()+1);
var dn = d.getDate();
var yn = d.getFullYear();
var toMyanmar = function (string) {
var unicodeString = '';
for (var i=0; i < string.length; i++) {
var char = string[i];
if (parseInt(char),16) {
char = String.fromCharCode(string.charCodeAt(i) + 4112)
}
unicodeString += char;
}
return unicodeString;
}
document.write("<br/>Myanmar Date: ");
document.write(toMyanmar(''+ mn) + ', ');
document.write(toMyanmar(''+ dn) + ', ');
document.write(toMyanmar(''+ yn));
document.write("<br/>Date; Latin/Arabic numerals: " + mn + ', ' + dn + ', ' + yn);
</script>
<body style="font-family:'Myanmar Text',Arial;"></body>
Zero (0; u1040, #4160) was ignored by this script until I moved the radix number to the same line as "parseInt". (mmrtext.ttf or similar Unicode font is needed to see characters displayed.)
So, this now works well for my purpose. And I think will be be the basis for future transcoding of character pages since Myanmar-related languages use many different sets.
Thanks a million until you are better paid.
R. Holland

Check out http://buildingonmud.blogspot.com/2009/06/convert-string-to-unicode-in-javascript.html
You will have to add some checking if you only want to encode numbers (run each element through parseInt most likely), since it would encode the command and the spaces as well.

How to trim a string to N chars in Javascript?

How can I, using Javascript, make a function that will trim string passed as argument, to a specified length, also passed as argument. For example:
var string = "this is a string";
var length = 6;
var trimmedString = trimFunction(length, string);
// trimmedString should be:
// "this is"
Anyone got ideas? I've heard something about using substring, but didn't quite understand.

Why not just use substring... string.substring(0, 7); The first argument (0) is the starting point. The second argument (7) is the ending point (exclusive). More info here.
var string = "this is a string";
var length = 7;
var trimmedString = string.substring(0, length);

Copying Will's comment into an answer, because I found it useful:
var string = "this is a string";
var length = 20;
var trimmedString = string.length > length ?
string.substring(0, length - 3) + "..." :
string;
Thanks Will.
And a jsfiddle for anyone who cares https://jsfiddle.net/t354gw7e/ :)

I suggest to use an extension for code neatness.
Note that extending an internal object prototype could potentially mess with libraries that depend on them.
String.prototype.trimEllip = function (length) {
return this.length > length ? this.substring(0, length) + "..." : this;
}
And use it like:
var stringObject= 'this is a verrrryyyyyyyyyyyyyyyyyyyyyyyyyyyyylllooooooooooooonggggggggggggsssssssssssssttttttttttrrrrrrrrriiiiiiiiiiinnnnnnnnnnnnggggggggg';
stringObject.trimEllip(25)

https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/substr
From link:
string.substr(start[, length])

let trimString = function (string, length) {
return string.length > length ?
string.substring(0, length) + '...' :
string;
};
Use Case,
let string = 'How to trim a string to N chars in Javascript';
trimString(string, 20);
//How to trim a string...

Prefer String.prototype.slice over the String.prototype.substring method (in substring, for some cases it gives a different result than what you expect).
Trim the string from LEFT to RIGHT:
const str = "123456789";
result = str.slice(0,5); // "12345", extracts first 5 characters
result = str.substring(0,5); // "12345"
startIndex > endIndex:
result = str.slice(5,0); // "", empty string
result = str.substring(5,0); // "12345" , swaps start & end indexes => str.substring(0,5)
Trim the string from RIGHT to LEFT: (-ve start index)
result = str.slice(-3); // "789", extracts last 3 characters
result = str.substring(-3); // "123456789" , -ve becomes 0 => str.substring(0)
result = str.substring(str.length - 3); // "789"

Little late... I had to respond. This is the simplest way.
// JavaScript
function fixedSize_JS(value, size) {
return value.padEnd(size).substring(0, size);
}
// JavaScript (Alt)
var fixedSize_JSAlt = function(value, size) {
return value.padEnd(size).substring(0, size);
}
// Prototype (preferred)
String.prototype.fixedSize = function(size) {
return this.padEnd(size).substring(0, size);
}
// Overloaded Prototype
function fixedSize(value, size) {
return value.fixedSize(size);
}
// usage
console.log('Old school JS -> "' + fixedSize_JS('test (30 characters)', 30) + '"');
console.log('Semi-Old school JS -> "' + fixedSize_JSAlt('test (10 characters)', 10) + '"');
console.log('Prototypes (Preferred) -> "' + 'test (25 characters)'.fixedSize(25) + '"');
console.log('Overloaded Prototype (Legacy support) -> "' + fixedSize('test (15 characters)', 15) + '"');
Step by step.
.padEnd - Guarentees the length of the string
"The padEnd() method pads the current string with a given string (repeated, if needed) so that the resulting string reaches a given length. The padding is applied from the end (right) of the current string. The source for this interactive example is stored in a GitHub repository."
source: developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/…
.substring - limits to the length you need
If you choose to add ellipses, append them to the output.
I gave 4 examples of common JavaScript usages. I highly recommend using the String prototype with Overloading for legacy support. It makes it much easier to implement and change later.

Just another suggestion, removing any trailing white-space
limitStrLength = (text, max_length) => {
if(text.length > max_length - 3){
return text.substring(0, max_length).trimEnd() + "..."
}
else{
return text
}

There are several ways to do achieve this
let description = "your test description your test description your test description";
let finalDesc = shortMe(description, length);
function finalDesc(str, length){
// return str.slice(0,length);
// return str.substr(0, length);
// return str.substring(0, length);
}
You can also modify this function to get in between strings as well.

Here is my solution, which includes trimming white space too.
const trimToN = (text, maxLength, dotCount) => {
let modText = text.trim();
if (modText.length > maxLength) {
modText = text.substring(0, maxLength - dotCount);
modText = modText.padEnd(maxLength, ".");
return modText;
}
return text;
};
trimToN('Javascript', 6, 2) will return "Java.."

I think that you should use this code :-)
// sample string
const param= "Hi you know anybody like pizaa";
// You can change limit parameter(up to you)
const checkTitle = (str, limit = 17) => {
var newTitle = [];
if (param.length >= limit) {
param.split(" ").reduce((acc, cur) => {
if (acc + cur.length <= limit) {
newTitle.push(cur);
}
return acc + cur.length;
}, 0);
return `${newTitle.join(" ")} ...`;
}
return param;
};
console.log(checkTitle(str));
// result : Hi you know anybody ...

String that contains all ascii characters

I want to create a string in JavaScript that contains all ascii characters. How can I do this?

var s = ' !"#$%&\'()*+,-./0123456789:;<=>?#ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';

My javascript is a bit rusty, but something like this:
s = '';
for( var i = 32; i <= 126; i++ )
{
s += String.fromCharCode( i );
}
Not sure if the range is correct though.
Edit:
Seems it should be 32 to 127 then. Adjusted.
Edit 2:
Since char 127 isn't a printable character either, we'll have to narrow it down to 32 <= c <= 126, in stead of 32 <= c <= 127.

Just loop the character codes and convert each to a character:
var s = '';
for (var i=32; i<=127;i++) s += String.fromCharCode(i);

Just wanted to put this here for reference. (takes about 13/100 to 26/100 of a ms on my computer to generate).
var allAsciiPrintables = JSON.stringify((Array.from(Array(126 + 32).keys()).slice(32).map((item) => {
return String.fromCharCode(item);
})).join(''));
Decomposed:
var allAsciiPrintables = (function() {
/* ArrayIterator */
var result = Array(126 + 32).keys();
/* [0, 126 + 32] */
result = Array.from(result);
/* [32, 126 + 32] */
result = result.slice(32);
/* transform each item from Number to its ASCII as String. */
result = result.map((item) => {
return String.fromCharCode(item);
});
/* convert from array of each string[1] to a single string */
result = result.join('');
/* create an escaped string so you can replace this code with the string
to avoid having to calculate this on each time the program runs */
result = JSON.stringify(result);
/* return the string */
return result;
})();
The most efficient solution(if you do want to generate the whole set each time the script runs, is probably)(takes around 3/100-35/100 of a millisecond on my computer to generate).
var allAsciiPrintables = (() => {
var result = new Array(126-32);
for (var i = 32; i <= 126; ++i) {
result[i - 32] = (String.fromCharCode(i));
}
return JSON.stringify(result.join(''));
})();
strangely, this is only 3-10 times slower than assigning the string literal directly(with backticks to tell javascript to avoid most backslash parsing).
var x;
var t;
t = performance.now();
x = '!\"#$%&\'()*+,-./0123456789:;<=>?#ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
t = performance.now() - t;
console.log(t);
.

This is a version written in python. Gives all ASCII characters in order as a single string.
all_ascii = ''.join(chr(k) for k in range(128)) # 7 bits
all_chars = ''.join(chr(k) for k in range(256)) # 8 bits
printable_ascii = ''.join(chr(k) for k in range(128) if len(repr(chr(k))) == 3)
>>> print(printable_ascii)
' !"#$%&\'()*+,-./0123456789:;<=>?#ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~'
The last string here, printable_ascii contains only those characters that contain no escapes (i.e. have length == 1). The chars like: \x05, \x06 or \t, \n which does not have its own glyph in your system's font, are filtered out.
len(repr(chr(k))) == 3 includes 2 quotes that come from repr call.

Without doing several appends:
var s = Array.apply(null, Array(127-32))
.map(function(x,i) {
return String.fromCharCode(i+32);
}).join("");
document.write(s);

Here is an ES6 one liner:
asciiChars = Array.from({ length: 95 }, (e, i) => String.fromCharCode(i + 32)).join('');
console.log(asciiChars)

let str = '';// empty string declear
for( var i = 32; i <= 126; i++ )
{
str = str + String.fromCharCode( i ); /* this method received one integer and
convert it into a ascii characters and store it str variable one by one by using
string concatenation method. The loop start for 32 and end 126 */
}

Here is a version in coffeescript
require 'fluentnode'
all_Ascii = ->
(String.fromCharCode(c) for c in [0..255])
describe 'all Ascii', ->
it 'all_Ascii', ->
all_Ascii.assert_Is_Function()
all_Ascii().assert_Size_Is 256
all_Ascii()[0x41].assert_Is 'A'
all_Ascii()[66 ].assert_Is 'B'
all_Ascii()[50 ].assert_Is '2'
all_Ascii()[150 ].assert_Is String.fromCharCode(150)

We Keep Coding

JavaScript is the programming language of the Web.

JavaScript: create a string or char from an UTF-8 value - javascript

var hex = "c5"; String.fromCharCode(parseInt(hex, 16)); you have to use c5, not c3 85 ref: http://rishida.net/tools/conversion/ Lear more about code point and code unit http://en.wikipedia.org/wiki/Code_point http://www.coderanch.com/t/416952/java/java/Unicode-code-unit-Unicode-code

Related

Make a utf-8 string shorter with a utf-32 encoding in Javascript?

JavaScript split and join

How to change date value to Unicode with Javascript?

How to trim a string to N chars in Javascript?

String that contains all ascii characters

Categories

Resources