I am starting to learn javascript and d3.js (version 3.3.3). I need to read data from files with a format which is neither csv nor tsv. I could probably use d3.dsv.parseRows, but I'm kind of stuck with it - I'd really appreciate some help to get started, an example would be great.
The data format is ASCII, two columns of numbers separated by an unknown number of whitespace characters (tabs or spaces). The comment character is #.
# Example
# The data is obviously poorly aligned
# The two values in each row are separated
# by one or more tabs and/or spaces
#
1.0 10.00
2.0 20
3.0 30. # this data line should be read
# 4.0 40.0 # this data line should be ignored
5.0 50.00
I need to have the data in an array of arrays of numbers, so I can go on with some nice d3 plotting:
[ [1.0,10.0], [2.0,20.0], [3.0,30.0], [5.0,50.0] ]
It sounds like you'll have to write your own request. I've made a start on one, with a couple of notes below; you'll need to finish it off, though ...
// Download "dsv.txt" asynchronously and pass the raw text to cleanData().
// (The earlier second XMLHttpRequest that was opened but never sent has been
// removed: it had no effect.)
var req = new XMLHttpRequest();
req.onreadystatechange = function () {
    // readyState 4 means the complete response has arrived.
    if (req.readyState !== 4) {
        return;
    }
    // 200 is the normal HTTP success code; 0 is also accepted
    // (presumably so the page works when opened from the local file system).
    if (req.status === 200 || req.status === 0) {
        cleanData(req.responseText);
    }
};
req.open('GET', "dsv.txt", true);
req.send(null);
/**
 * Reduce the raw text of a data file to its data lines.
 *
 * Everything from a "#" to the end of the line is treated as a comment and
 * dropped, surrounding whitespace is trimmed, and lines that end up empty
 * (blank lines, comment-only lines, indented comment lines) are skipped.
 * Both "\n" and "\r\n" line endings are accepted.
 *
 * Each returned line can be turned into numbers with
 * `line.split(/\s+/).map(Number)`.
 *
 * @param {string} data - raw contents of the data file
 * @returns {string[]} the cleaned data lines (also logged to the console)
 */
var cleanData = function (data) {
    var clean = [];
    var lines = data.split(/\r?\n/);
    for (var i = 0; i < lines.length; i++) {
        // Cut the line at the comment character, if there is one.
        var hashAt = lines[i].indexOf('#');
        var content = hashAt === -1 ? lines[i] : lines[i].slice(0, hashAt);
        var str = content.trim();
        // Nothing left means the line was blank or only a comment.
        if (str !== '') {
            clean.push(str);
        }
    }
    console.log(clean);
    return clean;
};
To process data files with multiple spaces (and maybe more complicated pattern),
d3.text can be combined with a regex and then d3.csvParseRows
// Parsed rows of the remote table; stays empty until the download finishes.
var a = [];
d3.text("http://cdsarc.u-strasbg.fr/ftp/J/AJ/159/187/table3.dat").then(function (rawText) {
    // Turn every run of spaces into a single comma so the text can be read as CSV.
    var commaSeparated = rawText.replace(/ +/g, ",");
    a = d3.csvParseRows(commaSeparated);
});
Note: because of CORS restrictions, remote URLs will only work when the server allows cross-origin requests from your page.
Related
here is the regex demo
the REGULAR EXPRESSION
getObj\("Frm_Logintoken"\).value = "(.*)";
this the TEST STRING
getObj("Frm_Logintoken").value = "3";
i want to get that number only "3" without quotes
it's in the Group 1 of the matches but i don't know how to get it from that group .
i can't var myString = "something format_abc";
because i am doing this to get the value that i don't know !!
And testing this in console results
var test = /getObj("Frm_Logintoken").value = "(.*)";/g
undefined
console.log(test1);
undefined
undefined
the same question but in a different way and detailed still unanswered
i have tried
getObj\("Frm_Logintoken"\).value = "(.*)";`.match(/getObj\("Frm_Logintoken"\).value = "(.*)";/)[1]
it give me this "(.*)" not the wanted value !!!
some notes
1-that value isn't static
2- i want to make the code works automatic so fetching the line "getObj("Frm_Logintoken").value = "3";"
from the page code manually is unwanted thing.
3- i want to make an auto login script without any User intervention.
4- if you still don't understand the question see the links pls
thanks
You can access group by accessing index of matched value
// Sample line as it appears in the page source.
let str = `getObj("Frm_Logintoken").value = "3";`;
// The dot before "value" is escaped so it only matches a literal ".", and
// [^"]* stops at the first closing quote instead of greedily running on to a
// later `";` in the text.
let tokenPattern = /getObj\("Frm_Logintoken"\)\.value = "([^"]*)";/;
// match() returns an array: index 0 is the whole match, index 1 is group 1.
let op = str.match(tokenPattern);
console.log(op[1]);
you must declare the string first !
so if you are trying to get the value from the current page html code you can just
// Search the current page's HTML for the login-token assignment.
let str = document.body.innerHTML;
let pattern =/\bgetObj\("Frm_Logintoken"\)\.value = "([^"]+)";/;
// match() returns null when the assignment is not on the page, so check
// before indexing; null is logged in that case instead of throwing.
let found = str.match(pattern);
console.log(found === null ? null : found[1]);
and if you are trying to fetch the html string from other page using something like XMLHttpRequest
you can do this
let str = (http.responseText);
the full code :
// Fetch another page and log the login token embedded in its source.
const http = new XMLHttpRequest();
const url = 'http://page/';
// Asynchronous request: a synchronous XHR blocks the page while it waits and
// is deprecated on the main thread. The result is handled in the callback.
http.open('get', url, true);
http.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');
http.onreadystatechange = function ()
{
    // readyState 4 means the complete response has arrived.
    if (http.readyState !== 4)
    {
        return;
    }
    if (http.status !== 200 && http.status !== 0)
    {
        return;
    }
    const pattern = /\bgetObj\("Frm_Logintoken"\)\.value = "([^"]+)";/;
    const found = http.responseText.match(pattern);
    // match() returns null when the token assignment is missing.
    if (found === null)
    {
        console.error('Frm_Logintoken assignment not found in response');
        return;
    }
    console.log(found[1]);
};
http.send();
I hope this helps. Next time, please ask a clearer question: state the actual point of the question and how you intend to use the fix.
I need to get an array buffer from an http request sending me a base64 answer.
For this request, I can't use XMLHttpRequest.responseType="arraybuffer".
The response I get from this request is read through xhr.responseText. Hence it's encoded as a DOMString. I'm trying to get it back as an array buffer.
I've tried to go back to the base64 from the DOMString using btoa(mysString) or window.btoa(unescape(encodeURIComponent(str))) but the first option just fails, whereas the second option doesn't give the same base64. Example of the first few characters from each base64:
Incoming : UEsDBBQACAgIACp750oAAAAAAAAAAAAAAAALAAAAX3JlbHMvLnJlbH
After the second processing: UEsDBBQACAgIAO+/ve+/ve+/vUoAAAAAAAAAAAAAAAALAAAAX3JlbHMvLnJlbH
As you can see a part of it is similar, but some parts are way off.
What am I missing to get it right?
I have got same issue too.
The solution (I ran at Chrome(68.0.3440.84))
let url = '';

// Unicode code point -> ISO-8859-15 byte, for the eight characters whose
// code point differs from their byte value in that charset.
let iso_8859_15_table = {
    338: 188,
    339: 189,
    352: 166,
    353: 168,
    376: 190,
    381: 180,
    382: 184,
    8364: 164
};

/**
 * Rebuild the bytes behind a string that was decoded as ISO-8859-15.
 *
 * Each UTF-16 code unit is mapped through iso_8859_15_table when it has an
 * entry there and is otherwise used as the byte value directly. A value
 * outside 0..255 is still stored (the typed array keeps only its low 8 bits)
 * and reported with console.error.
 *
 * @param {string} iso_8859_15_str - text obtained by decoding as ISO-8859-15
 * @returns {Uint8Array} one byte per code unit of the input
 */
function iso_8859_15_to_uint8array(iso_8859_15_str) {
    const total = iso_8859_15_str.length;
    const bytes = new Uint8Array(total);
    let index = 0;
    while (index < total) {
        const codeUnit = iso_8859_15_str.charCodeAt(index);
        const isMapped = Object.prototype.hasOwnProperty.call(iso_8859_15_table, codeUnit);
        const byteValue = isMapped ? iso_8859_15_table[codeUnit] : codeUnit;
        bytes[index] = byteValue;
        if (byteValue > 255 || byteValue < 0) {
            console.error(`invalid data error`);
        }
        index += 1;
    }
    return bytes;
}
// Request the resource as text decoded with ISO-8859-15, then convert the
// decoded string back to bytes. `req` is now declared explicitly: assigning to
// an undeclared name creates an implicit global and throws in strict mode.
var req = new XMLHttpRequest();
// Force the charset used to decode responseText, whatever the server declares.
req.overrideMimeType('text/plain; charset=ISO-8859-15');
req.onload = () => {
    console.log(`Uint8Array : `)
    var uint8array = iso_8859_15_to_uint8array(req.responseText)
    console.log(uint8array)
}
req.open("get", url);
req.send();
Below is explanation what I learned to solve it.
Explanation
Why some parts are way off?
Because decoding the bytes as text loses data (in your case the response was decoded as UTF-8).
For example, let's talk about UTF-8
variable width character encoding for Unicode.
It has rules(This will become problem.) for reasons such as variable length characteristics and ASCII compatibility, etc.
so, decoder may replace a non-conforming characters to replacement character such as U+003F(?, Question mark) or U+FFFD(�, Unicode replacement character).
In the UTF-8 case, byte values 0~127 are stable but values 128~255 are unstable: any of them that does not form a valid UTF-8 sequence will be converted to U+FFFD.
Are other Text Decoders safe except UTF-8?
No. In most cases, not safe from rules.
UTF-8 is also unrecoverable. (128~255 are set to U+FFFD)
If the binary data and the decoded result can be corresponded to one-to-one, they can be recovered.
How to solve it?
Finds recoverable Text Decoders.
Force MIME type to recoverable charset of the incoming data.
xhr_object.overrideMimeType('text/plain; charset=ISO-8859-15')
Recover binary data from string with recover table when received.
Finds recoverable Text Decoders.
To recover, avoid the situation when decoded results' are duplicated.
The following code is a simple example, so there may be missing recoverable text decoders because it only consider Uint8Array.
// Survey which TextDecoder encodings map the 256 possible byte values to 256
// distinct characters, i.e. can be reversed with a lookup table.

// Input: every byte value 0..255 exactly once, in order.
let bufferView = new Uint8Array(256);
for (let i = 0; i < 256; i++)
    bufferView[i] = i;
let recoverable = []
let decoding = ['utf-8', 'ibm866', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-8i', 'iso-8859-10', 'iso-8859-13', 'iso-8859-14', 'iso-8859-15', 'iso-8859-16', 'koi8-r', 'koi8-u', 'macintosh', 'windows-874', 'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253', 'windows-1254', 'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258', 'x-mac-cyrillic', 'gbk', 'gb18030', 'hz-gb-2312', 'big5', 'euc-jp', 'iso-2022-jp', 'shift-jis', 'euc-kr', 'iso-2022-kr', 'utf-16be', 'utf-16le', 'x-user-defined', 'ISO-2022-CN', 'ISO-2022-CN-ext']
for (let dec of decoding) {
    try {
        let decodedText = new TextDecoder(dec).decode(bufferView);
        let loss = 0            // characters whose code differs from their index
        // char code -> first index that produced it. A Map with has() is used
        // because a truthiness test on the stored index would treat a stored
        // index of 0 as "not seen yet" and miss a duplicate.
        let recoverTable = new Map()
        let unrecoverable = 0   // characters produced more than once
        for (let i = 0; i < decodedText.length; i++) {
            let charCode = decodedText.charCodeAt(i)
            if (charCode != i)
                loss++
            if (!recoverTable.has(charCode))
                recoverTable.set(charCode, i)
            else
                unrecoverable++
        }
        let tableCnt = recoverTable.size
        if (tableCnt == 256 && unrecoverable == 0){
            recoverable.push(dec)
            // Deferred so the recoverable encodings are printed after the rest.
            setTimeout(()=>{
                console.log(`[${dec}] : err(${loss}/${decodedText.length}, ${Math.round(loss / decodedText.length * 100)}%) alive(${tableCnt}) unrecoverable(${unrecoverable})`)
            },10)
        }
        else {
            console.log(`!! [${dec}] : err(${loss}/${decodedText.length}, ${Math.round(loss / decodedText.length * 100)}%) alive(${tableCnt}) unrecoverable(${unrecoverable})`)
        }
    } catch (e) {
        // Any error while constructing or running the decoder is reported as
        // an unsupported label.
        console.log(`!! [${dec}] : not supported.`)
    }
}
setTimeout(()=>{
    console.log(`recoverable Charset : ${recoverable}`)
}, 10)
In my console, this return
recoverable Charset : ibm866,iso-8859-2,iso-8859-4,iso-8859-5,iso-8859-10,iso-8859-13,iso-8859-14,iso-8859-15,iso-8859-16,koi8-r,koi8-u,macintosh,windows-1250,windows-1251,windows-1252,windows-1254,windows-1256,windows-1258,x-mac-cyrillic,x-user-defined
And I used iso-8859-15 at beginning of this answer. (It has Smallest table size.)
Additional test) Comparison between UTF-8's and ISO-8859-15's result
Check U+FFFD is really disappeared when using ISO-8859-15.
/**
 * Fetch `url` and log the first 50 characters of the decoded response, so the
 * effect of different charsets can be compared side by side.
 *
 * @param {string} url - address to request
 * @param {string} [charset] - charset to force via overrideMimeType; when
 *     omitted no override is applied and the output is labelled 'utf-8'
 */
function requestAjax(url, charset) {
    const xhr = new XMLHttpRequest();
    if (!charset) {
        charset = 'utf-8';
    } else {
        xhr.overrideMimeType(`text/plain; charset=${charset}`);
    }
    xhr.open('get', url);
    xhr.onload = () => {
        const preview = xhr.responseText.split('', 50);
        console.log(`==========\n${charset}`);
        console.log(`${preview}\n==========`);
        console.log('\n');
    };
    xhr.send();
}

// Same resource twice: once decoded as ISO-8859-15, once with the default.
var url = '';
requestAjax(url, 'ISO-8859-15');
requestAjax(url);
Bottom line
Recovering binary data from a string (and back) needs some additional work.
Find recoverable text encoder/decoder.
Make a recover table
Recover with the table.
(You can refer to the very top of code.)
For use this trick, force MIME type of incoming data to desired charset.
it's my first question here. I tried to find an answer but couldn't, honestly, figure out which terms should I use, so sorry if it has been asked before.
Here it goes:
I have thousands of records in a .txt file, in this format:
(1, 3, 2, 1, 'John (Finances)'),
(2, 7, 2, 1, 'Mary Jane'),
(3, 7, 3, 2, 'Gerald (Janitor), Broflowski'),
... and so on. The first value is the PK, the other 3 are Foreign Keys, the 5th is a string.
I need to parse them as JSON (or something) in Javascript, but I'm having troubles because some strings have parentheses+comma (on 3rd record, "Janitor", e.g.), so I can't use substring... maybe trimming the right part, but I was wondering if there is some smarter way to parse it.
Any help would be really appreciated.
Thanks!
You can't (read probably shouldn't) use a regular expression for this. What if the parentheses contain another pair or one is mismatched?
The good news is that you can easily construct a tokenizer/parser for this.
The idea is to keep track of your current state and act accordingly.
Here is a sketch for a parser I've just written here, the point is to show you the general idea. Let me know if you have any conceptual questions about it.
There is a working demo here, but I beg you not to use it in production before understanding and patching it.
How it works
So, how do we build a parser:
var State = { // remember which state the parser is at.
BeforeRecord:0, // at the (
DuringInts:1, // at one of the integers
DuringString:2, // reading the name string
AfterRecord:3 // after the )
};
We'll need to keep track of the output, and the current working object since we'll parse these one at a time.
var records = []; // to contain the results
var state = State.BeforeRecord;
Now, we iterate the string, keep progressing in it and read the next character
for(var i = 0;i < input.length; i++){
if(state === State.BeforeRecord){
// handle logic when in (
}
...
if(state === State.AfterRecord){
// handle that state
}
}
Now, all that's left is to consume it into the object at each state:
If it's at ( we start parsing and skip any whitespaces
Read all the integers and ditch the ,
After four integers, read the string from ' to the next ' reaching the end of it
After the string, read until the ) , store the object, and start the cycle again.
The implementation is not very difficult too.
The parser
var State = { // keep track of the state
    BeforeRecord: 0, // expecting the "(" that opens a record
    DuringInts: 1,   // reading the four integer columns
    DuringString: 2, // expecting the quoted string column
    AfterRecord: 3   // after the ")", expecting "," or the end of the input
};

/**
 * Parse text of the form
 *   (1, 3, 2, 1, 'John (Finances)'), (2, 7, 2, 1, 'Mary Jane'), ...
 * into a list of records.
 *
 * Any whitespace (spaces, tabs, newlines) is allowed between tokens, the
 * spaces after commas are optional, and the "," after the final record is
 * optional. Parentheses and commas inside the quoted string are kept as-is.
 * Quotes inside the string are not supported.
 *
 * @param {string} input - the text to parse
 * @returns {Object[]} one object per record; keys 0..3 hold the four integers
 *     as numbers and `str` holds the string
 * @throws {Error} on an unexpected character, a string longer than 1000
 *     characters, an unterminated string, or input that ends inside a record
 */
function parseRecords(input) {
    var records = [];
    var state = State.BeforeRecord;
    var workingRecord = {}; // what we're reading into
    var intCount = 0;       // integers stored in workingRecord so far
    var i = 0;

    while (i < input.length) {
        var token = input[i];

        // Whitespace between tokens never matters. String contents are
        // consumed in one step below, so spaces inside strings are preserved.
        if (/\s/.test(token)) { i++; continue; }

        if (state === State.BeforeRecord) {
            if (token !== '(') throw new Error("Expected ( before new record");
            workingRecord = {};
            intCount = 0;
            state = State.DuringInts;
            i++;
            continue;
        }

        if (state === State.DuringInts) {
            var curNum = '';
            while (i < input.length && /[0-9]/.test(input[i])) {
                curNum += input[i++];
            }
            if (curNum === '') throw new Error("Expected number, got " + input[i]);
            while (i < input.length && /\s/.test(input[i])) i++;
            if (input[i] !== ',') throw new Error("Expected , after number, got " + input[i]);
            i++; // consume the comma
            workingRecord[intCount++] = Number(curNum);
            if (intCount === 4) state = State.DuringString;
            continue;
        }

        if (state === State.DuringString) {
            if (token !== "'") throw new Error("Expected ' to open the string, got " + token);
            var closeQuote = input.indexOf("'", i + 1);
            if (closeQuote === -1) throw new Error("Unterminated string");
            if (closeQuote - (i + 1) > 1000) throw new Error("Error, string length bounded by 1000");
            workingRecord.str = input.slice(i + 1, closeQuote);
            i = closeQuote + 1;
            while (i < input.length && /\s/.test(input[i])) i++;
            if (input[i] !== ')') throw new Error("Expected ) after string, got " + input[i]);
            i++; // consume the ")"
            // The record is complete at ")", so it is stored here; a missing
            // trailing comma therefore no longer loses the final record.
            records.push(workingRecord);
            state = State.AfterRecord;
            continue;
        }

        // State.AfterRecord: only the "," separating records is allowed.
        if (token !== ',') throw new Error("Invalid token found " + token);
        state = State.BeforeRecord;
        i++;
    }

    if (state === State.DuringInts || state === State.DuringString) {
        throw new Error("Unexpected end of input inside a record");
    }
    return records;
}

var input = " (1, 3, 2, 1, 'John (Finances)'), (2, 7, 2, 1, 'Mary Jane'), (3, 7, 3, 2, 'Gerald (Janitor), Broflowski')," // sample input
var records = parseRecords(input); // to contain the results
console.log(records); // logs [Object, Object, Object]
// each object has four numbers and a string, for example
// records[0][0] is 1, records[0][1] is 3 and so on,
// records[0].str is "John (Finances)"
I echo Ben's sentiments about regular expressions usually being bad for this, and I completely agree with him that tokenizers are the best tool here.
However, given a few caveats, you can use a regular expression here. This is because any ambiguities in your (, ), , and ' can be attributed (AFAIK) to your final column; as all of the other columns will always be integers.
So, given:
The input is perfectly formed (with no unexpected (, ), , or ').
Each record is on a new line, per your edit
The only new lines in your input will be to break to the next record
... the following should work (Note "new lines" here are \n. If they're \r\n, change them accordingly):
var input = /* Your input */;
// Build one array per record: four integers followed by the string.
// Accepts "\n" and "\r\n" line endings and skips blank lines (such as the one
// produced by a trailing newline), which previously made match() return null
// and the following .slice() throw a TypeError.
var output = input.split(/\r?\n/).reduce(function (rows, line) {
    if (line.trim() === '') {
        return rows;
    }
    // The final group is greedy, so parentheses, commas and quotes inside the
    // string all end up in the fifth column.
    var cols = line.match(/^\((\d+), (\d+), (\d+), (\d+), '(.*)'\)/);
    if (cols === null) {
        throw new Error("Line does not look like a record: " + line);
    }
    rows.push(cols.slice(1, 5).map(Number).concat(cols[5]));
    return rows;
}, []);
The code splits on new lines, then goes through row by row and splits into cells using a regular expression, which greedily attributes as much as it can to the final cell. It then turns the first 4 elements into integers, and sticks the 5th element (the string) onto the end.
This gives you an array of records, where each record is itself an array. The first element is your PK and the next three are the foreign keys (all as integers); the 5th element is the string.
For example, given your input, use output[2][4] to get "Gerald (Janitor), Broflowski", and output[1][0] to get the first PK 2 for the second record (don't forget JavaScript arrays are zero-indexed).
You can see it working here: http://jsfiddle.net/56ThR/
Another option would be to convert it into something that looks like an Array and eval it. I know it is not recommended to use eval, but it's a cool solution :)
var lines = input.split("\n");
var output = [];
// Index loop instead of for...in: for...in visits property names (as strings)
// and would also pick up any enumerable property added to Array.prototype.
for (var v = 0; v < lines.length; v++) {
    // Remove opening (
    lines[v] = lines[v].slice(1);
    // Remove closing ) and what is after
    lines[v] = lines[v].slice(0, lines[v].lastIndexOf(')'));
    // SECURITY: eval executes whatever code the line contains. Only use this
    // on input you fully trust; prefer one of the parsing approaches otherwise.
    output[v] = eval("[" + lines[v] + "]");
}
So, the eval parameter would look like: [1, 3, 2, 1, 'John (Finances)'], which is indeed an Array.
Demo: http://jsfiddle.net/56ThR/3/
And, it can also be written shorter like this:
var lines = input.split("\n");
// One array per line: strip the leading "(" and everything from the last ")"
// onwards, wrap the remainder in [ ] and evaluate it as an array literal.
// NOTE(review): eval runs arbitrary code from the input - trusted data only.
var output = lines.map(function (el) {
    var closeAt = el.lastIndexOf(')');
    // closeAt - 1 because the index shifts by one once the "(" is removed.
    var inner = el.slice(1).slice(0, closeAt - 1);
    return eval("[" + inner + "]");
});
Demo: http://jsfiddle.net/56ThR/4/
You can always do it "manually" :)
var lines = input.split("\n");
var output = [];
// Index loop instead of for...in, which is meant for object keys, not arrays.
for (var v = 0; v < lines.length; v++) {
    output[v] = [];
    // Remove opening (
    lines[v] = lines[v].slice(1);
    // Get integers: each one ends at the next comma, which is then consumed.
    for (var i = 0; i < 4; ++i) {
        var pos = lines[v].indexOf(',');
        // Explicit radix so the digits are always read as decimal.
        output[v][i] = parseInt(lines[v].slice(0, pos), 10);
        lines[v] = lines[v].slice(pos + 1);
    }
    // Get string between apostrophes
    lines[v] = lines[v].slice(lines[v].indexOf("'") + 1);
    output[v][4] = lines[v].slice(0, lines[v].indexOf("'"));
}
Demo: http://jsfiddle.net/56ThR/2/
What you have here is basically a csv (comma separated value) file which you wish to parse.
The easiest way would be to use an external library that will take care of most of the issues you have.
Example: jquery csv library is a good one. https://code.google.com/p/jquery-csv/
I have a lot of geo's which I want to use in webgl globe.
The format for Webgl is
Sample for Googles .json file in their working webgl globe source [["1993",[long, lat,weight,long, lat,weight],["1994",[long, lat,weight,long, lat,weight,long, lat,weight,long, lat,weight]]]
I've been looking for a way to convert this but i can't find a converter online.
Does anyone know where I can find a converter for this format or suggest a way to do this.
Sample of my data:
- Year Latitude Longitude Magnitude
- 1995 -82.8627519 -135 0.11
- 1995 -54.423199 3.413194 0.01
- 1994 -53.08181 73.504158 0.01
- 1994 -51.796253 -59.523613 0.04
- 1993 -49.280366 69.348557 0.02
- 1993 -41.4370868 147.1393767 0.18
Looking at this more, I think the json file Google are using is a nested json array of arrays. This
There are multiple ways to parse the data.
The first step is to save the data to a file.
For example:
Year Latitude Longitude Magnitude
1995 -82.8627519 -135 0.11
1995 -54.423199 3.413194 0.01
1994 -53.08181 73.504158 0.01
1994 -51.796253 -59.523613 0.04
1993 -49.280366 69.348557 0.02
1993 -41.4370868 147.1393767 0.18
in raw.txt
The second step is to load and parse data.
With parsing there are a couple of things to keep in mind:
The raw data has an inconsistent number of white spaces separating values, so those need to be collapsed first so we can split a row/line by the space character. Luckily the data doesn't contain names with spaces in them, so we can use a RegEx like so: /\s{2,}/g
We want to gather all the data pertaining to one year into a single list. One way is to use an array and continuously check if it has the year value already. Another is to simply use an Object/associative array/dictionary and not worry about any checks.
Once the data is correctly gathered in a object we pop it into a an array so it matches the format of the data being used.
Here's what I mean:
// Load the whitespace-separated table and regroup it as
// [[year, [v1, v2, v3, v1, v2, v3, ...]], ...], then log the result.
var xhr = new XMLHttpRequest(); // declared explicitly (was an implicit global)
xhr.open('GET', '/globe/raw.txt', true);
xhr.onreadystatechange = function(e) {
    // Only act once the complete response has arrived successfully.
    if (xhr.readyState !== 4 || xhr.status !== 200) {
        return;
    }
    var lines = xhr.responseText.split(/\r?\n/);//split .txt file into lines
    var data = [];//prepare an array to hold the end result
    var dict = {};//use an Object/Dictionary to collapse data from same key/year
    for(var i = 1 ; i < lines.length; i++){//for each line; index 0 is the header row
        var row = lines[i].trim();
        // Skip blank lines (e.g. after a trailing newline); otherwise they
        // create an entry under the key "" filled with NaN.
        if (row === '') continue;
        var line = row.split(/\s+/);//split on any run of spaces or tabs
        if( !dict[line[0]]) dict[line[0]] = [];//if there isn't an array to store that data yet, make one
        //append the three values after the year to the corresponding key/year
        dict[line[0]].push(parseFloat(line[1]), parseFloat(line[2]), parseFloat(line[3]));
    }
    for(var key in dict) data.push([key,dict[key]]);//at the end, loop through the object and populate an array
    console.log(data);
};
xhr.send(null);
so if you use something like this:
// Load the whitespace-separated table, regroup it per year and feed each
// year's values to the globe.
var xhr = new XMLHttpRequest(); // declared explicitly (was an implicit global)
xhr.open('GET', '/globe/raw.txt', true);
xhr.onreadystatechange = function(e) {
    // Only act once the complete response has arrived successfully.
    if (xhr.readyState !== 4 || xhr.status !== 200) {
        return;
    }
    var lines = xhr.responseText.split(/\r?\n/);//split .txt file into lines
    var data = [];//prepare an array to hold the end result
    var dict = {};//use an Object/Dictionary to collapse data from same key/year
    for(var i = 1 ; i < lines.length; i++){//for each line; index 0 is the header row
        var row = lines[i].trim();
        // Skip blank lines (e.g. after a trailing newline); otherwise they
        // create an entry under the key "" filled with NaN.
        if (row === '') continue;
        var line = row.split(/\s+/);//split on any run of spaces or tabs
        if( !dict[line[0]]) dict[line[0]] = [];//if there isn't an array to store that data yet, make one
        //append the three values after the year to the corresponding key/year
        dict[line[0]].push(parseFloat(line[1]), parseFloat(line[2]), parseFloat(line[3]));
    }
    for(var key in dict) data.push([key,dict[key]]);//at the end, loop through the object and populate an array
    window.data = data;
    // One data series per year, named after the year.
    for (i=0;i<data.length;i++) {
        globe.addData(data[i][1], {format: 'magnitude', name: data[i][0], animated: true});
    }
    globe.createPoints();
    settime(globe,0)();
    globe.animate();
};
xhr.send(null);
in the WebGL Globe experiment running on a server you will see your data.
I have this piece of javascript code
var file = Components.classes["#mozilla.org/file/local;1"]
.createInstance(Components.interfaces.nsILocalFile);
file.initWithPath( this.savefile );
if ( file.exists() == false ) {
return null;
}
var is = Components.classes["#mozilla.org/network/file-input-stream;1"]
.createInstance( Components.interfaces.nsIFileInputStream );
is.init( file,0x01, 00004, null);
var sis = Components.classes["#mozilla.org/scriptableinputstream;1"]
.createInstance( Components.interfaces.nsIScriptableInputStream );
sis.init( is );
output = sis.read( sis.available() );
sis.close();
is.close();
this.filterData = output;
return output;
Actually the file that i am reading is a binary file and has lets say 350 bytes.
Now the 19th byte is zero, so with the above code I get only 18 bytes in output.
when i tried debugging sis.available does return 350. But sis.read only reads upto Zero byte.
I want the way to read whole of 350 bytes in output.
EDIT
See https://developer.mozilla.org/en-US/docs/Reading_textual_data
Quote:
// Read a stream line by line as text in a known character encoding.
var charset = /* Need to find out what the character encoding is. Using UTF-8 for this example: */ "UTF-8";
// XPCOM contract IDs start with "@".
var is = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
    .createInstance(Components.interfaces.nsIConverterInputStream);
// This assumes that fis is the nsIInputStream you want to read from.
// 1024 is the buffer size; 0xFFFD is the character substituted for bytes that
// cannot be decoded in the given charset.
is.init(fis, charset, 1024, 0xFFFD);
is.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);
if (is instanceof Components.interfaces.nsIUnicharLineInputStream) {
    var line = {};  // out-parameter: readLine() stores the text in line.value
    var cont;       // false once the last line has been read
    do {
        cont = is.readLine(line);
        // Now you can do something with line.value
    } while (cont);
}
This avoids the null byte problems, is unicode safe, and works with less esoteric object types.
Original:
As per my comment above, and in light of your edit,
See https://developer.mozilla.org/en-US/docs/XPCOM_Interface_Reference/nsIScriptableInputStream where read() comes with the warning: If the data contains a null byte, then this method will return a truncated string. You may want to use readBytes() instead.
Alternatively, here's another way to do it:
// Alternative: turn the nsIFile into a file:// URL and load it with XHR.
// XPCOM contract IDs start with "@".
var ph = Components.classes["@mozilla.org/network/protocol;1?name=file"]
    .createInstance(Components.interfaces.nsIFileProtocolHandler);
var file_to_read = ph.getURLSpecFromFile(file);
var req = new XMLHttpRequest();
req.onerror = function(e) {
    onError(e);
}
req.onreadystatechange = function() {
    // Check the request object itself; the original tested `log.readyState`,
    // and `log` is not defined anywhere in this snippet.
    if (req.readyState == 4) {
        //...
    }
}
req.open("GET", file_to_read, true);
// Nothing is requested until send() is called.
req.send(null);
I may be wrong, but have you tried sending a simple GET request? In AJAX? Or do you strictly want to use JS?
EDIT:
Refer to this - How do I load the contents of a text file into a javascript variable?