My goal is from an xlsx file that loads the client, represent the variables using Leaflet.
When I give the absolute path of the example that I have on my server everything is fine, the problem comes when I try to give the file from <input type = "file">.
From what I have been able to read in other questions, a "fakepath" is created for security reasons, but this means that I can not access the file that the client leaves in the input.
I'm new to programming and I'm pretty lost in terms of security and this stuff. If someone had a solution, I would be very grateful.
HTML
<input type="file" accept="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" />
JS
/* DATA --- Read xlsx file (foreign code - PLUGIN xlsx.full.min.js ) */
function readXlsxFile(url, sheet, cb) {
var oReq = new XMLHttpRequest();
oReq.open("GET", url, true);
oReq.responseType = "arraybuffer";
oReq.onerror = function() {
cb(new Error("Could not load file " + url));
}
oReq.onload = function() {
var arraybuffer = oReq.response;
// Convertir datos a binario
var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
var workbook = XLSX.read(bstr, { type: "binary" });
var sheetName = workbook.SheetNames[sheet];
var worksheet = workbook.Sheets[sheetName];
var result = XLSX.utils.sheet_to_json(worksheet, { raw: true });
cb(null, result);
}
oReq.send();
};
function readXlsxFileSheet(url, cb) {
const rateFile = serializeMapFormValues().rate
if (rateFile === "travelers" || rateFile === "travelersPoblation" || rateFile === "travelersForeignPercent" || rateFile === "travelersNatPercent") {
readXlsxFile(url, 0, cb);
} else if (rateFile === "ruralTravelers" || rateFile === "ruralForeignPercent" || rateFile === "ruralPercent" || rateFile === "ruralNatPercent") {
readXlsxFile(url, 1, cb);
} else if (rateFile === "apartTravelers" || rateFile === "apartPoblation" || rateFile === "apartForeignPercent" || rateFile === "apartNatPercent") {
readXlsxFile(url, 2, cb);
} else if (rateFile === "hotelTravelers" || rateFile === "hotelForeignPercent" || rateFile === "hotelNatPercent") {
readXlsxFile(url, 3, cb);
} else {
console.log("Lost variable for sheet");
}
};
const submitButton = document.querySelector("input[type=submit]");
submitButton.addEventListener("click", function(ev) {
ev.preventDefault();
//Catch path and variable from the HTML form
let fileUrl = serializeMapFormValues().url;
let fileRow = serializeMapFormValues().rate;
// Update map data
readXlsxFileSheet(fileUrl, function(error, data) {
// code
});
});
If I change fileUrl ("C:\fakepath\turismo_ejemplo_resuelto.xlsx") for "files/turismo_ejemplo_resuelto.xlsx" everything is fine.
Related
There is a form that a user will use to upload an excel file to. From there, in JavaScript, I am able to pull the data (this includes if excel file has multiple sheets) and store it in arrays (columnA, columnB, etc.). However, I am not sure how to write to an existing excel file that is on the server with the data stored in the arrays. Any help would be appreciated.
The following is what I have so far:
let selectedFile, fileName;
document.getElementById('input-excel').addEventListener("change", (event) => {
selectedFile = event.target.files[0];
fileName = event.target.files[0].name;
})
let data=[{
"name":"jayanth",
"data":"scd",
"abc":"sdef"
}]
document.getElementById('button').addEventListener("click", () => {
XLSX.utils.json_to_sheet(data, selectedFile);
if(fileName.includes("Excel FileName")){
let fileReader = new FileReader();
fileReader.readAsBinaryString(selectedFile);
fileReader.onload = (event)=>{
let data = event.target.result;
let workbookInputted = XLSX.read(data,{type:"binary"}); //excel file
let residential_worksheet = workbookInputted.Sheets[workbookInputted.SheetNames[0]]; //test1 sheet
let non_residential_worksheet = workbookInputted.Sheets[workbookInputted.SheetNames[1]]; //test2 sheet
let peh_worksheet = workbookInputted.Sheets[workbookInputted.SheetNames[2]]; //test3 sheet
let educational_worksheet = workbookInputted.Sheets[workbookInputted.SheetNames[3]]; //test4 setting sheet
let correct_worksheet = workbookInputted.Sheets[workbookInputted.SheetNames[4]]; //test5 setting sheet
const sheetList = workbookInputted.SheetNames;
const res = WhichSheet(residential_worksheet,sheetList[0]);
const nonRes = WhichSheet(non_residential_worksheet,sheetList[1]);
const peh = WhichSheet(peh_worksheet,sheetList[2]);
const ed = WhichSheet(educational_worksheet,sheetList[3]);
const correct = WhichSheet(correct_worksheet,sheetList[4]);
//console.log(sheetList[3]);
}
}
});
function WhichSheet(worksheet, fileNme){
if(fileName.localeCompare("test")){
ReadColumntoJSData(worksheet, fileNme);
} else if(fileName.includes("test2")){
ReadColumntoJSData(worksheet, fileNme);
} else if(fileName.includes("test3")){
ReadColumntoJSData(worksheet, fileNme);
} else if(fileName.includes("test4")){
ReadColumntoJSData(worksheet, fileNme);
} else if(fileName.includes("test5")){
ReadColumntoJSData(worksheet, fileNme);
}
}
/*==
====== Following method reads each column per sheet. ======
===*/
function ReadColumntoJSData(worksheet, fileNme){
var colA = [];
var colB = [];
var colC = [];
var colD = [];
var colE = [];
var colF = [];
for(let col in worksheet){
if(col.toString()[0] === 'A'){
if(col === "A1" || col === "A2" || col === "A3"){
//do nothing
} else {
colA.push(worksheet[col].v);
}
}
if(col.toString()[0] === 'B'){
if(col === "B1" || col === "B2" || col === "B3"){
//do nothing
} else {
colB.push(worksheet[col].v);
}
}
if(col.toString()[0] === 'C'){
if(col === "C1" || col === "C2" || col === "C3"){
//do nothing
} else {
colC.push(worksheet[col].v);
}
}
if(col.toString()[0] === 'D'){
if(col === "D1" || col === "D2" || col === "D3"){
//do nothing
} else {
colD.push(worksheet[col].v);
}
}
if(col.toString()[0] === 'E'){
if(col === "E1" || col === "E2" || col === "E3"){
//do nothing
} else {
colE.push(worksheet[col].v);
}
}
if(fileNme == "test2" || fileNme == "test5"){
//do nothing
} else{
if(col.toString()[0] === 'F'){
if(col === "F1" || col === "F2" || col === "F3"){
//do nothing
} else {
colF.push(worksheet[col].v);
}
}
}
}
ExportToExcel(colA,colB,colC,colD,colE,colF);
}
function ExportToExcel(colA,colB,colC,colD,colE,colF,fileName){
console.log(colA)
}
UPDATE: I tried the following to write to the existing excel sheet. However, I am getting the following error: Uncaught TypeError: Cannot read properties of undefined (reading 'A3').
var req = new XMLHttpRequest();
req.open("GET", fileURL2, true);
req.responseType = "arraybuffer";
req.onload = function(e) {
var data = new Uint8Array(req.response);
const writeWorkBook = XLSX.read(data, {type:"array"});
const writeWorkSheet = writeWorkBook.Sheets["TEST123"]
console.log("Worksheet: " + JSON.stringify(writeWorkSheet));
writeWorkSheet['A3'].v = colA[0];
}
req.send();
I'm building a small web scraper and I have stumbled into the following problem: my applications needs to scrape different parts of a website and put the information into the database. Sometimes it gives crazy results such as duplicated entries, or it returns undefined from a function getPhoto(). However, if I only call that function (and don't run the rest of the script), it returns a correct result!
I have a for loop, that loops through different URL. It goes to each URL and scrapes the following information: 1. title, 2.description, 3. internal link, 4. calls a function that generates an image according to the title (getPhoto(...) ), 5. saves the results to the DB. Everything happens on the server (I'm using Cron jobs, no client interaction)
for (i = 0; i < AllLinks.length; i++) {
if (AllLinks[i] != undefined && AllLinks[i] != null && sepLink[2] == "www.fly4free.pl") {
var t2 = {
travelTitle: null,
travelTitle2: null,
travelTitle3: null,
travelDescription: null,
travelDescription2: null,
travelDescription3: null,
travelBuy: null,
travelBuy2: null,
travelImage: null
};
var TravelLink1 = AllLinks[i];
result = HTTP.get(AllLinks[i], {});
$ = cheerio.load(result.content);
t2.travelTitle = $('.article__title').text();
t2.travelDescription = $('.article__content').find('p').first().text();
if ($("img[src$='//www.fly4free.pl/wp-content/uploads/2016/09/lotJm.png']").parent().attr('href') != null) {
t2.travelBuy = $("img[src$='//www.fly4free.pl/wp-content/uploads/2016/09/lotJm.png']").parent().attr('href'); // Link to buy
}
if (t2.travelBuy) {
if (t2.travelBuy.split('https://').pop().split('http://').pop() != null) {
t2.travelBuy2 = t2.travelBuy.split('https://').pop().split('http://').pop(); // link ready for DB
} else {
t2.travelBuy2 = t2.travelBuy;
}
}
t2.travelTitle3 = convertCurrencyInText(t2.travelTitle, 'PLN');
t2.travelDescription3 = convertCurrencyInText(t2.travelDescription, 'PLN');
translate(t2.travelTitle3, {from: 'pl', to: 'en'}).then(res => {
t2.travelTitle2 = res.text; // title for DB
if (t2.travelTitle2) { t2.travelImage = getPhoto(t2.travelTitle2); }
translate(t2.travelDescription3, {from: 'pl', to: 'en'}).then(response => {
t2.travelDescription2 = response.text; // description for DB
if (t2.travelDescription2 != null && t2.travelTitle2 != null && t2.travelBuy2 != null && TravelLink1 != null && t2.travelImage != null) {
Links.insert({ title: t2.travelTitle2, description:t2.travelDescription2, image: t2.travelImage, buyLink:t2.travelBuy2, link: TravelLink1, datetime: new Date() });
}
}).catch(err => {
console.error(err);
});
}).catch(err => {
console.error(err);
});
}
}
"AllLinks" contains different URLs. I have problems scraping this URL: http://www.fly4free.pl/na-wakacje-do-toskanii-tanie-loty-do-pizy-z-gdanska-za-170-pln/
getPhoto() function
function getPhoto(title) {
var travelPlace = nlp(title).match('to *').out('text').replace('to','').trim();
if (travelPlace) {var travelPlace2 = travelPlace.split(' '); }
if (travelPlace2) {var travelPlace3 = travelPlace2[0] + "+" + travelPlace2[1]; }
if (travelPlace3) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+travelPlace3+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
} else if (images.data.totalHits == 0) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+travelPlace2[0]+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
}
} else if (nlp(title).places().data().length > 0) {
var result = nlp(title).places().data()[0].text.replace(/[^a-zA-Z ]/g, "").trim();
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+result+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
} else {
var title2 = title.replace(/[^a-zA-Z ]/g, "").split(" ");
if (title2) {
for(i = 0; i < title2.length; i++) {
if (cities[title2[i]] == 1) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+title2[i]+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
} else {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q=travel&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
}
}
}
}
}
I try to console log the results - sometimes I get a correct image from getPhoto(), but an undefined link from t2.travelBuy, sometimes vice versa. Can you tell me what I'm doing wrong? I saw some people are using Promises or async/await functions on that kind of problems. Do you think that would help me? How should I change my code in order to scrape the website without getting "undefined"?
"translate" comes from "google-translate-api" package
you can try var new_func = Meteor.wrapAsync(YOUR FUNCTION THAT HAVE CALLBACK) and the when you use new_func() it will return the result as you would expect from normal function instead of waiting for callback
I'm trying to setup a file upload through rest for large files. The function below is taking care of chunking but I need to be able to recognize the last chunk because my rest call changes to /finishUpload() in order to commit the save.
Right now I'm only able to figure out when the blob is empty but I can't figure out how to determine the last iteration before the blob is empty.
This is the script I'm using below to parse my files.
export default function parseFile(file, options) {
var opts = typeof options === 'undefined' ? {} : options;
var fileSize = file.size;
var chunkSize = typeof opts['chunk_size'] === 'undefined' ? 64 * 1024 : parseInt(opts['chunk_size']);
var binary = typeof opts['binary'] === 'undefined' ? false : opts['binary'] == true;
var offset = 0;
var self = this; // we need a reference to the current object
var readBlock = null;
var chunkReadCallback = typeof opts['chunk_read_callback'] === 'function' ? opts['chunk_read_callback'] : function() {};
var chunkErrorCallback = typeof opts['error_callback'] === 'function' ? opts['error_callback'] : function() {};
var success = typeof opts['success'] === 'function' ? opts['success'] : function() {};
var onLoadHandler = function(evt) {
if (evt.target.result == "") {
console.log('Chunk empty, call finish');
success(file);
return;
}
if (evt.target.error == null) {
chunkReadCallback(evt.target.result, offset).then(function() {
offset += evt.target.result.length;
readBlock(offset, chunkSize, file);
});
} else {
chunkErrorCallback(evt.target.error);
return;
}
if (offset >= fileSize) {
success(file);
return;
}
}
readBlock = function(_offset, _chunkSize, _file) {
var r = new FileReader();
var blob = _file.slice(_offset, _chunkSize + _offset);
console.log("blob size:", blob.size, "offset:", _offset, "C+S:",_chunkSize + _offset)
r.onload = onLoadHandler;
if (binary) {
r.readAsArrayBuffer(blob);
} else {
r.readAsText(blob);
}
}
readBlock(offset, chunkSize, file);
}
Codepen
Why not rely on the filesize, i.e. check the condition _chunkSize + _offset >= fileSize?
You can use progress, loadend events to process File object one byte at a time; define a variable where processing should be paused or stopped at nth byte, every nth byte, or any byte during processing of file.
var str = "abcdefghijklmnopqrstuvwxyz";
var type = "application/octet-stream";
var data = new Blob([], {
type: type
});
var filename = "file.txt";
var reader = new FileReader();
var complete = false;
var beforeEnd = false;
var stopAt = str.length - 1;
function handleFile(e) {
data = new File([data.slice()
, str.slice(data.size, data.size + 1)]
, filename, {
type: type,
lastModifiedDate: new Date()
});
}
function handleRead(e) {
if (data.size <= str.length && !complete) {
if (data.size === stopAt && !beforeEnd) {
beforeEnd = true;
var r = new FileReader();
r.onloadend = function() {
alert(`stopAt: ${stopAt}\n`
+`data.size: ${data.size}\n`
+`result at stopAt: ${r.result[stopAt -1]}`);
reader.readAsArrayBuffer(data);
}
r.readAsText(data);
} else {
reader.readAsArrayBuffer(data)
}
} else {
console.log("complete")
}
}
function handleProgress(e) {
if (data.size <= str.length && !complete) {
var read = new FileReader();
read.onload = function() {
if (!complete) {
console.log(read.result);
if (read.result.length === str.length) {
complete = true;
console.log(data);
}
}
}
read.readAsText(data);
}
}
reader.addEventListener("load", handleFile);
reader.addEventListener("loadend", handleRead);
reader.addEventListener("progress", handleProgress);
reader.readAsArrayBuffer(data);
Situation:
I want to create a multithread script where I load a list of IPs + account information with a CSV.
I load the data and call a function where I open electron and run my nightmare script in combination with Vo. Inside the script I go to a site, loop through a list of links and check if someone lives in Australia.
When I have an error, for example Timeout, the browser stops working.
Error Example ->
{ message: 'navigation error',
code: -7,
details: 'Navigation timed out after 30000 ms',
url: 'https://facebook.com/login' }
Here is my Code
var fs = require('fs');
var csv = require('fast-csv');
var vo = require('vo');
var Nightmare = require('nightmare');
var count = 0;
var urls = fs.readFileSync('uniqueIds.csv').toString().split("\n");
var arrayUrls = Object.keys(urls).map(function (key) {return urls[key]});
var bloqNumber = 0;
function *run(proxy, user, pass, urlsID) {
var nightmare = new Nightmare({
webPreferences: { partition: 'your-custom-partition'},
switches:{
'proxy-server': proxy,
'ignore-certificate-errors': true
}, show: true });
yield nightmare
.goto('https://facebook.com/login')
.wait(".inputtext._55r1.inputtext._1kbt.inputtext._1kbt")
.type('input[name="email"]', user)
.type('input[name="pass"]', pass)
.click('button[name=login]')
.wait(29000);
var range = urlsID * 2000;
var rangeStart = range - 2000;
var urlsarray = arrayUrls.slice(rangeStart, range);
for (var i = 0; i < urlsarray.length; i++) {
count++;
console.log(count + " -> " + proxy);
if (count > 150){
yield nightmare.end();
}
yield nightmare
.goto("https://www.facebook.com/profile.php?id=" + urlsarray[i] + "&sk=about§ion=living&pnref=about")
.wait(1000);
var seqCheck = yield nightmare.exists(".captcha_interstitial");
var bloqCheck = yield nightmare.exists(".mvl.ptm.uiInterstitial.uiInterstitialLarge.uiBoxWhite");
if (seqCheck == true) {
console.log("Seqcheck");
yield nightmare.wait(29000);
}
if (bloqCheck == true) {
console.log("Blocked for a week" + user + proxy);
bloqNumber++;
console.log(bloqNumber);
if (bloqNumber > 6) {
yield nightmare.end();
}
continue;
}
var location = yield nightmare.exists("._3pw9._2pi4._2ge8");
bloqNumber = 0;
console.log(location);
if (location == true) {
var getLocation = yield nightmare.evaluate(function() {
var jsonObject = new Array();
var links = document.getElementsByClassName('_3pw9 _2pi4 _2ge8');
var numProfiles = links.length;
for(var i = 0; i< numProfiles; i++){
var elem;
try {
elem = links[0].querySelector("._50f5._50f7 a").text;
} catch (err) {
var arrr = new Array('Hello', 'world');
return arrr;
}
jsonObject.push(elem);
}
return jsonObject;
});
var locationString = getLocation.join(" + ");
console.log(locationString + " -> " + urlsarray[i]);
if (locationString.indexOf("Australia") !== -1 ||
locationString.indexOf("Queensland") !== -1 ||
locationString.indexOf("New South Wales") !== -1 ||
locationString.indexOf("Victoria") !== -1 ||
locationString.indexOf("Northern Territory") !== -1 ||
locationString.indexOf("South Australia") !== -1||
locationString.indexOf("Tasmania") !== -1 ||
locationString.indexOf("Sydney") !== -1 ||
locationString.indexOf("Adelaide") !== -1 ||
locationString.indexOf("Cairns") !== -1 ||
locationString.indexOf("Perth") !== -1 ||
locationString.indexOf("Melbourne") !== -1 ||
locationString.indexOf("Brisbane") !== -1 ||
locationString.indexOf("Bundaberg") !== -1 ||
locationString.indexOf("Canberra") !== -1 ||
locationString.indexOf("Newcastle") !== -1 ||
locationString.indexOf("Western Australia") !== -1 ) {
console.log("Im in australia");
var stringToPrint = urlsarray[i] + ", " + locationString + "\n";
fs.appendFile('pages.csv', stringToPrint.replace(/(\r\n|\n|\r)/gm,"") + "\n", function (err) {
console.log("a new entry");
});
}
} else {
console.log("It was false");
}
}
yield nightmare.end();
}
fs.createReadStream('proxies.csv')
.pipe(csv())
.on('data', function (data) {
var proxy = data[0];
var user = data[1];
var pass = data[2];
var urlsID = data[3];
console.log(urlsID);
console.log(user);
console.log(pass);
vo(run(proxy, user, pass, urlsID)).then(out => console.log('out', out)).catch(error => console.log(error));
}).on('end', function (data) {
console.log('CSV reading finished.')
});
Desired Outcome:
I want every time i get some kind of error that my thread is closing.
Solved. Just append .catch like in the example below.
yield nightmare
.goto('https://facebook.com/login')
.wait(".inputtext._55r1.inputtext._1kbt.inputtext._1kbt")
.type('input[name="email"]', user)
.type('input[name="pass"]', pass)
.click('button[name=login]')
.wait(29000).catch(function(err){
console.dir(err);
nightmare.end();
});
I am developping an hybrid app (JQM 1.4 + Phonegap 3.6.3).
I have a function populateImagesUrlsLocalAndServer that checks if an image file exists on my server, but I can't use this function because it makes my app crash on Phonegap...
I can't find why.
Can you help me fix this issue so I can use this feature in my app again ?
Thank
function populateImagesUrlsLocalAndServer(str, baseUrl, type) {
if (connectionStatus == "online") {
if (UrlExists('./'+baseUrl+str)) { //local file
imagesUrls[str] = './'+baseUrl+str;
} else if (isPhoneGap && UrlExists('http://boardlineapp.com/app/'+baseUrl+str)) { //server file....we exclude this for desktop browser because of cross domain error
console.log('retrieving '+'http://boardlineapp.com/app/'+baseUrl+str+' on server')
imagesUrls[str] = 'http://boardlineapp.com/app/'+baseUrl+str;
} else {
imagesUrls[str] = './'+baseUrl+'default.png';
}
} else { //offline
if (UrlExists('./'+baseUrl+str)) { //local file
imagesUrls[str] = './'+baseUrl+str;
} else {
imagesUrls[str] = './'+baseUrl+'default.png';
}
}
imagesUrls[str+'type'] = type;
}
instead, for now I am using :
function populateImagesUrls(str, baseUrl, type) {
if ( str == 'byrne-ow-fender.png'
|| str == 'ci-tacogrinder.png'
|| str == 'noamizuno.png'
|| str == 'brendanmargieson.png'
|| str == 'kaihing.png'
|| str == 'dustinhollick.png'
|| str == 'dhd-thetwin.png'
) {
//console.log(str);
//console.log(baseUrl);
imagesUrls[str] = './'+baseUrl+'default.png';
} else {
imagesUrls[str] = './'+baseUrl+str;
}
imagesUrls[str+'type'] = type;
}
the function that calls it is the following:
function checkIfImagesExistAllAtOnce() {
var prodataTemp = [];
prodataTemp = prodata.slice();
prodataTemp.shift();
prodataTemp.sort(sort_by('brand', 'name', 'model'));
var strBrandDone;
var strNameDone;
var strModelDone;
for (i = 1; i < prodataTemp.length; ++i) {
//check brand
var str = prodataTemp[i]['brand'].replace(/\s+/g, '').toLowerCase();
str = str+'.png';
if (str != strBrandDone) {
var baseUrl = "images/brands/";
strBrandDone = str;
var type = "brand";
populateImagesUrls(str, baseUrl, type);
//populateImagesUrlsLocalAndServer(str, baseUrl, type);
}
//check pro image
var str = prodataTemp[i]['name'].replace(/\s+/g, '').toLowerCase();
str = str+'.png';
if (str != strNameDone) {
var baseUrl = "images/pros/";
strNameDone = str;
var type = "pro";
populateImagesUrls(str, baseUrl, type);
//populateImagesUrlsLocalAndServer(str, baseUrl, type);
}
//check board image
var str = prodataTemp[i]['imageName'];
if (str != strModelDone) {
var baseUrl = "images/boards/";
strModelDone = str;
var type = "board";
populateImagesUrls(str, baseUrl, type);
//populateImagesUrlsLocalAndServer(str, baseUrl, type);
}
}
prodataTemp = null;
}