Perform OCR on image connected to Azure Blob Trigger - javascript

I'm using an Azure function to perform OCR on an image received on a Blob Trigger. I want to store the results in an output blob and I'm not sure how to proceed.
var abbyyclient = require('nodejs-ocr');
var client = new abbyyclient('ocrTest', 'ocrpassword', 'http://cloud.ocrsdk.com');

module.exports = function (context, inputBlob, resultBlob) {
  function ocrComplete(err, results) {
    if (!err) {
      // context.log(results.toString());
      context.bindings.resultBlob = results;
    }
    // signal completion only after the async OCR call has finished
    context.done(err);
  }
  let apiParameters = {
    language: 'English',
    exportFormat: 'docx'
  };
  client.processImage(apiParameters, inputBlob, ocrComplete);
};
The code above is what I'm using in the Azure Blob Trigger function, but the OCR result comes back as a buffer rather than a Word document, even though I've specified the export format. I am a complete beginner at this; can someone please help me out? Thanks.
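For what it's worth, assigning to context.bindings.resultBlob only takes effect if the function has a matching blob output binding declared in its function.json; a minimal sketch, where the container paths and connection name are assumptions:

{
  "bindings": [
    { "name": "inputBlob", "type": "blobTrigger", "direction": "in",
      "path": "images/{name}", "connection": "AzureWebJobsStorage" },
    { "name": "resultBlob", "type": "blob", "direction": "out",
      "path": "results/{name}.docx", "connection": "AzureWebJobsStorage" }
  ]
}

If the buffer the library returns is in fact the bytes of the exported .docx, writing it to an output path ending in .docx may be all that's missing.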

Related

Watson Assistant context is not updated

I use Watson Assistant v1.
My problem is that every time I call the assistant from my Node.js code and pass the returned context back in, so that the conversation stays coordinated, the context is only updated once and I get stuck on one node of the conversation.
This is my code:
client.on('message', message => {
  // general variables
  var carpetaIndividual = <../../../>
  var cuerpoMensaje = <....>
  var emisorMensaje = <....>
  // detect if context exists
  if (fs.existsSync(carpetaIndividual + '/contexto.json')) {
    var watsonContexto = require(carpetaIndividual + '/contexto.json');
    var variableContexto = watsonContexto;
  } else {
    var variableContexto = {};
  }
  // connection with Watson Assistant
  assistant.message({
    input: { text: cuerpoMensaje },
    workspaceId: '<>',
    context: variableContexto,
  })
  .then(response => {
    let messageWatson = response.result.output.text[0];
    let contextoWatson = response.result.context;
    console.log('Chatbot: ' + messageWatson);
    // save and create JSON file for context
    fs.writeFile(carpetaIndividual + '/contexto.json', JSON.stringify(contextoWatson), 'utf8', function (err) {
      if (err) {
        console.error(err);
      }
    });
    // send messages to my application
    client.sendMessage(emisorMensaje, messageWatson);
  })
  .catch(err => {
    console.log(err);
  });
});
client.initialize();
The contexto.json file is updated, but the code only ever reads the first update of the file, not the later ones.
That is because you are using require to read the .json file. For all subsequent requires of an already-required file, the data is cached and reused.
You will need to use fs.readFileSync and JSON.parse:
// detect if context exists
if (fs.existsSync(carpetaIndividual + '/contexto.json')) {
  var watsonContexto = fs.readFileSync(carpetaIndividual + '/contexto.json');
  // converting to JSON
  var variableContexto = JSON.parse(watsonContexto);
} else {
  var variableContexto = {};
}
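An alternative sketch, in case you want to keep using require: delete the cached entry before re-requiring, so Node re-reads the file from disk (fs.readFileSync remains the simpler option):

// remove the cached copy so the next require re-reads the file from disk
delete require.cache[require.resolve(carpetaIndividual + '/contexto.json')];
var variableContexto = require(carpetaIndividual + '/contexto.json');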
There is another subtle problem with your code: you are relying on your async call to fs.writeFile completing before you read the file. This will be the case most of the time, but since you don't wait for fs.writeFile to complete, there is a chance that you may try to read the file before it is written.
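One way to close that race, sketched with the same variables the question uses, is to make the write synchronous so the context is on disk before the next message is handled:

// synchronous write: blocks until the context file is safely on disk
fs.writeFileSync(carpetaIndividual + '/contexto.json', JSON.stringify(contextoWatson), 'utf8');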

File uploads through socket.io (JavaScript & FileReader)

I am creating a chat app (in React Native), but for now I have run some tests in vanilla JavaScript. The server is a Node.js server.
It works for sending text messages, but now I have some questions about sending photo/video/audio files. I've been doing a lot of research online on the best method to do this.
I came up with the idea of using the FileReader API to split the file into chunks and send it chunk by chunk via the socket.emit() function.
This is my code so far (simplified):
Please note that I will create a React Native app, but for now (for testing) I've just created an HTML file with an upload form.
// index.html
// the page where my upload form is
var reader = {};
var file = {};
var sliceSize = 1000 * 1024;
var socket = io('http://localhost:8080');

const startUpload = e => {
  e.preventDefault();
  reader = new FileReader();
  file = $('#file')[0].files[0];
  uploadFile(0);
};
$('#start-upload').on('click', startUpload);

const uploadFile = start => {
  var slice = start + sliceSize + 1;
  var blob = file.slice(start, slice);
  reader.onloadend = e => {
    if (slice < file.size) {
      socket.emit('message', JSON.stringify({
        fileName: file.name,
        fileType: file.type,
        fileChunk: e.target.result
      }));
      // the next chunk would be sent from here, e.g. uploadFile(slice)
    } else {
      console.log('Upload completed!');
    }
  };
  reader.readAsDataURL(blob);
};
// app.js
// my Node.js server file
var file;
var files = {};

io.on('connection', socket => {
  console.log('User connected!');
  // when a message is received
  socket.on('message', data => {
    file = JSON.parse(data);
    if (!files[file.fileName]) {
      // this is the first chunk received: create a new string
      files[file.fileName] = '';
    }
    // append the (base64) chunk data
    files[file.fileName] = files[file.fileName] + file.fileChunk;
  });
  // on disconnect
  socket.on('disconnect', () => {
    console.log('User disconnected!');
  });
});
I did not include any checks for file type (I'm not at that point yet); I first want to make sure this is the right approach.
Stuff I still need to do:
Send a message (like socket.emit('uploaddone', ...)) from the client to the server to notify it that the upload is done (so the server can emit the complete file to another user).
My questions are:
Is it okay to send chunks of binary data (base64) over a socket, or would it take up too much bandwidth?
Will I lose some quality (photos/videos/audio files) when splitting them up into chunks?
If there is a better way to do this, please let me know. I'm not asking for working code examples, just some guidance in the right direction.
You can send raw bytes over a WebSocket; base64 has a 33% size overhead. You also won't have to JSON.stringify the whole (possibly large) body and parse it on the server side.

Will I lose some quality

No: the underlying protocol (TCP) delivers data in order and without corruption.
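A sketch of the raw-binary approach, reusing the file/reader/socket variables from the question (the 'chunk' and 'uploaddone' event names are made up for illustration; socket.io transmits ArrayBuffer and Buffer payloads as binary natively):

// client: read each slice as an ArrayBuffer instead of a base64 data URL
reader.onloadend = e => {
  socket.emit('chunk', {
    fileName: file.name,
    fileType: file.type,
    fileChunk: e.target.result // ArrayBuffer, sent as binary with no base64 overhead
  });
};
reader.readAsArrayBuffer(blob);

// server: binary chunks arrive as Node Buffers; collect and join them
const chunks = [];
socket.on('chunk', data => {
  chunks.push(data.fileChunk); // Buffer
});
socket.on('uploaddone', () => {
  const fileBuffer = Buffer.concat(chunks); // the complete file
});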
I realize this answer is a couple of months late, but for future reference you should look into using the acknowledgement option in socket.io:
// with acknowledgement
let message = JSON.stringify({
  fileName: file.name,
  fileType: file.type,
  fileChunk: e.target.result
});
socket.emit('message', message, (ack) => {
  // send next chunk...
});
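For the acknowledgement to fire, the server side has to invoke the callback it receives as the last argument; a minimal sketch (the handler body is illustrative):

// server: calling ack() triggers the client's acknowledgement callback
socket.on('message', (data, ack) => {
  const chunk = JSON.parse(data);
  // ...store the chunk as before...
  ack(); // the client can now safely send the next chunk
});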

Node.js how to return an array from within a asynchronous callback for use in another file

File called testing.js
I can do whatever I like with the data inside saveWeatherData, but I cannot call this function and return the data without getting undefined.
For example, if I try the code below in saveWeatherData, it prints out the summary as expected:
console.log(`The summary of the weather today is: ${dataArray[0]}`);
However, I want to use these values in another file, such as a server file that displays the weather summary, temperature, etc. when connected to.
So I need to return an array with these values in it, so that I can call this function and get my data stored in an array for further use.
I know the reason the array dataArray returns undefined is asynchronous code: the array is returned before we have gotten the data in the callback.
My question: is there any way to do what I am trying to do?
I tried my best to explain the problem and what I want to do; hopefully it's understandable.
Would I have to use a callback inside of a callback, to return the data once it's been fetched?
I just can't get my head around it, and I have tried multiple things to get the result I am looking for.
My last idea, and something I would prefer not to do, is to use the fs module to save the data to a text or JSON file and read it back in my other files.
I feel I'm close but can't get over the last hurdle, so I've decided to ask for a little help; even just point me in the right direction and I'll continue to try to figure it out.
Phew...
Thank you for your time!
const request = require("request");

let dataArray = [];

let saveWeatherData = function(weatherData) {
  dataArray = weatherData;
  return dataArray;
};

let getWeatherData = function(callback) {
  request({
    url: `https://api.forecast.io/forecast/someexamplekey/1,-1`,
    json: true
  }, (error, response, body) => {
    // creating an array to hold weather data until we can save it using the callback...
    let array = [];
    if (error) {
      console.log("Unable to connect with Dark Sky API servers.");
    } else {
      console.log(`Successfully connected to Dark Sky API servers!\n`);
      array.push(body.currently.summary, body.currently.temperature, body.currently.apparentTemperature, body.currently.windSpeed, body.currently.windBearing);
      callback(array);
    }
  });
};

getWeatherData(saveWeatherData);

module.exports = {
  saveWeatherData
};
My Other File...
File called server.js
const http = require("http");
const testing = require("./testing");

function onRequest(request, response) {
  let data = testing.saveWeatherData();
  console.log(`A user made a request: ${request.url}`);
  response.writeHead(200, { "Content-Type": "text/html" });
  response.write("<!DOCTYPE html>");
  response.write("<html>");
  response.write("<head>");
  response.write("<title>Weather</title>");
  response.write("</head>");
  response.write("<body>");
  response.write("Weather summary for today: " + data[0]);
  response.write("</body>");
  response.write("</html>");
  response.end();
}

http.createServer(onRequest).listen(8888);
console.log("Server is now running on port 8888...");
I'm still not sure what you are trying to do. However, I think you're not exporting what you're supposed to be exporting. To avoid the use of so many callbacks you can use async/await.
Change this part of your server.js:
async function onRequest(request, response) {
  let data = await testing.getWeatherData();
  console.log(`A user made a request: ${request.url}`);
  response.writeHead(200, { 'Content-Type': 'text/html' });
  response.write('<!DOCTYPE html>');
  response.write('<html>');
  response.write('<head>');
  response.write('<title>Weather</title>');
  response.write('</head>');
  response.write('<body>');
  response.write('Weather summary for today: ' + data[0]);
  response.write('</body>');
  response.write('</html>');
  response.end();
}
And this part of your testing.js:
let getWeatherData = function() {
  return new Promise((resolve, reject) =>
    request(
      {
        url: `https://api.darksky.net/forecast/someexamplekey/1,-1`,
        json: true
      },
      (error, response, body) => {
        // creating an array to hold weather data until we can save it using the callback...
        let array = [];
        if (error) {
          console.log('Unable to connect with Dark Sky API servers.');
          // reject so the awaiting caller doesn't hang forever
          reject(error);
        } else {
          console.log(`Successfully connected to Dark Sky API servers!\n`);
          array.push(
            body.currently.summary,
            body.currently.temperature,
            body.currently.apparentTemperature,
            body.currently.windSpeed,
            body.currently.windBearing
          );
          resolve(array);
        }
      }
    )
  );
};

module.exports = {
  getWeatherData
};
This fetches fresh weather data on every request. If you want to save the result to avoid calling the API every single time, you will need to do something extra, but I think for a weather app the important thing is to keep it updated.
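If you did want to skip the API call on most requests, a small time-based cache on top of getWeatherData is one option; a sketch, where the 10-minute lifetime is an arbitrary choice:

let cachedData = null;
let cachedAt = 0;
const MAX_AGE = 10 * 60 * 1000; // 10 minutes, chosen arbitrarily

async function getWeatherDataCached() {
  // reuse the cached array while it is still fresh
  if (cachedData && Date.now() - cachedAt < MAX_AGE) {
    return cachedData;
  }
  cachedData = await getWeatherData();
  cachedAt = Date.now();
  return cachedData;
}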

Passing HTML input file as the parameter of Firebase Cloud Function

I am pretty new to this area and started with Firebase Cloud Functions 2 days ago.
Sorry, I am still a student, so I might not clearly understand some of the documentation.
I am trying to figure out how a parameter is passed from my client-side JavaScript to a Firebase Cloud Function.
My cloud function:
exports.OCR = functions.https.onCall((req) => {
  const vision = require('@google-cloud/vision');
  // creates a client
  const client = new vision.ImageAnnotatorClient();
  console.log(req);
  // performs document text detection on the image file
  client
    .documentTextDetection(req)
    .then((results) => {
      console.log("Entered");
      console.log(req);
      const fullTextAnnotation = results[0].fullTextAnnotation;
      console.log(fullTextAnnotation.text);
      return results[0].fullTextAnnotation.text;
    })
    .catch(err => {
      console.error('ERROR:', err);
      return "error";
    });
});
I am using Firebase Cloud Functions and the Google Vision API, and I tried to pass the parameter like this.
My client-side code:
document.getElementById("fileInput").click();
var file = document.getElementById("fileInput");
var fileInput = document.getElementById('fileInput');

fileInput.addEventListener('change', function (e) {
  var file = e.target.files[0];
  // do something with the image file
  var tmppath = URL.createObjectURL(file);
  console.log(file);
  console.log(tmppath);
  //var url = "https://firebasestorage.googleapis.com/v0/b/recette-f3ef5.appspot.com/o/FB1.gif?alt=media&token=28727220-181c-440e-87ae-4808b5c9ba28";
  OCR(file)
    .then(function(result) {
      console.log(result);
    }).catch(function(err) {
      console.log(err);
    });
});
It did not work; I always get a null return when I trigger the function.
So my question is: how can I pass the file (from an HTML input tag) to my cloud function?
P.S. When I run the code locally with node the_code.js, it works.
According to the Google Cloud Node.js library documentation, the documentTextDetection function should receive a JS object like this:
var image = {
  source: { imageUri: 'gs://path/to/image.jpg' }
};

vision.documentTextDetection(image).then(response => {
  // doThingsWith(response);
}).catch(err => {
  console.error(err);
});
The file you are passing to the OCR function probably has a different structure than the one defined in the documentation.
There are some variants of this:
If the key is source, the value should be another object containing imageUri or filename as a key and a string as a value.
If the key is content, the value should be a Buffer.
So your code should look something like this:
console.log(tmppath);
//var url = "https://firebasestorage.googleapis.com/v0/b/recette-f3ef5.appspot.com/o/FB1.gif?alt=media&token=28727220-181c-440e-87ae-4808b5c9ba28";
var image = { source: { imageUri: 'https://firebasestorage.googleapis.com/v0/b/recette-f3ef5.appspot.com/o/FB1.gif?alt=media&token=28727220-181c-440e-87ae-4808b5c9ba28' } };
OCR(image)
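If you would rather send the file's actual contents than a URL, one hypothetical approach (the imageBase64 field name is made up for illustration) is to base64-encode the file on the client and rebuild a Buffer inside the function, matching the content variant above:

// client: encode the chosen file and pass it to the callable function
var readerOcr = new FileReader();
readerOcr.onload = function () {
  var base64 = readerOcr.result.split(',')[1]; // strip the "data:...;base64," prefix
  OCR({ imageBase64: base64 });
};
readerOcr.readAsDataURL(file);

// cloud function: rebuild the Buffer and use the "content" key
exports.OCR = functions.https.onCall((req) => {
  const image = { content: Buffer.from(req.imageBase64, 'base64') };
  // returning the promise lets the callable send the recognized text back
  return client.documentTextDetection(image)
    .then(results => results[0].fullTextAnnotation.text);
});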
Please provide complete error messages and a description of what file is.

How can I get a buffer for a file (image) from CollectionFS

I'm trying to insert an image into a PDF I'm creating server-side with PDFKit. I'm using cfs:dropbox to store my files. Before, when I was using cfs:filesystem, it was easy to add the images to my PDFs because they were right there. Now that they're stored remotely, I'm not sure how to add them, since PDFKit does not support adding images from just a URL. It will, however, accept a buffer. How can I get a buffer from my CollectionFS files?
So far I have something like this:
var portrait = Portraits.findOne('vS2yFy4gxXdjTtz5d');
var readStream = portrait.createReadStream('portraits');
I tried getting the buffer two ways so far:
First, using DataMan, but the last command never comes back:
var dataMan = new DataMan.ReadStream(readStream, portrait.type());
var buffer = Meteor.wrapAsync(Function.prototype.bind(dataMan.getBuffer, dataMan))();
Second, buffering the stream manually:
var buffer = new Buffer(0);
readStream.on('readable', function() {
  buffer = Buffer.concat([buffer, readStream.read()]);
});
readStream.on('end', function() {
  console.log(buffer.toString('base64'));
});
That never seems to come back either. I double-checked my doc to make sure it was there: it has a valid URL, and the image appears when I put the URL in my browser. Am I missing something?
I had to do something similar and since there's no answer to this question, here is how I do it:
// take a cfs file and return a base64 string
var getBase64Data = function(file, callback) {
  // callback has the form function (err, res) {}
  var readStream = file.createReadStream();
  var buffer = [];
  readStream.on('data', function(chunk) {
    buffer.push(chunk);
  });
  readStream.on('error', function(err) {
    callback(err, null);
  });
  readStream.on('end', function() {
    // join all chunks into a single Buffer before encoding
    callback(null, Buffer.concat(buffer).toString('base64'));
  });
};

// wrap it to make it sync
var getBase64DataSync = Meteor.wrapAsync(getBase64Data);

// get a cfs file
var file = Files.findOne();

// get the base64 string
var base64str = getBase64DataSync(file);

// get the buffer from the string
var buffer = new Buffer(base64str, 'base64');
Hope it'll help!
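Since PDFKit accepts a Buffer directly, the base64 round-trip is avoidable; a sketch of the same helper resolving the raw Buffer instead (getBufferData is an illustrative name):

// take a cfs file and return its contents as a single Buffer
var getBufferData = function(file, callback) {
  var readStream = file.createReadStream();
  var chunks = [];
  readStream.on('data', function(chunk) {
    chunks.push(chunk);
  });
  readStream.on('error', function(err) {
    callback(err, null);
  });
  readStream.on('end', function() {
    callback(null, Buffer.concat(chunks)); // hand PDFKit the Buffer directly
  });
};
var getBufferDataSync = Meteor.wrapAsync(getBufferData);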
