HTML with chinese unicode to png? - javascript

I'm trying to render this html document ./tagslegend.html with npm package wkhtmltox:
<!doctype html>
<html>
<head>
<style>
.cmn {
font-family: 'WenQuanYi Micro Hei';
}
</style>
</head>
<body>
<dl>
<dt class="cmn">中文</dt><dd>In mandarin language.</dd>
</dl>
</body>
</html>
Here's the javascript:
const express = require('express');
const fs = require('fs');
const wkhtmltox = require('wkhtmltox');
const app = express();
const converter = new wkhtmltox();
app.get('/tagslegend.png', (request, response) => {
response.status(200).type('png');
converter.image(fs.createReadStream('tagslegend.html'), { format: "png" }).pipe(response);
});
var listener = app.listen(process.env.PORT, function () {
console.log('App listening on port ' + listener.address().port);
});
I expect it to render like my browser would render that same html:
But am instead getting a png like this:
How can I fix this and make it render like the first image?
I have that font installed on the server:
$ fc-list | grep 'Wen'
/app/.fonts/WenQuanYi Micro Hei.ttf: WenQuanYi Micro Hei,文泉驛微米黑,文泉驿微米黑:style=Regular

This looks like an character encoding problem. It seems as if fs.createReadStream() is reading your HTML as ISO-8859-1, when it really should be reading it as UTF-8 — which is odd, since UTF-8 is the default encoding.
I'd make sure tagslegend.html is properly saved as a UTF-8 file. It couldn't hurt to explicitly declare:
<meta charset="utf-8">
...in the <head> section of your HTML as well.

Related

Tesseract.js doesn't recognize Arabic language

I'm using tesseract.js ORC library to read what is written on an image and write it in console or on a text file so I found this library and it's working find with English word or characters but when I tried to read what is written on the image in Arabic language it doesn't work so this is the image that I'm trying to read
and this is my code :-
Head Tag:-
<script src='https://unpkg.com/tesseract.js#v2.1.0/dist/tesseract.min.js'</script>
Body Tag:-
<script>
Tesseract.recognize(
'image.png',
'ara',
{ logger: m => console.log(m) }
).then(({ data: { text } }) => {
})
</script>
You need to handle the result inside the .then() block.
Here's a working example :
<!DOCTYPE html>
<meta charset="utf-8">
<title> OCR TEST </title>
<script src="https://unpkg.com/tesseract.js#v2.1.0/dist/tesseract.min.js"></script>
<output id="result">Processing...</output>
<script>
const output = document.querySelector('output#result');
Tesseract.recognize(
'https://i.imgur.com/mdzmK4w.png', 'ara'
).then(result => {
output.value = result.data.text;
}).catch(err => {
output.value = 'Processing Failed';
console.log(err);
});
</script>
It can take some time to process, especially on the first load because the WASM module and OCR training data will be fetched in the background first. You can see this happening in the Network tab of your Dev Tools.

update pug variable with client side javascript onchange event

I have server side code which serves up a pug file, the pug file has variables parsed to it when rendered by my server side code.
I would like to be able to update that variable in pug when a check box is checked / unchecked.
my server side code:
// load the express module
const express = require('express');
const simpleVarDisplaySite = 'simpleVarDisplay.pug';
const app = express();
app.use(express.urlencoded({extended: false}))
app.use(express.static(__dirname+'/static'));
// simpleVarDisplaySite page
app.get('/', function (req, res) {
console.log(req.url);
let myHeading = 'Simple Pug Page';
let simpleObject = {
'date': {
'day': 'time'
}
}
res.render(simpleVarDisplaySite, {
heading: myHeading,
pugObject: simpleObject,
});
})
app.listen(8082)
my pug file:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Simple Pug</title>
<link rel="stylesheet" href="/styles/MainStyling.css">
<script src="/scripts/pugVarUpdater.js"></script>
</head>
<body>
<form method="POST">
<div>
<header>
<h1 class="HeaderEl" id="heading">#{ heading }</h1>
</header>
<div>
<input type="checkbox" id="simpleCheckBox" onchange="JavaScript:updatePugVarFunc()">
</div>
<br>
each valueDict, date in pugObject
<div>
<div>
<span>#{date}</span>
</div>
<br>
each time, day in valueDict
<div>
<span>#{day}</span>
</div>
<div>
<span>#{time}</span>
</div>
</div>
</div>
</form>
</body>
</html>
my client-side js when checkbox is checked / unchecked:
function updatePugVarFunc() {
const simpleVarDisplaySite = 'simpleVarDisplay.pug';
let newSimpleObject = {
'new_date': {
'new_day': 'new_time'
}
}
alert(newSimpleObject)
let myNewHeading = 'My New Simple Heading'
alert(myNewHeading)
document.body.innerHTML = simpleVarDisplaySite({
heading: myNewHeading,
pugObject: newSimpleObject,
});
}
I would like to have pug variable: pugObject updated when the checkbox is checked / unchecked onChange event, and see the updates in my browser, which I have tried with:
document.body.innerHTML = simpleVarDisplaySite({
heading: myNewHeading,
pugObject: newSimpleObject,
});
But that does not work.
I know that client-side is handled rather differently from server-side, but I am soo hoping that there is a solution to my specific problem.
Please note that I am a nood with javascript and pug, and I am very open to any and all suggestions to better achieve my goal.
My File structure:
server.js
views
simpleVarDisplay.pug
static
scripts
pugVarUpdater.js
PS:
I have had a look at: use client-side js variable in pug
And that did not help me.
Your assistance and guidance is much appreciated.

test html page with jest

I'm trying to setup a test for prototype.js.
I would like to avoid starting a local server to use cypress / puppeteer / etc. My goal here is just use jest.
My idea was to have for each test a minimal index.html like the following:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.2.0/prototype.js"></script>
</head>
<body>
<span id="el">foo foo</span>
</body>
</html>
and I tried first to run jest as
const fs = require('fs');
const path = require('path');
const html = fs.readFileSync(path.resolve(__dirname, './index.html'), 'utf8');
jest.dontMock('fs');
describe('$', function () {
beforeEach(() => {
document.documentElement.innerHTML = html.toString();
});
afterEach(jest.resetModules);
it('finds a node', function () {
expect($('el')).toBeInTheDocument()
});
});
but $ as all the prototype.js global stuff is not available.
ouhhh .... looks like wrapping the expect into a window.onload is enough, which makes also sense.

Render unicode text with wkhtmltoimage

Trying to use wkhtmltox to turn an HTML file to an image:
./server.js
const express = require('express');
const fs = require('fs');
const wkhtmltox = require('wkhtmltox');
const app = express();
const converter = new wkhtmltox();
app.get('/tagslegend.png', (request, response) => {
response.status(200).type('png');
converter.image(fs.createReadStream('tagslegend.html'), { format: "png" }).pipe(response);
});
var listener = app.listen(process.env.PORT, function () {
console.log('App listening on port ' + listener.address().port);
});
And ./tagslegend.html:
<!doctype html>
<html>
<body>
<dl>
<dt>中文</dt><dd>In mandarin language.</dd>
</dl>
</body>
</html>
I'm expecting back an image of the above HTML, e.g. (how my browser would render it):
Instead I get back this:
How can I render that HTML to a png dynamically with the correct chinese characters and serve it to clients?
Add
<meta charset="utf-8">
to the <head> of the HTML document

Read a local HTML file from CasperJS start()

I am writing a simple casperjs script to fill a rather complex form on a website. The HTML code of the website is a bit messy and I don't want to go through the navigation steps to reach the page everytime when I am testing my script.
I have the form page saved as HTML file but I couldn't even properly load a testing HTML file into casperjs. Here is the code, file and result:
var casper = require('casper').create();
casper.start('file://test.html').then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
casper.run(function(){
this.echo('ended');
casper.done();
});
The test file:
<html>
<head>
<meta charset="utf-8">
<title>My page</title>
</head>
<body>
<h1 class="page-title">Hello</h1>
<ul>
<li>one</li>
<li>two</li>
<li>three</li>
</ul>
<footer><p>2012 myself</p></footer>
</body>
</html>
The execution result:
C:>started
<html><head></head><body></body></html>
ended
Why the tags within the HTML body are gone?
All works fine, with an absolute path:
var casper = require('casper').create();
casper.start('file:///home/root2/pjs/test.html').then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
casper.run(function(){
this.echo('ended');
casper.done();
});
The execution result:
started
<html><head>
<meta charset="utf-8">
<title>My page</title>
</head>
<body>
<h1 class="page-title">Hello</h1>
<ul>
<li>one</li>
<li>two</li>
<li>three</li>
</ul>
<footer><p>2012 myself</p></footer>
</body></html>
ended
You can also try to specify an absolute path like this:
file:///C://Full/Path/To/test.html
FYI, you can use these functions to get an absolute file uri for a relative one:
function getAbsoluteFilePath(relativePath) {
return "file:///" + currentDir() + relativePath;
};
// Courtesy https://github.com/casperjs/casperjs/issues/141
function currentDir() {
var sep = "/";
var pathParts = fs.absolute(casper.test.currentTestFile).split(sep);
pathParts.pop();
return pathParts.join(sep) + sep;
}
To use that in your scenario, use this:
// ...
casper.start(getAbsoluteFilePath('test.html')).then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
// ...
You can make use of the fs module to get the absolute path of the working directory, and then concatenate the protocol "file://" and the relativePath.
E.g.
const fs = require('fs');
var casper = require('casper').create();
// your casper logic here
console.log(getAbsoluteFilePath('test.html'));
casper.run();
function getAbsoluteFilePath(relativePath) {
return "file://" + fs.workingDirectory + '/' + relativePath;
};

Categories