I am using tessearct.js library in my angular code.
I want to preserve the white spaces, the indentation as it is. How to do it?
Currently I am using this piece of code to do it.
async doOCR {
const worker = createWorker({
logger: m => console.log(m),
});
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const value = await worker.recognize(this.selectedFile);
}
I am looking a method to do it on client side only, that's why not using its python library.
You can give it a try after version (3.04), they have added the preserve_interword_spaces`. You can try this and check if this works:
async doOCR {
const worker = createWorker({
logger: m => console.log(m),
});
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
// there is no proper documentation, but they have added this flag
// to run it as a command
await worker.setParameters({
preserve_interword_spaces: 1,
});
const value = await worker.recognize(this.selectedFile);
}
Related
There are some old solutions to resolve this question, the one is in github, the other is in stackoverflow.
puppeteer has _client property in lower version.
The solution in lower vision as follows:
page._client.send('Network.setBypassServiceWorker', {bypass: true})
The puppeteer version is 18.0.5, so the Page has not _client property.
So the same solution in higher vison as follows:
const client = await page.target().createCDPSession();
await client.send("Network.setBypassServiceWorker", { bypass: true });
But it not working.
So how to resolve this problem?
We should add a new line await client.send("Network.enable") to enable the network. The code as follows:
...
const client = await page.target().createCDPSession();
await client.send("Network.enable"); // Must enable network.
await client.send("Network.setBypassServiceWorker", { bypass: true });
await page.setRequestInterception(true);
...
So we can handle the response in page.on().
...
page.on("response", async (res) => {
// do somethings.
})
...
I'm trying to build telegram bot to parse page on use request. My parsing code works fine inside one async function, but completeky falls on its face if I try to put it inside another async function.
Here is the relevant code I have:
const puppeteer = require('puppeteer');
const fs = require('fs/promises');
const { Console } = require('console');
async function start(){
async function searcher(input) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
const url = ; //here is a long url combining logic, that works fine
await page.goto(url);
const currentUrl = requestPage.url();
console.log(currentUrl); //returns nothing.
//here is some long parsing logic
await browser.close();
return combinedResult;
}
//here is a bot code
const { Telegraf } = require('telegraf');
const bot = new Telegraf('my bot ID');
bot.command('start', ctx => {
console.log(ctx.from);
bot.telegram.sendMessage(ctx.chat.id, 'Greatings message', {});
bot.telegram.sendMessage(ctx.chat.id, 'request prompt ', {});
})
bot.on('text', (ctx) => {
console.log(ctx.message.text);
const queryOutput = searcher(ctx.message.text);
bot.telegram.sendMessage(ctx.chat.id, queryOutput, {});
});
bot.launch()
}
start();
Here is an error message:
/Users/a.rassanov/Desktop/Fetch/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:218
return Promise.reject(new Error(`Protocol error (${method}): Session closed. Most likely the ${this._targetType} has been closed.`));
^
Error: Protocol error (Page.navigate): Session closed. Most likely the page has been closed.
I'm very new to this, and your help is really appriciated.
I am trying to build a scraper to monitor web projects automatically.
So far so good, the script is running, but now I want to add a feature that automatically analyses what libraries I used in the projects. The most powerful script for this job is wappalyser. They have a node package (https://www.npmjs.com/package/wappalyzer) and it's written that you can use it combined with pupperteer.
I managed to run pupperteer and to log the source code of the sites in the console, but I don't get the right way to pass the source code to the wappalyzer analyse function.
Do you guys have a hint for me?
I tryed this code but a am getting a TypeError: url.split is not a function
function getLibarys(url) {
(async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.goto(url);
// get source code with puppeteer
const html = await page.content();
const wappalyzer = new Wappalyzer();
(async function () {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(page, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
await browser.close()
})()
}
Fixed it by using the sample code from wappalyzer.
function getLibarys(url) {
const Wappalyzer = require('wappalyzer');
const options = {
debug: false,
delay: 500,
headers: {},
maxDepth: 3,
maxUrls: 10,
maxWait: 5000,
recursive: true,
probe: true,
proxy: false,
userAgent: 'Wappalyzer',
htmlMaxCols: 2000,
htmlMaxRows: 2000,
noScripts: false,
noRedirect: false,
};
const wappalyzer = new Wappalyzer(options)
;(async function() {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(url, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
}
I do not know if you still need an answer to this. But this is what a wappalyzer collaborator told me:
Normally you'd run Wappalyzer like this:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
await wappalyzer.init() // Launches a Puppeteer instance
const site = await wappalyzer.open(url)
If you want to use your own browser instance, you can skip wappalyzer.init() and assign the instance to wappalyzer.browser:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
wappalyzer.browser = await puppeteer.launch() // Use your own Puppeteer launch logic
const site = await wappalyzer.open(url)
You can find the discussion here.
Hope this helps.
I need to integrate the puppeteer-jest test framework with TestRail using TestRail API. But for that, I need to know what tests are failed and what of the tests are passed
I Search some information in the official GitHub Repository and in the Jest site. But nothing about it.
Test:
describe('Single company page Tests:', () => {
let homePage;
beforeAll(async () => {
homePage = await addTokenToBrowser(browser);
}, LOGIN_FLOW_MAX_TIME);
it('Open the company page from the list', async done => {
await goto(homePage, LIST_PAGE_RELATIVE_PATH);
await listPage.clickSearchByCompanyName(homePage);
await addCompanyNamePopup.isPopupDisplayed(homePage);
await addCompanyNamePopup.fillCompanyName(homePage, companies.century.link);
await addCompanyNamePopup.clickNext(homePage);
await addCompanyNamePopup.fillListName(homePage, listNames[0]);
await addCompanyNamePopup.clickSave(homePage);
await addCompanyNamePopup.clickViewList(homePage);
const nextPage = await clickCompanyName(homePage, browser, companies.century.name);
await companyPage.isOverviewTabPresent(nextPage);
await companyPage.isPeopleTabPresent(nextPage);
await companyPage.isSocialTabPresent(nextPage);
await companyPage.isFinanceTabPresent(nextPage);
await companyPage.isLeaseTabPresent(nextPage);
await homePage.close();
done();
});
}
I expected to get all passed and failed test cases name and write it to JSON with the name of test cases and the result of them.
Actually, I have nothing of this.
You can use true/false assertion approach I like I do in my github project.
for example, try anchor case to some final selector with simple assert:
describe('E2E testing', () => {
it('[Random Color Picker] color button clickable', async () => {
// Setup
let expected = true;
let expectedCssLocator = '#color-button';
let actual;
// Execute
let actualPromise = await page.waitForSelector(expectedCssLocator);
if (actualPromise != null) {
await page.click(expectedCssLocator);
actual = true;
}
else
actual = false;
// Verify
assert.equal(actual, expected);
});
I'm trying to make a InstagramBot that logs in and then go to some profile, my code worked yesterday for awhile and than it just stopped working .
I've tried to clone my repository from github, but it does'n work either, sometimes it works again, but if I try to create another function, the code just ignore the line of the code that changes the page.
I've also tried to create a new page and then in this new page use the goto function and it worked, but the account doesn keep logged in
The version of puppeteer that I'm using: 1.16.0
The version of node.js that I'm using: v10.15.3
const puppeteer = require('puppeteer');
const BASE_URL = "https://www.instagram.com/accounts/login/?hl=en&source=auth_switcher";
const instagram = {
browser: null,
page: null,
profile_url: null,
initialize: async (profile) => {
instagram.browser = await puppeteer.launch({
headless: false
})
instagram.profile_url = await "https://www.instagram.com/" + profile;
instagram.page = await instagram.browser.newPage();
await instagram.page.goto(BASE_URL, {waitUntil: 'networkidle2'});
},
login: async(username, password) =>{
await instagram.page.waitFor(1000);
await instagram.page.type('input[name="username"]', username);
await instagram.page.type('input[name="password"', password);
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
await console.log(instagram.profile_url);
await instagram.page.goto(instagram.profile_url, {timeout: 0, waitUntil: 'domcontentloaded'}); // the code just ignore this line
await instagram.page.waitFor(1000);
},
getPhotosLinks: async() => {
console.log("Do something here");
}
}
module.exports = instagram;
It doesn't give any error message, just doesn't work
Replace
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
with
await Promise.all([
instagram.page.click('button[type="submit"]');,
instagram.page.waitForNavigation()
]);
and see if it works