I'm using the nodejs xml parser sax-js to get content from xml. The structure of the xml is as follows:
<item>
<title>Some title</title>
<guid isPermaLink="false">http://example.com</guid>
</item>
<item>
<title>VIDEO: Some title</title>
<guid isPermaLink="false">http://example1.com</guid>
</item>
I want all urls under guid whose title donot start with VIDEO.
Currently, it's giving me all the urls.
My code currently is:
'use strict';
var sax = require('sax-js');
var request = require('request');
var href = 'http://some-xml-url.xml';
var urls = [];
var isTextPending = false;
var saxStream = sax.createStream(true);
saxStream.on('error', function (e) {
console.error(e);
});
saxStream.ontext = function (text) {
if(isTextPending) {
urls.push(text);
isTextPending = false;
}
};
saxStream.on('opentag', function (node) {
if(node.name === 'guid' && node.attributes.isPermaLink === 'false') {
isTextPending = true;
}
});
saxStream.end = function () {
}
request(href).pipe(saxStream);
You will need to handle more states that just 'isTextPending'.
Here is an example (note that this also handles 'closetag' event to exclude text between tags from the processing).
'use strict';
var sax = require('sax-js');
var request = require('request');
var href = 'http://some-xml-url.xml';
var urls = [];
var tagName = undefined;
var isValidGuid = false;
var isValidTitle = false;
var guidUrl = undefined;
var saxStream = sax.createStream(true);
saxStream.on('error', function (e) {
console.error(e);
});
saxStream.ontext = function (text) {
if (tagName === 'guid' && isValidGuid) {
guidUrl = text;
}
else if (tagName === 'title') {
isValidTitle = !(text.indexOf('VIDEO') === 0);
}
else return;
if (guidUrl !== undefined && isValidTitle) {
urls.push(guidUrl);
}
};
saxStream.on('opentag', function (node) {
tagName = node.name;
switch(node.name) {
case 'guid':
isValidGuid = (node.attributes.isPermaLink === 'false');
break
case 'item':
isValidGuid = false;
isValidTitle = false;
guidUrl = undefined;
break;
}
});
saxStream.on('closetag', function (node) {
tagName = undefined;
});
saxStream.end = function () {
console.log('Result: '+JSON.stringify(urls));
};
request(href).pipe(saxStream);
Related
So for some reason, the JavaScript that I'm using is not working on IE - There are errors which I will point out below. If someone knows anything else that I can try or knows how I can manipulate the code to make it more IE friendly, I'd really appreciate it.
Here are the steps that I've taken:
- Use https://babeljs.io/ to convert the whole page to ES2015.
- Added a polyfill script tag from https://polyfill.io/
Lots of code below (Whole general.js file which I've already converted using Babel (Please let me know if you want me to upload the original general.js file)):
Everything below rtd3Confirmation function is supposed to be:
for (var inputElement of rtd3ChangeClass) {
inputElement.addEventListener('change', rtd3Confirmation);
}
but that was before babel converted it.
"use strict";
// Form wrapper variables
var contactFormID = document.getElementById('contactForm');
var formWrapperSpecific = document.getElementById('form-wrapper-specific');
var formWrapperCertainSelection = document.getElementById('form-wrapper-certain-selection');
var formWrapperCertain = document.getElementById('form-wrapper-certain');
var formWrapperConfirm = document.getElementById('rtd3Confirm'); // Alert variables
var stateAlertID = document.getElementById('stateWarning');
var stateQuery = document.querySelector('#stateWarning b#stateName');
var resident = document.getElementById('resident');
var is_submitted = document.getElementsByClassName('is-submitted'); // Right to Know variables
var rtk5_selection = document.getElementById('rtk5');
var rtk5declaration = document.getElementById('rtk5Declaration'); // Right to Delete variables
var rtdChange = document.getElementById('rtd3');
var rtd3ChangeClass = document.querySelectorAll(".rtd3_change"); // Array for states
var states = []; // Once the DOM has loaded, call functions
document.addEventListener('DOMContentLoaded', function () {
formHandler();
residentAlert();
rtdCheckboxSelection();
rtd3Confirmation();
rtk5Declaration();
get_states();
}); // Show/hide form depending if state is included in array on dropdown selection
function formHandler() {
contactFormID.style.display = 'block';
if (resident == null) return;
if (!states.includes(resident.value)) contactFormID.style.display = 'none';
resident.addEventListener('change', formHandler);
} // Show/hide state alert for non-residents
function residentAlert() {
if (resident !== null) {
resident.addEventListener('change', function () {
// If value in states array is true, show the form
if (!states.includes(resident.value)) {
stateAlertID.style.display = 'block';
stateQuery.textContent = resident.options[resident.selectedIndex].text;
} else {
stateAlertID.style.display = 'none';
}
});
}
} // Show states dropdown depending on PHP variables
function get_states() {
var data_states = contactFormID.getAttribute('data-states').match(/\w{1,}/g);
data_states.forEach(function (state, i) {
return states[i] = state;
});
} // If RTK5 is selected, show declaration field and set required tag
function rtk5Declaration() {
if (!rtk5_selection.checked) {
formWrapperSpecific.style.display = 'none';
rtk5declaration.removeAttribute('required');
} else {
formWrapperSpecific.style.display = '';
rtk5declaration.setAttribute('required', 'required');
}
}
rtk5_selection.addEventListener('change', rtk5Declaration); // If RTD3 is selected, show/hide more checkboxes
function rtdCheckboxSelection() {
formWrapperCertain.style.display = rtdChange.checked ? '' : 'none';
document.querySelectorAll('[name="rtd[checked]"]').forEach(function (r) {
return r.addEventListener('change', rtdCheckboxSelection);
});
} // If at least one checkbox inside rtd3 is checked, show confirmation and make required
function rtd3Confirmation() {
if (document.querySelectorAll('.rtd3_change:checked').length) {
formWrapperCertainSelection.style.display = '';
formWrapperConfirm.required = true;
} else {
formWrapperCertainSelection.style.display = 'none';
formWrapperConfirm.required = false;
}
}
var _iteratorNormalCompletion = true;
var _didIteratorError = false;
var _iteratorError = undefined;
try {
for (var _iterator = rtd3ChangeClass[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
var inputElement = _step.value;
inputElement.addEventListener('change', rtd3Confirmation);
}
} catch (err) {
_didIteratorError = true;
_iteratorError = err;
} finally {
try {
if (!_iteratorNormalCompletion && _iterator.return != null) {
_iterator.return();
}
} finally {
if (_didIteratorError) {
throw _iteratorError;
}
}
}
I am new to Automation, I am using Selenium,ruby,capybara to execute this JS script and I get this error message,any help appreciated TIA
Error message
Selenium::WebDriver::Error::UnknownError: unknown error: Runtime.evaluate threw exception: SyntaxError: Invalid or unexpected token
page.execute_script('(function() {
function renderField($el, mode) {
var limitMet,
field = $el.data(\'add-field\'),
section = $el.data(\'section\');
window.DADI.editor.freeSections.forEach(function(freeSection) {
if (section === freeSection.name) {
freeSection.fields.forEach(function(sectionField) {
if(field === sectionField.source) {
var count = $(\'#section-\' + section).find(\'[data-field="\'+field+\'"]\').length;
if (sectionField.max && count >= sectionField.max) {
limitMet = true;
}
}
}.bind(this))
}
}.bind(this))
if (!limitMet) {
if (!window.DADI.editor.types[field]) return false;
var template = window.DADI.editor.types[field]._local.layouts.article[0].replace(\'.dust\', \'\');
var html;
if (template) {
var templateData = window.DADI.editor.types[field];
templateData.params = window.DADI.editor.params,
templateData.free = true;
templateData.fieldName = field;
window.DADI.render.render(\'fields/\' + template, \'#section-\' + section, templateData, {mode: mode}, function (err, out) {
if (err) {
html = err;
} else {
html = $(out);
if ($(\'.selectize\', html) && $(\'.selectize\', html).length) {
dadiSelect($(\'.selectize\', html));
}
if (html.attr(\'data-ql-editable\')) {
createEditor(html, 0);
html.focus();
}
var fieldType = window.DADI.editor.types[field]._remote._publishType;
var handler = window.DADI.editor.handlers[fieldType];
if (handler && (typeof handler.initialiseField === \'function\')) {
handler.initialiseField(html);
}
}
});
return html;
}
}
}
# This takes the button (source) element and drops it into the target area and subsequently renders the appropriate cms fields.
function simulateDragAndDrop(source, target)
{
var $clone = source.clone();
$(target).prepend($clone, target);
if ($clone.hasClass(\'dadiCells-library__element\')) {
renderedHtml = renderField($clone, \'none\');
$clone.replaceWith(renderedHtml);
}
}
var source = $($(\'.dadiCells-library__element\')[1]); #E.g. The hero video button
var target = $(\'#section-hero\'); #The target drop zone.
simulateDragAndDrop(source, target);
})()')
I had a similar issue. What fixed mine was replacing single backslahes / with double backslahes //
I am currently using the code below to detect if a URL is pasted into a contenteditable div. If a URL is pasted, it will automatically be converted into a link (surrounded by a tags).
How would I change this so that if the user pastes an image URL, it would be converted to <img src="https://example.com/image.jpg"> whilst also converting non-image URL's to standard links (surrounded by a tags).
var saveSelection, restoreSelection;
if (window.getSelection && document.createRange) {
saveSelection = function(containerEl) {
var range = window.getSelection().getRangeAt(0);
var preSelectionRange = range.cloneRange();
preSelectionRange.selectNodeContents(containerEl);
preSelectionRange.setEnd(range.startContainer, range.startOffset);
var start = preSelectionRange.toString().length;
return {
start: start,
end: start + range.toString().length
}
};
} else if (document.selection) {
}
function createLink(matchedTextNode) {
var el = document.createElement("a");
el.href = matchedTextNode.data;
el.appendChild(matchedTextNode);
return el;
}
function shouldLinkifyContents(el) {
return el.tagName != "A";
}
function surroundInElement(el, regex, surrounderCreateFunc, shouldSurroundFunc) {
var child = el.lastChild;
while (child) {
if (child.nodeType == 1 && shouldSurroundFunc(el)) {
surroundInElement(child, regex, createLink, shouldSurroundFunc);
} else if (child.nodeType == 3) {
surroundMatchingText(child, regex, surrounderCreateFunc);
}
child = child.previousSibling;
}
}
function surroundMatchingText(textNode, regex, surrounderCreateFunc) {
var parent = textNode.parentNode;
var result, surroundingNode, matchedTextNode, matchLength, matchedText;
while ( textNode && (result = regex.exec(textNode.data)) ) {
matchedTextNode = textNode.splitText(result.index);
matchedText = result[0];
matchLength = matchedText.length;
textNode = (matchedTextNode.length > matchLength) ?
matchedTextNode.splitText(matchLength) : null;
surroundingNode = surrounderCreateFunc(matchedTextNode.cloneNode(true));
parent.insertBefore(surroundingNode, matchedTextNode);
parent.removeChild(matchedTextNode);
}
}
var textbox = $('.editable')[0];
var urlRegex = /http(s?):\/\/($|[^\s]+)/;
function updateLinks() {
var savedSelection = saveSelection(textbox);
surroundInElement(textbox, urlRegex, createLink, shouldLinkifyContents);
restoreSelection(textbox, savedSelection);
}
var $textbox = $(textbox);
$(document).ready(function () {
$textbox.focus();
var keyTimer = null, keyDelay = 1000;
$textbox.keyup(function() {
if (keyTimer) {
window.clearTimeout(keyTimer);
}
keyTimer = window.setTimeout(function() {
updateLinks();
keyTimer = null;
}, keyDelay);
});
});
Did you try to parse the pasted url, and search for ending extension (jpg,gif,png) ?
It should be simple, if the ending is matching one of those, then you wrap the url into an href propriety.
Did You wrote this code by yourself?
Here you can read about strings methods to do this:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String
How can I get the latest page data (HTML & Javascript varaibles) from PhantomJS
e.g page.refresh() or something?
I have an Interval, than checks a variable (on the page) every 200ms. However, this variable and the page content, isn't shown to have changed over time. (even though I know it has)
So I need an efficient way to check the value of a JS variable every 200ms or so,
then once I've discovered that variable has changed value, I want to request the latest page HTML.
How can I do this?
var Error = function (description) {
this.description = description;
return this;
};
var DTO = function (status, content, error) {
this.status = status;
this.content = content;
this.error = error;
return this;
};
function outputAndExit(dto) {
console.log(JSON.stringify(dto));
phantom.exit();
}
//For any uncaught exception, just log it out for .NET to capture
window.onerror = function (errorMsg, url, lineNumber) {
var description = 'window.onerror caught an error: ' +
'errorMsg: ' + errorMsg +
'url: ' + url +
'lineNumber: ' + lineNumber;
outputAndExit(new DTO(false, null, new Error(description)));
};
var GetDynamicPageResult__ = function () {
var obj = new GetDynamicPageResult();
obj.initialize();
return obj;
};
var GetDynamicPageResult = function () {
var self = this;
this.initialize = function () {
this.error = null;
this.isContentReadyForCrawler = false;
this.ticker = null;
this.tickerInterval = 150;
this.tickerElapsed = 0;
this.url = '';
this.loadDependencies();
this.processArgs();
this.openPage();
};
this.loadDependencies = function () {
this.system = require('system'),
this.page = require('webpage').create(),
this.page.injectJs('jquery-1.10.2.min');
this.fs = require('fs');
};
this.processArgs = function () {
if (this.system.args.length == 0) {
outputAndExit(new DTO(false, null, new Error('No arguments given')));
}
//system.args[0] Was the name of this script
this.url = this.system.args[1];
};
this.updateIsContentReadyForCrawler = function () {
var updateIsContentReadyForCrawler = self.page.evaluate(function () {
self.isContentReadyForCrawler = window.isContentReadyForCrawler;
});
};
this.openPage = function () {
self.page.open(this.url, function (status) { //NB: status = 'success' || 'fail'
if (status !== 'success') {
outputAndExit(new DTO(false, null, new Error('page.open received a non-success status')));
}
self.initTicker();
});
};
this.initTicker = function () {
this.ticker = setInterval(self.handleTick, self.tickerInterval);
};
this.handleTick = function () {
self.tickerElapsed += self.tickerInterval;
self.updateIsContentReadyForCrawler();
if (self.isContentReadyForCrawler) {
clearInterval(self.ticker);
var content = self.page.content;
self.finish(true, content, null);
} else {
var tooMuchTimeElapsed = self.tickerElapsed > 7000;
if (tooMuchTimeElapsed) {
clearInterval(self.ticker);
self.finish(false, null, new Error('Too much time elapsed'));
}
}
};
this.finish = function (status, content, error) {
content = content || '';
error = error || {};
outputAndExit(new DTO(status, content, error));
};
};
/**********************************************************************************/
/***************************** Helpers *****************************/
/**********************************************************************************/
var Utility__ = function () {
var obj = new Utility();
obj.initialize();
return obj;
};
var Utility = function () {
var self = this;
this.initialize = function () {
};
this.isEmpty = function (obj) {
var isEmpty = false;
(obj == undefined || obj == null) && (isEmpty = true);
return isEmpty;
};
this.isStringEmpty = function (str) {
var isEmpty = false;
isEmpty(str) && (isEmpty = true);
(isEmpty == false && $.trim(str) == '') && (isEmpty = true);
return isEmpty;
};
};
var getDynamicPageResult = new GetDynamicPageResult__();
I think you are almost there: you need to be using page.evaluate(), but currently only use it to get window.isContentReadyForCrawler. You need to use page.evaluate() to grab the latest HTML too.
I'm going to shamelessly paste in code from another answer (https://stackoverflow.com/a/12044474/841830):
var html = page.evaluate(function () {
var root = document.getElementsByTagName("html")[0];
var html = root ? root.outerHTML : document.body.innerHTML;
return html;
});
I have a sample code:
function getKeyword() {
var instance = this;
var googlePattern = /(www\.google\..*)/;
this.params = function(parameters) {
var result = [];
var params = parameters.split("&");
for(var p in params) {
var kv = params[p].split("=");
result[kv[0]] = kv[1];
}
return result;
};
this.googleKeywords = function(params){
var query = params["q"];
var pattern = /"(.*?)"|(\w+)/g;
return decodeURIComponent(query).replace(/\+/g, " ").match(pattern);
};
this.parseReferrer = function(){
var result = [];
var pathAndParams = document.referrer.split("?");
if(pathAndParams.length == 2) {
var path = pathAndParams[0];
var params = this.params(pathAndParams[1]);
if(path.search(googlePattern) > 0) {
result = this.googleKeywords(params);
}
}
return result;
};
return this.parseReferrer();
}
And then:
<script type="text/javascript">
if (document.referrer && document.referrer != "") {
if (document.referrer.search(/google\.*/i) != -1){
var keyword = getKeyword();
alert(keyword);
} else {
alert('Not search from google');
}
} else {
alert('Not referrer');
}
</script>
Ex: when i search with keyword is "iphone 5", result not show alert("iphone 5") ? How to fix it ?
The JavaScript for in construct loops over more than just the entries in the array. If you want to use for in you need to make sure that you're only processing the actual parameters. This is easiest accomplished by checking for hasOwnProperty:
this.params = function(parameters) {
var result = [];
var params = parameters.split("&");
for(var p in params) {
if (params.hasOwnProperty(p))
{
var kv = params[p].split("=");
result[kv[0]] = kv[1];
}
}
return result;
};
Alternatively you can use a regular for loop over the array:
this.params = function(parameters) {
var result = [];
var params = parameters.split("&");
for(var i=0; i < params.length; i++) {
var kv = params[i].split("=");
result[kv[0]] = kv[1];
}
return result;
};