Running a python file on the button click in Node JS - javascript

I am trying to run a python file (which is actually running a Deep learning model) on a button click using Node JS. I am trying to achieve this using input form in html and routes in index.js file. But this is causing this error after running for a while:
I just want to run the python file in the background, no arguments, no input or output.
This is my index.html file:
<form action="/runpython" method="POST">
<button type="submit">Run python</button>
</form>
And this is my index.js file:
function callName(req, res) {
var spawn = require("child_process").spawn;
var process = spawn("python", ["denoising.py"]);
process.stdout.on("data", function (data) {
res.send(data.toString());
});
}
app.post("/runpython", callName);
Note: This works fine if I have simple print statement in my .py file
print("Hello World!")
But running below code in .py file creates an issue
"""# import modules"""
"""# loading previously trained model"""
import noisereduce as nr
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
from keras.models import load_model
import soundfile as sf
model = load_model(
r'model/denoiser_batchsize_5_epoch_100_sample_2000_org_n_n.hdf5', compile=True)
"""# testing on real world audio
"""
# function of moving point average used for minimizing distortion in denoised audio.
def moving_average(x, w):
return np.convolve(x, np.ones(w), 'valid') / w
# audio , sr = librosa.load(r'real_world_data/noise speech.wav' , res_type='kaiser_fast')
audio, sr = librosa.load(r'real_world_data/winona.wav', res_type='kaiser_fast')
# audio, sr = librosa.load(r'real_world_data/babar.wav', res_type='kaiser_fast')
# audio, sr = librosa.load(r'real_world_data/sarfaraz_eng.wav', res_type='kaiser_fast')
print(audio)
print(len(audio))
ipd.Audio(data=audio, rate=22050)
real_audio_spec = np.abs(librosa.stft(audio))
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(
real_audio_spec, ref=np.max), y_axis='log', x_axis='time', ax=ax)
ax.set_title('Power spectrogram input real audio ')
fig.colorbar(img, ax=ax, format="%+2.0f dB")
ipd.Audio(data=audio, rate=22050)
start = 0
end = 65536
print(len(audio))
print(len(audio)/22050)
split_range = int(len(audio) / 65536)
print(split_range)
predicted_noise = []
input_audio = []
for i in range(split_range):
audio_frame = audio[start:end]
input_audio.append(audio_frame)
audio_reshape = np.reshape(audio_frame, (1, 256, 256, 1))
prediction = model.predict(audio_reshape)
prediction = prediction.flatten()
predicted_noise.append([prediction])
start = start + 65536
end = end + 65536
predicted_noise = np.asarray(predicted_noise).flatten()
input_audio = np.asarray(input_audio).flatten()
real_pred_noise_spec = np.abs(librosa.stft(predicted_noise))
"""## input audio to model"""
ipd.Audio(data=input_audio, rate=22050)
sf.write('input_audio.wav', input_audio.astype(np.float32), 22050, 'PCM_16')
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(
real_pred_noise_spec, ref=np.max), y_axis='log', x_axis='time', ax=ax)
ax.set_title('Power spectrogram pred noise of real audio ')
fig.colorbar(img, ax=ax, format="%+2.0f dB")
ipd.Audio(data=predicted_noise, rate=22050)
sf.write('predicted_noise.wav', predicted_noise.astype(
np.float32), 22050, 'PCM_16')
ipd.Audio(data=moving_average(predicted_noise, 8), rate=22050)
denoised_final_audio = input_audio - predicted_noise
real_denoised_audio_spec = np.abs(librosa.stft(denoised_final_audio))
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(
real_denoised_audio_spec, ref=np.max), y_axis='log', x_axis='time', ax=ax)
ax.set_title('Power spectrogram final denoised real audio ')
fig.colorbar(img, ax=ax, format="%+2.0f dB")
ipd.Audio(data=denoised_final_audio, rate=22050)
sf.write('denoised_final_audio_by_model.wav',
denoised_final_audio.astype(np.float32), 22050, 'PCM_16')
"""## moving point average of the real world denoised signal"""
real_world_mov_avg = moving_average(denoised_final_audio, 4)
print(real_world_mov_avg)
print(len(real_world_mov_avg))
ipd.Audio(data=real_world_mov_avg, rate=22050)
"""## noise reduce library"""
# !pip install noisereduce
"""### nr on real world audio"""
# if you cant import it. than you need to install it using 'pip install noisereduce'
"""#### using noise reduce directly on the real world audio to see how it works on it. """
reduced_noise_direct = nr.reduce_noise(
y=audio.flatten(), sr=22050, stationary=False)
ipd.Audio(data=reduced_noise_direct, rate=22050)
sf.write('denoised_input_audio_direct_by_noisereduce_no_model.wav',
reduced_noise_direct.astype(np.float32), 22050, 'PCM_16')
"""#### using noise reduce on model denoised final output. to make it more clean."""
# perform noise reduction
reduced_noise = nr.reduce_noise(y=real_world_mov_avg.flatten(
), sr=22050, y_noise=predicted_noise, stationary=False)
# wavfile.write("mywav_reduced_noise.wav", rate, reduced_noise)
ipd.Audio(data=reduced_noise, rate=22050)
sf.write('denoised_final_audio_by_model_than_noisereduce_applied.wav',
reduced_noise.astype(np.float32), 22050, 'PCM_16')
print("python code executed")
If there is any alternative, then please let me know. I am new to Node JS and this is the only workable method I found

Why are you using res.send(data.toString());, I don't see any use of this line in your code. Try removing the mentioned code and run again.

Related

How to create an HTML page that says Hello World n times in n different colors from python,

I want to create an HTML page that says Hello World n times in n different colors where n is read from a config file in a different repository
and Spawn the page via python program
it would be more helpful we can use Javascript for the colors. I am able to display the hello world n number of times, but I dont know how to change the colors.
Here is the code I have written so far:
import ConfigParser
import webbrowser
configParser = ConfigParser.RawConfigParser()
configParser.read("/home/suryaveer/check.conf")
num = configParser.get('userinput-config', 'num')
num2 = int(num)
hello = """"hello world """
hello2 = hello*num2
message = """<html><head>
</head><body><p>"""+hello2+"""</p></body>
</html>"""
f = open('x.html', 'w')
f.write(message*num2)
f.close()
webbrowser.open("file:///home/suryaveer/x.html")
In order to style each "hello world" differently, they must be in different HTML blocks. You can achieve this wrapping each instance of "hello world" in a span.
hello = """"<span>hello world </span>"""
This will add no visible content to your page, but will allow each instance to be accessed and styled individually.
If you want to do it all in Python, you could add an inline style to the span when you create it.
colors = [color1, color2, ... colorN] # List of n colors
hello2 = ""
for c in colors:
hello2 += """"<span style='color:{};'>hello world </span>""".format(c)
Let me know if you need more help or want to know about other options (Javascript)
Finally I solved my problem
```
import ConfigParser
import webbrowser
import random
def test():
r = lambda: random.randint(0, 255)
return('#%02X%02X%02X' % (r(),r(),r()))
configParser = ConfigParser.RawConfigParser()
configParser.read("/home/suryaveer/check.conf")
num = configParser.get('userinput-config', 'num')
num2 = int(num)
message = """<p style='color:{};'>hello world</p>"""
print "number read from file : " + str(num2)
f = open('out.html', 'w')
for i in range(0, num2):
print message
f.write(message.format(test()))
f.close()
webbrowser.open("file:///home/suryaveer/out.html")
```

Remove high frequency sound from streaming audio node js

I have a small app that accepts incoming audio stream from the internet and I'm trying to find the frequency of a tone or continuous beep. At the time of the tone / beep it is the only thing that would be playing. The rest of the audio is either silence or talking. I'm using the node-pitchfinder npm module to find the tone and when I use a sample audio clip I made of 2,000Hz the app prints out the frequency within one or two Hz. When I pull the audio stream online I keep getting results like 17,000 Hz. My guess is that there is some "noise" in the audio signal and that's what the node-pitchfinder module is picking up.
Is there any way I can filter out that noise in real time to get an accurate frequency?
The streaming audio file is: http://relay.broadcastify.com/fq85hty701gnm4z.mp3
Code below:
const fs = require('fs');
const fsa = require('fs-extra');
const Lame = require('lame');
const Speaker = require('speaker');
const Volume = require('pcm-volume');
const Analyser = require('audio-analyser')
const request = require('request')
const Chunker = require('stream-chunker');
const { YIN } = require('node-pitchfinder')
const detectPitch = YIN({ sampleRate: 44100})
//const BUFSIZE = 64;
const BUFSIZE = 500;
var decoder = new Lame.Decoder();
decoder.on('format', function(format){onFormat(format)});
var chunker = Chunker(BUFSIZE);
chunker.pipe(decoder);
var options = {
url: 'http://relay.broadcastify.com/fq85hty701gnm4z.mp3',
headers: {
"Upgrade-Insecure-Requests": 1,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15"
}
}
var audio_stream = request(options);
//var audio_stream = fs.createReadStream('./2000.mp3');
audio_stream.pipe(chunker);
function onFormat(format)
{
//if (volume == "undefined")
volume = 1.0;
vol = new Volume(volume);
speaker = new Speaker(format);
analyser = createAnalyser(format);
analyser.on('data', sample);
console.log(format);
vol.pipe(speaker);
vol.pipe(analyser);
decoder.pipe(vol);
vol.setVolume(volume);
}
function createAnalyser(format)
{
return new Analyser({
fftSize: 8,
bufferSize: BUFSIZE,
'pcm-stream': {
channels: format.channels,
sampleRate: format.sampleRate,
bitDepth: format.bitDepth
}
});
}
var logFile = 'log.txt';
var logOptions = {flag: 'a'};
function sample()
{
if (analyser) {
const frequency = detectPitch(analyser._data)
console.log(frequency)
}
}
My goal is to find the most dominant audio frequency in a chunk of data so I can figure out the tone.
I found some code that supposedly does this with python
def getFreq( pkt ):
#Use FFT to determine the peak frequency of the last chunk
thefreq = 0
if len(pkt) == bufferSize*swidth:
indata = np.array(wave.struct.unpack("%dh"%(len(pkt)/swidth), pkt))*window
# filter out everything outside of our bandpass Hz
bp = np.fft.rfft(indata)
minFilterBin = (bandPass[0]/(sampleRate/bufferSize)) + 1
maxFilterBin = (bandPass[1]/(sampleRate/bufferSize)) - 1
for i in range(len(bp)):
if i < minFilterBin:
bp[i] = 0
if i > maxFilterBin:
bp[i] = 0
# Take the fft and square each value
fftData = abs(bp)**2
# find the maximum
which = fftData[1:].argmax() + 1
# Compute the magnitude of the sample we found
dB = 10*np.log10(1e-20+abs(bp[which]))
#avgdB = 10*np.log10(1e-20+abs(bp[which - 10:which + 10].mean()))
if dB >= minDbLevel:
# use quadratic interpolation around the max
if which != len(fftData)-1:
warnings.simplefilter("error")
try:
y0, y1, y2 = np.log(fftData[which-1:which+2:])
x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
except RuntimeWarning:
return(-1)
# find the frequency and output it
warnings.simplefilter("always")
thefreq = (which + x1) * sampleRate/bufferSize
else:
thefreq = which * sampleRate/bufferSize
else:
thefreq = -1
return(thefreq)
Original answer:
I can not provide you with a solution but (hopefully) give you enough advice to solve the problem.
I would recommend that you save a part of the stream you want to analyze to a file and then take a look at the file with a spectrum analyzer (e.g. with Audacity). This allows you to determine if the 17kHz signal is present in the audio stream.
If the 17 kHz signal is present in the audio stream then you can filter the audio stream with a low pass filter (e.g. audio-biquad with type lowpass and frequency at somewhere above 2 kHz).
If the 17 kHz signal is not present in the audio then you could try to increase the buffer size BUFSIZE (currently set to 500 in your code). In the example on node-pitchfinder's GitHub page they use a complete audio file for pitch detection. Depending on how the pitch detection algorithm is implemented the result might be different for larger chunks of audio data (i.e. a few seconds) compared to very short chunks (500 samples is around 11 ms at sample rate 44100). Start with a large value for BUFSIZE (e.g. 44100 -> 1 second) and see if it makes a difference.
Explanation of the python code: The code uses FFT (fast fourier transform) to find out which frequencies are present in the audio signal and then searches for the frequency with the highest value. This usually works well for simple signals like a 2 kHz sine wave. You could use dsp.js which provides an FFT implementation if you want to implement it in javascript. However, it is quite a challenge to get this right without some knowledge of digital signal processing theory.
As a side note: the YIN algorithm does not use FFT, it is based on autocorrelation.
Update
The following script uses the fft data of audio-analyser and searches for the maximum frequency. This approach is very basic and only works well for signals where just one frequency is very dominant. The YIN algorithm is much better suited for pitch detection than this example.
const fs = require('fs');
const Lame = require('lame');
const Analyser = require('audio-analyser')
const Chunker = require('stream-chunker');
var analyser;
var fftSize = 4096;
var decoder = new Lame.Decoder();
decoder.on('format', format => {
analyser = createAnalyser(format);
decoder.pipe(analyser);
analyser.on('data', processSamples);
console.log(format);
});
var chunker = Chunker(fftSize);
var audio_stream = fs.createReadStream('./sine.mp3');
audio_stream.pipe(chunker);
chunker.pipe(decoder);
function createAnalyser(format) {
return new Analyser({
fftSize: fftSize,
frequencyBinCount: fftSize / 2,
sampleRate: format.sampleRate,
channels: format.channels,
bitDepth: format.bitDepth
});
}
function processSamples() {
if (analyser) {
var fftData = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(fftData);
var maxBin = fftData.indexOf(Math.max(...fftData));
var thefreq = maxBin * analyser.sampleRate / analyser.fftSize;
console.log(maxBin + " " + thefreq);
}
}

Pull variable value from javascript source using BeautifulSoup4 Python

I'm newbie in python programming. I'm learning beautifulsoup to scrap website.
I want to extract and store the value of "stream" to my variable.
My Python code as follows :
import bs4 as bs #Importing BeautifulSoup4 Python Library.
import urllib.request
import requests
import json
import re
headers = {'User-Agent':'Mozilla/5.0'}
url = "http://thoptv.com/partners/mhdTVlive/Core.php?level=1200&channel=Dsports_HD"
page = requests.get(url)
soup = bs.BeautifulSoup(page.text,"html.parser")
pattern = re.compile('var stream = (.*?);')
scripts = soup.find_all('script')
for script in scripts:
if(pattern.match(str(script.string))):
data = pattern.match(script.string)
links = json.loads(data.groups()[0])
print(links)
This is the source javascript code to get the stream url value.
https://content.jwplatform.com/libraries/oncyToRO.js'>if( navigator.userAgent.match(/android/i)||
navigator.userAgent.match(/webOS/i)||
navigator.userAgent.match(/iPhone/i)||
navigator.userAgent.match(/iPad/i)||
navigator.userAgent.match(/iPod/i)||
navigator.userAgent.match(/BlackBerry/i)||
navigator.userAgent.match(/Windows Phone/i)) {var stream =
"http://ssrigcdnems01.cdnsrv.jio.com/jiotv.live.cdn.jio.com/Dsports_HD/Dsports_HD_800.m3u8?jct=ibxIPxc6rkq1yIUJb4RlEV&pxe=1504146411&st=AQIC5wM2LY4SfczRaEwgGl4Dyvly_3HihdlD_Oduojk5Kxs.AAJTSQACMDIAAlNLABQtNjUxNDEwODczODgxNzkyMzg5OQACUzEAAjYw";}else{var
stream =
"http://hd.simiptv.com:8080//index.m3u8?key=VIoVSsGRLRouHWGNo1epzX&exp=932213423&domain=thoptv.stream&id=461";}jwplayer("THOPTVPlayer").setup({"title":
'thoptv.stream',"stretching":"exactfit","width": "100%","file":
none,"height": "100%","skin": "seven","autostart": "true","logo":
{"file":"https://i.imgur.com/EprI2uu.png","margin":"-0",
"position":"top-left","hide":"false","link":"http://mhdtvlive.co.in"},"androidhls":
true,});jwplayer("THOPTVPlayer").onError(function(){jwplayer().load({file:"http://content.jwplatform.com/videos/7RtXk3vl-52qL9xLP.mp4",image:"http://content.jwplatform.com/thumbs/7RtXk3vl-480.jpg"});jwplayer().play();});jwplayer("THOPTVPlayer").onComplete(function(){window.location
= window.location.href;});jwplayer("THOPTVPlayer").onPlay(function(){clearTimeout(theTimeout);});
I need to extract the url from stream.
var stream = "http://ssrigcdnems01.cdnsrv.jio.com/jiotv.live.cdn.jio.com/Dsports_HD/Dsports_HD_800.m3u8?jct=ibxIPxc6rkq1yIUJb4RlEV&pxe=1504146411&st=AQIC5wM2LY4SfczRaEwgGl4Dyvly_3HihdlD_Oduojk5Kxs.AAJTSQACMDIAAlNLABQtNjUxNDEwODczODgxNzkyMzg5OQACUzEAAjYw";}
Rather then thinking complicated with regex, if the link is the only dynamically changing part, you can split the string with some known separating tokens.
x = """
https://content.jwplatform.com/libraries/oncyToRO.js'>if( navigator.userAgent.match(/android/i)|| navigator.userAgent.match(/webOS/i)|| navigator.userAgent.match(/iPhone/i)|| navigator.userAgent.match(/iPad/i)|| navigator.userAgent.match(/iPod/i)|| navigator.userAgent.match(/BlackBerry/i)|| navigator.userAgent.match(/Windows Phone/i)) {var stream = "http://ssrigcdnems01.cdnsrv.jio.com/jiotv.live.cdn.jio.com/Dsports_HD/Dsports_HD_800.m3u8?jct=ibxIPxc6rkq1yIUJb4RlEV&pxe=1504146411&st=AQIC5wM2LY4SfczRaEwgGl4Dyvly_3HihdlD_Oduojk5Kxs.AAJTSQACMDIAAlNLABQtNjUxNDEwODczODgxNzkyMzg5OQACUzEAAjYw";}else{var stream = "http://hd.simiptv.com:8080//index.m3u8?key=VIoVSsGRLRouHWGNo1epzX&exp=932213423&domain=thoptv.stream&id=461";}jwplayer("THOPTVPlayer").setup({"title": 'thoptv.stream',"stretching":"exactfit","width": "100%","file": none,"height": "100%","skin": "seven","autostart": "true","logo": {"file":"https://i.imgur.com/EprI2uu.png","margin":"-0", "position":"top-left","hide":"false","link":"http://mhdtvlive.co.in"},"androidhls": true,});jwplayer("THOPTVPlayer").onError(function(){jwplayer().load({file:"http://content.jwplatform.com/videos/7RtXk3vl-52qL9xLP.mp4",image:"http://content.jwplatform.com/thumbs/7RtXk3vl-480.jpg"});jwplayer().play();});jwplayer("THOPTVPlayer").onComplete(function(){window.location = window.location.href;});jwplayer("THOPTVPlayer").onPlay(function(){clearTimeout(theTimeout);});
"""
left1, right1 = x.split("Phone/i)) {var stream =")
left2, right2 = right1.split(";}else")
print(left2)
# "http://ssrigcdnems01.cdnsrv.jio.com/jiotv.live.cdn.jio.com/Dsports_HD/Dsports_HD_800.m3u8?jct=ibxIPxc6rkq1yIUJb4RlEV&pxe=1504146411&st=AQIC5wM2LY4SfczRaEwgGl4Dyvly_3HihdlD_Oduojk5Kxs.AAJTSQACMDIAAlNLABQtNjUxNDEwODczODgxNzkyMzg5OQACUzEAAjYw"
pattern.match() matches the pattern from the beginning of the string. Try using pattern.search() instead - it will match anywhere within the string.
Change your for loop to this:
for script in scripts:
data = pattern.search(script.text)
if data is not None:
stream_url = data.groups()[0]
print(stream_url)
You can also get rid of the surrounding quotes by changing the regex pattern to:
pattern = re.compile('var stream = "(.*?)";')
so that the double quotes are not included in the group.
You might also have noticed that there are two possible stream variables depending on the accessing user agent. For tablet like devices the first would be appropriate, while all other user agents should use the second stream. You can use pattern.findall() to get all of them:
>>> pattern.findall(script.text)
['"http://ssrigcdnems01.cdnsrv.jio.com/jiotv.live.cdn.jio.com/Dsports_HD/Dsports_HD_800.m3u8?jct=LEurobVVelOhbzOZ6EkTwr&pxe=1571716053&st=AQIC5wM2LY4SfczRaEwgGl4Dyvly_3HihdlD_Oduojk5Kxs.*AAJTSQACMDIAAlNLABQtNjUxNDEwODczODgxNzkyMzg5OQACUzEAAjYw*"', '"http://hd.simiptv.com:8080//index.m3u8?key=vaERnLJswnWXM8THmfvDq5&exp=944825312&domain=thoptv.stream&id=461"']
this code works for me
import bs4 as bs #Importing BeautifulSoup4 Python Library.
import urllib.request
import requests
import json
headers = {'User-Agent':'Mozilla/5.0'}
url = "http://thoptv.com/partners/mhdTVlive/Core.php?
level=1200&channel=Dsports_HD"
page = requests.get(url)
soup = bs.BeautifulSoup(page.text,"html.parser")
scripts = soup.find_all('script')
out = list()
for c, i in enumerate(scripts): #go over list
text = i.text
if(text[:2] == "if"): #if the (if) comes first
for count, t in enumerate(text): # then we have reached the correct item in the list
if text[count] == "{" and text[count + 1] == "v" and text[count + 5] == "s": # and if this is here that stream is set
tmp = text[count:] # add this to the tmp varible
break # and end
co = 0
for m in tmp: #loop over the results from prev. result
if m == "\"" and co == 0: #if string is starting
co = 1 #set count to "true" 1
elif m == "\"" and co == 1: # if it is ending stop
print(''.join(out)) #results
break
elif co == 1:
# as long as we are looping over the rigth string
out.append(m) #add to out list
pass
result = ''.join(out) #set result
it basicly filters the string manuely.
but if we use user1767754 method (brilliant by the way) we will end up something like this:
import bs4 as bs #Importing BeautifulSoup4 Python Library.
import urllib.request
import requests
import json
headers = {'User-Agent':'Mozilla/5.0'}
url = "http://thoptv.com/partners/mhdTVlive/Core.php?level=1200&channel=Dsports_HD"
page = requests.get(url)
soup = bs.BeautifulSoup(page.text,"html.parser")
scripts = soup.find_all('script')
x = scripts[3].text
left1, right1 = x.split("Phone/i)) {var stream =")
left2, right2 = right1.split(";}else")
print(left2)

python uuid5 equivalent in javascript nodejs

I am trying to convert python code to node js. Need help in converting this code to JS.
```
import uuid
uuid_salt = '9909fa72-b690-55dd-ab71-a987953bb438'
x = 'hello'
uuid_salt = uuid.UUID(uuid_salt)
salted_uuid = lambda x: str(uuid.uuid5(uuid_salt, x))
print salted_uuid(x)
```
Expected Output - 3e735408-7f83-53cf-b7ce-f9ef69e5ca43
I tried writing this way but output does not match
var uuid_salt = '9909fa72-b690-55dd-ab71-a987953bb438'
var x = 'hello'
var hmac = crypto.createHmac('sha1', uuid_salt);
hmac.setEncoding('hex');
hmac.end(x, function () {
hash = hmac.read();
console.log('hash >>> ', hash);
});
here is the actual python function copy paste from library
def uuid5(namespace, name):
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
from hashlib import sha1
hash = sha1(namespace.bytes + name).digest()
return UUID(bytes=hash[:16], version=5)
I guess the bytes part is missing i tried created bytes array in JavaScript/Node but still the output is off.
Please help.
Thanks in advance!

Python: 'Incorrect padding' error

I am trying to base64 encode matplotlib generated plots and write them to a html page on GAE.
Later, digitize the plots
Finally, converting the html page to pdf using xhtml2pdf library.
The code worked well if plots are created by jqplot. However once switched to matplotlib, I am having an error called Incorrect padding, which I do not know how to solve. Thanks for any suggestions.
Step 1
Python CODE:
import matplotlib
import matplotlib.pyplot as plt
import StringIO
import urllib, base64
plt.figure(1)
plt.hist([2,3,4,5,6,7], bins=3)
fig = plt.gcf()
imgdata = StringIO.StringIO()
fig.savefig(imgdata, format='png')
imgdata.seek(0) # rewind the data
uri_1 = 'data:image/png;base64,'
uri_2 = urllib.quote(base64.b64encode(imgdata.buf))
uri_2 += "=" * ((4 - len(uri_2 ) % 4) % 4)
uri_3 = uri_1 + uri_2
uri_4 = '<img id="chart1" src = "%s"/>' % (uri_3)
Step 2
Javascript
var n_plot = $('img[id^="chart"]').size();
i=1;
var imgData = [];
while(i <= n_plot){
//sometimes the plots are generated under jqplot
try{
imgData.push($('#chart'+i).jqplotToImageStr({}));
i=i+1
}
// This case is generated by matplotlib
catch(e){
imgData.push($('#chart'+i).attr('src'));
i=i+1
}
}
imgData_json = JSON.stringify(imgData)
Step 3
Python
pdf = pisa.CreatePDF(imgData_json, file(filename, "wb"))
That's the problem:
uri_2 += "=" * ((4 - len(uri_2 ) % 4) % 4)
Why do you think the url needs extra padding? The output of b64encode will already be padded if neccessary, adding extra padding will only confuse the decoder. Some may ignore the extra padding, most however will just produce an error.
Oh, and also for data urls it's unnecessary to quote the string - base64 doesn't contain characters that really need to be escaped. That's only needed if you need to use base64 in a regular url, as + and = can cause problems, but that's not the case for data urls.

Categories