How to calculate md5 hash of a file using javascript - javascript

Is there a way to calculate the MD5 hash of a file before the upload to the server using Javascript?

While there are JS implementations of the MD5 algorithm, older browsers are generally unable to read files from the local filesystem.
I wrote that in 2009. So what about new browsers?
With a browser that supports the FileAPI, you can read the contents of a file - the user has to have selected it, either with an <input> element or drag-and-drop. As of Jan 2013, here's how the major browsers stack up:
FF 3.6 supports FileReader, FF4 supports even more file based functionality
Chrome has supported the FileAPI since version 7.0.517.41
Internet Explorer 10 has partial FileAPI support
Opera 11.10 has partial support for FileAPI
Safari - I couldn't find a good official source for this, but this site suggests partial support from 5.1, full support for 6.0. Another article reports some inconsistencies with the older Safari versions
How?
See the answer below by Benny Neugebauer which uses the MD5 function of CryptoJS

I've made a library that implements incremental md5 in order to hash large files efficiently.
Basically you read a file in chunks (to keep memory low) and hash it incrementally.
You got basic usage and examples in the readme.
Be aware that you need HTML5 FileAPI, so be sure to check for it.
There is a full example in the test folder.
https://github.com/satazor/SparkMD5

it is pretty easy to calculate the MD5 hash using the MD5 function of CryptoJS and the HTML5 FileReader API. The following code snippet shows how you can read the binary data and calculate the MD5 hash from an image that has been dragged into your Browser:
var holder = document.getElementById('holder');
holder.ondragover = function() {
return false;
};
holder.ondragend = function() {
return false;
};
holder.ondrop = function(event) {
event.preventDefault();
var file = event.dataTransfer.files[0];
var reader = new FileReader();
reader.onload = function(event) {
var binary = event.target.result;
var md5 = CryptoJS.MD5(binary).toString();
console.log(md5);
};
reader.readAsBinaryString(file);
};
I recommend to add some CSS to see the Drag & Drop area:
#holder {
border: 10px dashed #ccc;
width: 300px;
height: 300px;
}
#holder.hover {
border: 10px dashed #333;
}
More about the Drag & Drop functionality can be found here: File API & FileReader
I tested the sample in Google Chrome Version 32.

The following snippet shows an example, which can archive a throughput of 400 MB/s while reading and hashing the file.
It is using a library called hash-wasm, which is based on WebAssembly and calculates the hash faster than js-only libraries. As of 2020, all modern browsers support WebAssembly.
const chunkSize = 64 * 1024 * 1024;
const fileReader = new FileReader();
let hasher = null;
function hashChunk(chunk) {
return new Promise((resolve, reject) => {
fileReader.onload = async(e) => {
const view = new Uint8Array(e.target.result);
hasher.update(view);
resolve();
};
fileReader.readAsArrayBuffer(chunk);
});
}
const readFile = async(file) => {
if (hasher) {
hasher.init();
} else {
hasher = await hashwasm.createMD5();
}
const chunkNumber = Math.floor(file.size / chunkSize);
for (let i = 0; i <= chunkNumber; i++) {
const chunk = file.slice(
chunkSize * i,
Math.min(chunkSize * (i + 1), file.size)
);
await hashChunk(chunk);
}
const hash = hasher.digest();
return Promise.resolve(hash);
};
const fileSelector = document.getElementById("file-input");
const resultElement = document.getElementById("result");
fileSelector.addEventListener("change", async(event) => {
const file = event.target.files[0];
resultElement.innerHTML = "Loading...";
const start = Date.now();
const hash = await readFile(file);
const end = Date.now();
const duration = end - start;
const fileSizeMB = file.size / 1024 / 1024;
const throughput = fileSizeMB / (duration / 1000);
resultElement.innerHTML = `
Hash: ${hash}<br>
Duration: ${duration} ms<br>
Throughput: ${throughput.toFixed(2)} MB/s
`;
});
<script src="https://cdn.jsdelivr.net/npm/hash-wasm"></script>
<!-- defines the global `hashwasm` variable -->
<input type="file" id="file-input">
<div id="result"></div>

HTML5 + spark-md5 and Q
Assuming your'e using a modern browser (that supports HTML5 File API), here's how you calculate the MD5 Hash of a large file (it will calculate the hash on variable chunks)
function calculateMD5Hash(file, bufferSize) {
var def = Q.defer();
var fileReader = new FileReader();
var fileSlicer = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
var hashAlgorithm = new SparkMD5();
var totalParts = Math.ceil(file.size / bufferSize);
var currentPart = 0;
var startTime = new Date().getTime();
fileReader.onload = function(e) {
currentPart += 1;
def.notify({
currentPart: currentPart,
totalParts: totalParts
});
var buffer = e.target.result;
hashAlgorithm.appendBinary(buffer);
if (currentPart < totalParts) {
processNextPart();
return;
}
def.resolve({
hashResult: hashAlgorithm.end(),
duration: new Date().getTime() - startTime
});
};
fileReader.onerror = function(e) {
def.reject(e);
};
function processNextPart() {
var start = currentPart * bufferSize;
var end = Math.min(start + bufferSize, file.size);
fileReader.readAsBinaryString(fileSlicer.call(file, start, end));
}
processNextPart();
return def.promise;
}
function calculate() {
var input = document.getElementById('file');
if (!input.files.length) {
return;
}
var file = input.files[0];
var bufferSize = Math.pow(1024, 2) * 10; // 10MB
calculateMD5Hash(file, bufferSize).then(
function(result) {
// Success
console.log(result);
},
function(err) {
// There was an error,
},
function(progress) {
// We get notified of the progress as it is executed
console.log(progress.currentPart, 'of', progress.totalParts, 'Total bytes:', progress.currentPart * bufferSize, 'of', progress.totalParts * bufferSize);
});
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/q.js/1.4.1/q.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/spark-md5/2.0.2/spark-md5.min.js"></script>
<div>
<input type="file" id="file"/>
<input type="button" onclick="calculate();" value="Calculate" class="btn primary" />
</div>

You need to to use FileAPI. It is available in the latest FF & Chrome, but not IE9.
Grab any md5 JS implementation suggested above. I've tried this and abandoned it because JS was too slow (minutes on large image files). Might revisit it if someone rewrites MD5 using typed arrays.
Code would look something like this:
HTML:
<input type="file" id="file-dialog" multiple="true" accept="image/*">
JS (w JQuery)
$("#file-dialog").change(function() {
handleFiles(this.files);
});
function handleFiles(files) {
for (var i=0; i<files.length; i++) {
var reader = new FileReader();
reader.onload = function() {
var md5 = binl_md5(reader.result, reader.result.length);
console.log("MD5 is " + md5);
};
reader.onerror = function() {
console.error("Could not read the file");
};
reader.readAsBinaryString(files.item(i));
}
}

Apart from the impossibility to get
file system access in JS, I would not
put any trust at all in a
client-generated checksum. So
generating the checksum on the server
is mandatory in any case. – Tomalak
Apr 20 '09 at 14:05
Which is useless in most cases. You want the MD5 computed at client side, so that you can compare it with the code recomputed at server side and conclude the upload went wrong if they differ. I have needed to do that in applications working with large files of scientific data, where receiving uncorrupted files were key. My cases was simple, cause users had the MD5 already computed from their data analysis tools, so I just needed to ask it to them with a text field.

If sha256 is also fine:
async sha256(file: File) {
// get byte array of file
let buffer = await file.arrayBuffer();
// hash the message
const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
// convert ArrayBuffer to Array
const hashArray = Array.from(new Uint8Array(hashBuffer));
// convert bytes to hex string
const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
return hashHex;
}

To get the hash of files, there are a lot of options. Normally the problem is that it's really slow to get the hash of big files.
I created a little library that get the hash of files, with the 64kb of the start of the file and the 64kb of the end of it.
Live example: http://marcu87.github.com/hashme/ and library: https://github.com/marcu87/hashme

hope you have found a good solution by now. If not, the solution below is an ES6 promise implementation based on js-spark-md5
import SparkMD5 from 'spark-md5';
// Read in chunks of 2MB
const CHUCK_SIZE = 2097152;
/**
* Incrementally calculate checksum of a given file based on MD5 algorithm
*/
export const checksum = (file) =>
new Promise((resolve, reject) => {
let currentChunk = 0;
const chunks = Math.ceil(file.size / CHUCK_SIZE);
const blobSlice =
File.prototype.slice ||
File.prototype.mozSlice ||
File.prototype.webkitSlice;
const spark = new SparkMD5.ArrayBuffer();
const fileReader = new FileReader();
const loadNext = () => {
const start = currentChunk * CHUCK_SIZE;
const end =
start + CHUCK_SIZE >= file.size ? file.size : start + CHUCK_SIZE;
// Selectively read the file and only store part of it in memory.
// This allows client-side applications to process huge files without the need for huge memory
fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
};
fileReader.onload = e => {
spark.append(e.target.result);
currentChunk++;
if (currentChunk < chunks) loadNext();
else resolve(spark.end());
};
fileReader.onerror = () => {
return reject('Calculating file checksum failed');
};
loadNext();
});

There is a couple scripts out there on the internet to create an MD5 Hash.
The one from webtoolkit is good, http://www.webtoolkit.info/javascript-md5.html
Although, I don't believe it will have access to the local filesystem as that access is limited.

This is another hash-wasm example, but using the streams API, instead of having to set FileReader:
async function calculateSHA1(file: File) {
const hasher = await createSHA1()
const hasherStream = new WritableStream<Uint8Array>({
start: () => {
hasher.init()
// you can set UI state here also
},
write: chunk => {
hasher.update(chunk)
// you can set UI state here also
},
close: () => {
// you can set UI state here also
},
})
await file.stream().pipeTo(hasherStream)
return hasher.digest('hex')
}

I don't believe there is a way in javascript to access the contents of a file upload. So you therefore cannot look at the file contents to generate an MD5 sum.
You can however send the file to the server, which can then send an MD5 sum back or send the file contents back .. but that's a lot of work and probably not worthwhile for your purposes.

Related

How to write a large amount of random bytes to file

I need to generate some files of varying sizes to test an application, I thought the easiest way to do so would be to write a Node script to do so.
I wrote the following code but the process crashes when the file size exceeds my memory.
const fs = require("fs");
const crypto = require('crypto');
const gb = 1024 * 1024 * 1024;
const data = crypto.randomBytes(gb * 5);
fs.writeFile('bytes.bin', data, (err) => {
if (err) throw err;
console.log('The file has been saved!');
});
On top of your memory issue you will also have an issue with the crypto module as the amount of bytes it can generate is limited.
You will need to use fs.createWriteStream to generate and write the data in chunks rather than generating it in one go.
Here is a modified version of some code from the Node documentation on streams to stream chunks of random bytes to a file:
const fs = require("fs");
const crypto = require('crypto');
const fileName = "random-bytes.bin";
const fileSizeInBytes = Number.parseInt(process.argv[2]) || 1000;
console.log(`Writing ${fileSizeInBytes} bytes`)
const writer = fs.createWriteStream(fileName)
writetoStream(fileSizeInBytes, () => console.log(`File created: ${fileName}`));
function writetoStream(bytesToWrite, callback) {
const step = 1000;
let i = bytesToWrite;
write();
function write() {
let ok = true;
do {
const chunkSize = i > step ? step : i;
const buffer = crypto.randomBytes(chunkSize);
i -= chunkSize;
if (i === 0) {
// Last time!
writer.write(buffer, callback);
} else {
// See if we should continue, or wait.
// Don't pass the callback, because we're not done yet.
ok = writer.write(buffer);
}
} while (i > 0 && ok);
if (i > 0) {
// Had to stop early!
// Write some more once it drains.
writer.once('drain', write);
}
}
}
There are also online tools which let you generate files of your required size with less setup. The files are also generated on your system so they don't have to be downloaded over the wire.

Correctly determine correct file type client side in javascript [duplicate]

I have read this and this questions which seems to suggest that the file MIME type could be checked using JavaScript on client side. Now, I understand that the real validation still has to be done on server side. I want to perform a client side checking to avoid unnecessary wastage of server resource.
To test whether this can be done on client side, I changed the extension of a JPEG test file to .png and choose the file for upload. Before sending the file, I query the file object using a JavaScript console:
document.getElementsByTagName('input')[0].files[0];
This is what I get on Chrome 28.0:
File {webkitRelativePath: "", lastModifiedDate: Tue Oct 16 2012
10:00:00 GMT+0000 (UTC), name: "test.png", type: "image/png", size:
500055…}
It shows type to be image/png which seems to indicate that the checking is done based on file extension instead of MIME type. I tried Firefox 22.0 and it gives me the same result. But according to the W3C spec, MIME Sniffing should be implemented.
Am I right to say that there is no way to check the MIME type with JavaScript at the moment? Or am I missing something?
You can easily determine the file MIME type with JavaScript's FileReader before uploading it to a server. I agree that we should prefer server-side checking over client-side, but client-side checking is still possible. I'll show you how and provide a working demo at the bottom.
Check that your browser supports both File and Blob. All major ones should.
if (window.FileReader && window.Blob) {
// All the File APIs are supported.
} else {
// File and Blob are not supported
}
Step 1:
You can retrieve the File information from an <input> element like this (ref):
<input type="file" id="your-files" multiple>
<script>
var control = document.getElementById("your-files");
control.addEventListener("change", function(event) {
// When the control has changed, there are new files
var files = control.files,
for (var i = 0; i < files.length; i++) {
console.log("Filename: " + files[i].name);
console.log("Type: " + files[i].type);
console.log("Size: " + files[i].size + " bytes");
}
}, false);
</script>
Here is a drag-and-drop version of the above (ref):
<div id="your-files"></div>
<script>
var target = document.getElementById("your-files");
target.addEventListener("dragover", function(event) {
event.preventDefault();
}, false);
target.addEventListener("drop", function(event) {
// Cancel default actions
event.preventDefault();
var files = event.dataTransfer.files,
for (var i = 0; i < files.length; i++) {
console.log("Filename: " + files[i].name);
console.log("Type: " + files[i].type);
console.log("Size: " + files[i].size + " bytes");
}
}, false);
</script>
Step 2:
We can now inspect the files and tease out headers and MIME types.
✘ Quick method
You can naïvely ask Blob for the MIME type of whatever file it represents using this pattern:
var blob = files[i]; // See step 1 above
console.log(blob.type);
For images, MIME types come back like the following:
image/jpeg
image/png
...
Caveat: The MIME type is detected from the file extension and can be fooled or spoofed. One can rename a .jpg to a .png and the MIME type will be be reported as image/png.
✓ Proper header-inspecting method
To get the bonafide MIME type of a client-side file we can go a step further and inspect the first few bytes of the given file to compare against so-called magic numbers. Be warned that it's not entirely straightforward because, for instance, JPEG has a few "magic numbers". This is because the format has evolved since 1991. You might get away with checking only the first two bytes, but I prefer checking at least 4 bytes to reduce false positives.
Example file signatures of JPEG (first 4 bytes):
FF D8 FF E0 (SOI + ADD0)
FF D8 FF E1 (SOI + ADD1)
FF D8 FF E2 (SOI + ADD2)
Here is the essential code to retrieve the file header:
var blob = files[i]; // See step 1 above
var fileReader = new FileReader();
fileReader.onloadend = function(e) {
var arr = (new Uint8Array(e.target.result)).subarray(0, 4);
var header = "";
for(var i = 0; i < arr.length; i++) {
header += arr[i].toString(16);
}
console.log(header);
// Check the file signature against known types
};
fileReader.readAsArrayBuffer(blob);
You can then determine the real MIME type like so (more file signatures here and here):
switch (header) {
case "89504e47":
type = "image/png";
break;
case "47494638":
type = "image/gif";
break;
case "ffd8ffe0":
case "ffd8ffe1":
case "ffd8ffe2":
case "ffd8ffe3":
case "ffd8ffe8":
type = "image/jpeg";
break;
default:
type = "unknown"; // Or you can use the blob.type as fallback
break;
}
Accept or reject file uploads as you like based on the MIME types expected.
Demo
Here is a working demo for local files and remote files (I had to bypass CORS just for this demo). Open the snippet, run it, and you should see three remote images of different types displayed. At the top you can select a local image or data file, and the file signature and/or MIME type will be displayed.
Notice that even if an image is renamed, its true MIME type can be determined. See below.
Screenshot
// Return the first few bytes of the file as a hex string
function getBLOBFileHeader(url, blob, callback) {
var fileReader = new FileReader();
fileReader.onloadend = function(e) {
var arr = (new Uint8Array(e.target.result)).subarray(0, 4);
var header = "";
for (var i = 0; i < arr.length; i++) {
header += arr[i].toString(16);
}
callback(url, header);
};
fileReader.readAsArrayBuffer(blob);
}
function getRemoteFileHeader(url, callback) {
var xhr = new XMLHttpRequest();
// Bypass CORS for this demo - naughty, Drakes
xhr.open('GET', '//cors-anywhere.herokuapp.com/' + url);
xhr.responseType = "blob";
xhr.onload = function() {
callback(url, xhr.response);
};
xhr.onerror = function() {
alert('A network error occurred!');
};
xhr.send();
}
function headerCallback(url, headerString) {
printHeaderInfo(url, headerString);
}
function remoteCallback(url, blob) {
printImage(blob);
getBLOBFileHeader(url, blob, headerCallback);
}
function printImage(blob) {
// Add this image to the document body for proof of GET success
var fr = new FileReader();
fr.onloadend = function() {
$("hr").after($("<img>").attr("src", fr.result))
.after($("<div>").text("Blob MIME type: " + blob.type));
};
fr.readAsDataURL(blob);
}
// Add more from http://en.wikipedia.org/wiki/List_of_file_signatures
function mimeType(headerString) {
switch (headerString) {
case "89504e47":
type = "image/png";
break;
case "47494638":
type = "image/gif";
break;
case "ffd8ffe0":
case "ffd8ffe1":
case "ffd8ffe2":
type = "image/jpeg";
break;
default:
type = "unknown";
break;
}
return type;
}
function printHeaderInfo(url, headerString) {
$("hr").after($("<div>").text("Real MIME type: " + mimeType(headerString)))
.after($("<div>").text("File header: 0x" + headerString))
.after($("<div>").text(url));
}
/* Demo driver code */
var imageURLsArray = ["http://media2.giphy.com/media/8KrhxtEsrdhD2/giphy.gif", "http://upload.wikimedia.org/wikipedia/commons/e/e9/Felis_silvestris_silvestris_small_gradual_decrease_of_quality.png", "http://static.giantbomb.com/uploads/scale_small/0/316/520157-apple_logo_dec07.jpg"];
// Check for FileReader support
if (window.FileReader && window.Blob) {
// Load all the remote images from the urls array
for (var i = 0; i < imageURLsArray.length; i++) {
getRemoteFileHeader(imageURLsArray[i], remoteCallback);
}
/* Handle local files */
$("input").on('change', function(event) {
var file = event.target.files[0];
if (file.size >= 2 * 1024 * 1024) {
alert("File size must be at most 2MB");
return;
}
remoteCallback(escape(file.name), file);
});
} else {
// File and Blob are not supported
$("hr").after( $("<div>").text("It seems your browser doesn't support FileReader") );
} /* Drakes, 2015 */
img {
max-height: 200px
}
div {
height: 26px;
font: Arial;
font-size: 12pt
}
form {
height: 40px;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<form>
<input type="file" />
<div>Choose an image to see its file signature.</div>
</form>
<hr/>
As stated in other answers, you can check the mime type by checking the signature of the file in the first bytes of the file.
But what other answers are doing is loading the entire file in memory in order to check the signature, which is very wasteful and could easily freeze your browser if you select a big file by accident or not.
/**
* Load the mime type based on the signature of the first bytes of the file
* #param {File} file A instance of File
* #param {Function} callback Callback with the result
* #author Victor www.vitim.us
* #date 2017-03-23
*/
function loadMime(file, callback) {
//List of known mimes
var mimes = [
{
mime: 'image/jpeg',
pattern: [0xFF, 0xD8, 0xFF],
mask: [0xFF, 0xFF, 0xFF],
},
{
mime: 'image/png',
pattern: [0x89, 0x50, 0x4E, 0x47],
mask: [0xFF, 0xFF, 0xFF, 0xFF],
}
// you can expand this list #see https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
];
function check(bytes, mime) {
for (var i = 0, l = mime.mask.length; i < l; ++i) {
if ((bytes[i] & mime.mask[i]) - mime.pattern[i] !== 0) {
return false;
}
}
return true;
}
var blob = file.slice(0, 4); //read the first 4 bytes of the file
var reader = new FileReader();
reader.onloadend = function(e) {
if (e.target.readyState === FileReader.DONE) {
var bytes = new Uint8Array(e.target.result);
for (var i=0, l = mimes.length; i<l; ++i) {
if (check(bytes, mimes[i])) return callback("Mime: " + mimes[i].mime + " <br> Browser:" + file.type);
}
return callback("Mime: unknown <br> Browser:" + file.type);
}
};
reader.readAsArrayBuffer(blob);
}
//when selecting a file on the input
fileInput.onchange = function() {
loadMime(fileInput.files[0], function(mime) {
//print the output to the screen
output.innerHTML = mime;
});
};
<input type="file" id="fileInput">
<div id="output"></div>
For anyone who's looking to not implement this themselves, Sindresorhus has created a utility that works in the browser and has the header-to-mime mappings for most documents you could want.
https://github.com/sindresorhus/file-type
You could combine Vitim.us's suggestion of only reading in the first X bytes to avoid loading everything into memory with using this utility (example in es6):
import fileType from 'file-type'; // or wherever you load the dependency
const blob = file.slice(0, fileType.minimumBytes);
const reader = new FileReader();
reader.onloadend = function(e) {
if (e.target.readyState !== FileReader.DONE) {
return;
}
const bytes = new Uint8Array(e.target.result);
const { ext, mime } = fileType.fromBuffer(bytes);
// ext is the desired extension and mime is the mimetype
};
reader.readAsArrayBuffer(blob);
Here is a Typescript implementation that supports webp.
This is based on the JavaScript answer by Vitim.us.
interface Mime {
mime: string;
pattern: (number | undefined)[];
}
// tslint:disable number-literal-format
// tslint:disable no-magic-numbers
const imageMimes: Mime[] = [
{
mime: 'image/png',
pattern: [0x89, 0x50, 0x4e, 0x47]
},
{
mime: 'image/jpeg',
pattern: [0xff, 0xd8, 0xff]
},
{
mime: 'image/gif',
pattern: [0x47, 0x49, 0x46, 0x38]
},
{
mime: 'image/webp',
pattern: [0x52, 0x49, 0x46, 0x46, undefined, undefined, undefined, undefined, 0x57, 0x45, 0x42, 0x50, 0x56, 0x50],
}
// You can expand this list #see https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
];
// tslint:enable no-magic-numbers
// tslint:enable number-literal-format
function isMime(bytes: Uint8Array, mime: Mime): boolean {
return mime.pattern.every((p, i) => !p || bytes[i] === p);
}
function validateImageMimeType(file: File, callback: (b: boolean) => void) {
const numBytesNeeded = Math.max(...imageMimes.map(m => m.pattern.length));
const blob = file.slice(0, numBytesNeeded); // Read the needed bytes of the file
const fileReader = new FileReader();
fileReader.onloadend = e => {
if (!e || !fileReader.result) return;
const bytes = new Uint8Array(fileReader.result as ArrayBuffer);
const valid = imageMimes.some(mime => isMime(bytes, mime));
callback(valid);
};
fileReader.readAsArrayBuffer(blob);
}
// When selecting a file on the input
fileInput.onchange = () => {
const file = fileInput.files && fileInput.files[0];
if (!file) return;
validateImageMimeType(file, valid => {
if (!valid) {
alert('Not a valid image file.');
}
});
};
<input type="file" id="fileInput">
If you just want to check if the file uploaded is an image you can just try to load it into <img> tag an check for any error callback.
Example:
var input = document.getElementsByTagName('input')[0];
var reader = new FileReader();
reader.onload = function (e) {
imageExists(e.target.result, function(exists){
if (exists) {
// Do something with the image file..
} else {
// different file format
}
});
};
reader.readAsDataURL(input.files[0]);
function imageExists(url, callback) {
var img = new Image();
img.onload = function() { callback(true); };
img.onerror = function() { callback(false); };
img.src = url;
}
This is what you have to do
var fileVariable =document.getElementsById('fileId').files[0];
If you want to check for image file types then
if(fileVariable.type.match('image.*'))
{
alert('its an image');
}
As Drake states this could be done with FileReader. However, what I present here is a functional version. Take in consideration that the big problem with doing this with JavaScript is to reset the input file. Well, this restricts to only JPG (for other formats you will have to change the mime type and the magic number):
<form id="form-id">
<input type="file" id="input-id" accept="image/jpeg"/>
</form>
<script type="text/javascript">
$(function(){
$("#input-id").on('change', function(event) {
var file = event.target.files[0];
if(file.size>=2*1024*1024) {
alert("JPG images of maximum 2MB");
$("#form-id").get(0).reset(); //the tricky part is to "empty" the input file here I reset the form.
return;
}
if(!file.type.match('image/jp.*')) {
alert("only JPG images");
$("#form-id").get(0).reset(); //the tricky part is to "empty" the input file here I reset the form.
return;
}
var fileReader = new FileReader();
fileReader.onload = function(e) {
var int32View = new Uint8Array(e.target.result);
//verify the magic number
// for JPG is 0xFF 0xD8 0xFF 0xE0 (see https://en.wikipedia.org/wiki/List_of_file_signatures)
if(int32View.length>4 && int32View[0]==0xFF && int32View[1]==0xD8 && int32View[2]==0xFF && int32View[3]==0xE0) {
alert("ok!");
} else {
alert("only valid JPG images");
$("#form-id").get(0).reset(); //the tricky part is to "empty" the input file here I reset the form.
return;
}
};
fileReader.readAsArrayBuffer(file);
});
});
</script>
Take in consideration that this was tested on latest versions of Firefox and Chrome, and on IExplore 10.
For a complete list of mime types see Wikipedia.
For a complete list of magic number see Wikipedia.
I needed to check for a few more file types.
Following up the excellent answer given by Drakes, I came up with the below code after I found this website with a very extensive table of file types and their headers. Both in Hex and String.
I also needed an asynchronous function to deal with many files and other problems related to the project I'm working that does not matter here.
Here is the code in vanilla javascript.
// getFileMimeType
// #param {Object} the file object created by the input[type=file] DOM element.
// #return {Object} a Promise that resolves with the MIME type as argument or undefined
// if no MIME type matches were found.
const getFileMimeType = file => {
// Making the function async.
return new Promise(resolve => {
let fileReader = new FileReader();
fileReader.onloadend = event => {
const byteArray = new Uint8Array(event.target.result);
// Checking if it's JPEG. For JPEG we need to check the first 2 bytes.
// We can check further if more specific type is needed.
if(byteArray[0] == 255 && byteArray[1] == 216){
resolve('image/jpeg');
return;
}
// If it's not JPEG we can check for signature strings directly.
// This is only the case when the bytes have a readable character.
const td = new TextDecoder("utf-8");
const headerString = td.decode(byteArray);
// Array to be iterated [<string signature>, <MIME type>]
const mimeTypes = [
// Images
['PNG', 'image/png'],
// Audio
['ID3', 'audio/mpeg'],// MP3
// Video
['ftypmp4', 'video/mp4'],// MP4
['ftypisom', 'video/mp4'],// MP4
// HTML
['<!DOCTYPE html>', 'text/html'],
// PDF
['%PDF', 'application/pdf']
// Add the needed files for your case.
];
// Iterate over the required types.
for(let i = 0;i < mimeTypes.length;i++){
// If a type matches we return the MIME type
if(headerString.indexOf(mimeTypes[i][0]) > -1){
resolve(mimeTypes[i][1]);
return;
}
}
// If not is found we resolve with a blank argument
resolve();
}
// Slice enough bytes to get readable strings.
// I chose 32 arbitrarily. Note that some headers are offset by
// a number of bytes.
fileReader.readAsArrayBuffer(file.slice(0,32));
});
};
// The input[type=file] DOM element.
const fileField = document.querySelector('#file-upload');
// Event to detect when the user added files.
fileField.onchange = event => {
// We iterate over each file and log the file name and it's MIME type.
// This iteration is asynchronous.
Array.from(fileField.files, async file => {
console.log(file.name, await getFileMimeType(file));
});
};
Notice that in the getFileMimeType function you can employ 2 approaches to find the correct MIME type.
Search the bytes directly.
Search for Strings after converting the bytes to string.
I used the first approach with JPEG because what makes it identifiable are the first 2 bytes and those bytes are not readable string characters.
With the rest of the file types I could check for readable string character signatures. For example:
[video/mp4] -> 'ftypmp4' or 'ftypisom'
If you need to support a file that is not on the Gary Kessler's list, you can console.log() the bytes or converted string to find a proper signature for the obscure file you need to support.
Note1: The Gary Kessler's list has been updated and the mp4 signatures are different now, you should check it when implementing this.
Note2: the Array.from is designed to use a .map like function as it second argument.
Here's a minimal typescript/promise util for the browser;
export const getFileHeader = (file: File): Promise<string> => {
return new Promise(resolve => {
const headerBytes = file.slice(0, 4); // Read the first 4 bytes of the file
const fileReader = new FileReader();
fileReader.onloadend = (e: ProgressEvent<FileReader>) => {
const arr = new Uint8Array(e?.target?.result as ArrayBufferLike).subarray(
0,
4,
);
let header = '';
for (let i = 0; i < arr.length; i++) {
header += arr[i].toString(16);
}
resolve(header);
};
fileReader.readAsArrayBuffer(headerBytes);
});
};
Use like so in your validation (I needed a PDF check);
// https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
const pdfBytePattern = "25504446"
const fileHeader = await getFileHeader(file)
const isPdf = fileHeader === pdfBytePattern // => true
Here is an extension of Roberto14's answer that does the following:
THIS WILL ONLY ALLOW IMAGES
Checks if FileReader is available and falls back to extension checking if it is not available.
Gives an error alert if not an image
If it is an image it loads a preview
** You should still do server side validation, this is more a convenience for the end user than anything else. But it is handy!
<form id="myform">
<input type="file" id="myimage" onchange="readURL(this)" />
<img id="preview" src="#" alt="Image Preview" />
</form>
<script>
function readURL(input) {
if (window.FileReader && window.Blob) {
if (input.files && input.files[0]) {
var reader = new FileReader();
reader.onload = function (e) {
var img = new Image();
img.onload = function() {
var preview = document.getElementById('preview');
preview.src = e.target.result;
};
img.onerror = function() {
alert('error');
input.value = '';
};
img.src = e.target.result;
}
reader.readAsDataURL(input.files[0]);
}
}
else {
var ext = input.value.split('.');
ext = ext[ext.length-1].toLowerCase();
var arrayExtensions = ['jpg' , 'jpeg', 'png', 'bmp', 'gif'];
if (arrayExtensions.lastIndexOf(ext) == -1) {
alert('error');
input.value = '';
}
else {
var preview = document.getElementById('preview');
preview.setAttribute('alt', 'Browser does not support preview.');
}
}
}
</script>
For Png files you can do even more checking than just checking for some magic header bytes, as Png files have a particular file format that you can check.
TLDR: there are a series of chunks that must be in a specific order, and each chunk has a crc error correction code that you can check if it is valid.
https://en.wikipedia.org/wiki/Portable_Network_Graphics#File_format
I have made a little library that checks that the chunk layout is correct, and it checks that the crc code for each chunk is valid. Ready to consume as a npm package here:
https://www.npmjs.com/package/png-validator
Short answer is no.
As you note the browsers derive type from the file extension. Mac preview also seems to run off the extension. I'm assuming its because its faster reading the file name contained in the pointer, rather than looking up and reading the file on disk.
I made a copy of a jpg renamed with png.
I was able to consistently get the following from both images in chrome (should work in modern browsers).
ÿØÿàJFIFÿþ;CREATOR: gd-jpeg v1.0 (using IJG JPEG v62), quality = 90
Which you could hack out a String.indexOf('jpeg') check for image type.
Here is a fiddle to explore http://jsfiddle.net/bamboo/jkZ2v/1/
The ambigious line I forgot to comment in the example
console.log( /^(.*)$/m.exec(window.atob( image.src.split(',')[1] )) );
Splits the base64 encoded img data, leaving on the image
Base64 decodes the image
Matches only the first line of the image data
The fiddle code uses base64 decode which wont work in IE9, I did find a nice example using VB script that works in IE http://blog.nihilogic.dk/2008/08/imageinfo-reading-image-metadata-with.html
The code to load the image was taken from Joel Vardy, who is doing some cool image canvas resizing client side before uploading which may be of interest https://joelvardy.com/writing/javascript-image-upload

Correct MD5 hash in javascript for all filetypes

Using the library here: https://github.com/blueimp/JavaScript-MD5
I am attempting to correctly hash files in MD5 using javascript.
So far I get correct hashes for text files but if I attempt to hash an image file I get an incorrect hash.
This could be due to how the javascript FileReader reads the larger image files. I have tried readAsBinaryString(), readAsArrayBuffer() and readAsText() none of which provide the correct hash with the given library.
How should I be reading the file for this to provide a correct hash for all filetypes, is there a more appropriate library that works for all filetypes I should be using?
HTML:
<input id="file-to-hash" type=file>
<button onclick="hashFile()">Hash</button>
Javascript:
function hashFile() {
var file = document.getElementById('file-to-hash').files[0];
var reader = new FileReader();
reader.readAsArrayBuffer(file);
reader.onload = readSuccess;
}
function readSuccess(evt){
fileContents = evt.target.result;
var hash = md5(fileContents);
}
There is now the SubtleCrypto API and its subtle.digest method.
You won't be able to get an MD5 hash from this API, because MD5 is not considered secure anymore.
But you'll be able to get an hash with other (more-secure) algorithms, such as SHA.
function getHash(buffer, algo = "SHA-256") {
return crypto.subtle.digest(algo, buffer)
.then(hash => {
// here hash is an arrayBuffer, so we'll convert it to its hex version
let result = '';
const view = new DataView(hash);
for (let i = 0; i < hash.byteLength; i += 4) {
result += ('00000000' + view.getUint32(i).toString(16)).slice(-8);
}
return result;
});
}
f.onchange = e => {
const fR = new FileReader();
fR.onload = e => getHash(fR.result)
.then(hash => console.log(hash))
// Chrome only accept it from an secure origin
.catch(e => {
if (e.code === 9) {
console.log(`Be sure to be on the https page :
https://stackoverflow.com/questions/44036218/`)
} else {
console.log(e.message)
}
})
fR.readAsArrayBuffer(f.files[0]);
}
<input type="file" id="f">

Calculate SHA-1 checksum of local html5 video file using JavaScript

When a video on my local storage—let's say it's currently located at file:///home/user/video.m4v—is opened by dragging it into a new tab in Chrome, how can I calculate the SHA-1 checksum for the file using JavaScript?
Purpose:
I am planning to write a Chrome extension which will store the calculated checksum of videos (files with extensions matching a pattern) as localStorage objects in order to save the playback position of video upon tab close and then restore it when the file is loaded again, even if the location or filename of the video is changed.
You need a crypto library for this. A well known one is Google CryptoJS.
I found this as an specific example for your task: https://gist.github.com/npcode/11282867
After including the crypto-js source:
function sha1sum() {
var oFile = document.getElementById('uploadFile').files[0];
var sha1 = CryptoJS.algo.SHA1.create();
var read = 0;
var unit = 1024 * 1024;
var blob;
var reader = new FileReader();
reader.readAsArrayBuffer(oFile.slice(read, read + unit));
reader.onload = function(e) {
var bytes = CryptoJS.lib.WordArray.create(e.target.result);
sha1.update(bytes);
read += unit;
if (read < oFile.size) {
blob = oFile.slice(read, read + unit);
reader.readAsArrayBuffer(blob);
} else {
var hash = sha1.finalize();
console.log(hash.toString(CryptoJS.enc.Hex)); // print the result
}
}
}
I wouldn't recommend to calculate a hash over the whole video file as it can be pretty resource consuming depending on the file size. Maybe you can use just the meta information or reconsider about the filename and filepath again?
Web APIs have progressed considerably since I asked this question. Calculating a hex digest is now possible using the built-in SubtleCrypto.digest().
TS Playground link
function u8ToHex (u8: number): string {
return u8.toString(16).padStart(2, '0');
}
/** Ref: https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest#supported_algorithms */
const supportedAlgorithms = [
'SHA-1',
'SHA-256',
'SHA-384',
'SHA-512',
] as const;
type SupportedAlgorithm = typeof supportedAlgorithms[number];
type Message = string | Blob | BufferSource;
async function hexDigest (
algorithm: SupportedAlgorithm,
message: Message,
): Promise<string> {
let buf: BufferSource;
if (typeof message === 'string') buf = new TextEncoder().encode(message);
else if (message instanceof Blob) buf = await message.arrayBuffer();
else buf = message;
const hash = await crypto.subtle.digest(algorithm, buf);
return [...new Uint8Array(hash)].map(u8ToHex).join('');
}

resize image before upload via background transfer in winjs

I would like to resize an image picked from the gallery of the phone before uploading it via background transfer so far I have:-
filePicker.pickSingleFileAsync().then(function (file) {
uploadSingleFileAsync(uri, file);
}).done(null, displayException);
function uploadSingleFileAsync(uri, file) {
if (!file) {
displayError("Error: No file selected.");
return;
}
return file.getBasicPropertiesAsync().then(function (properties) {
if (properties.size > maxUploadFileSize) {
displayError("Selected file exceeds max. upload file size (" + (maxUploadFileSize / (1024 * 1024)) +
" MB).");
return;
}
var upload = new UploadOperation();
//tried this to compress the file but it doesnt work obviously not right for the object
//file = file.slice(0, Math.round(file.size / 2));
upload.start(uri, file);
// Persist the upload operation in the global array.
uploadOperations.push(upload);
});
}
and the rest then uploads the file. I tried adding in .slice but it doesn't work (im guessing because file is an object rather than) and i'm not sure how to compress this type of file object. I can't seem to find any examples or advice on msdn or the windows dev forums, I can obviously resize the photos once they are on the server but I would rather users are not waiting longer than they have to for their files to upload.
Do I need to save the image before I can manipulate it? Any advice would be greatly appreciated!
** EDIT *
my upload singlefileasync now looks like:-
function uploadSingleFileAsync(uri, file) {
if (!file) {
displayError("Error: No file selected.");
return;
}
return file.getBasicPropertiesAsync().then(function (properties) {
if (properties.size > maxUploadFileSize) {
displayError("Selected file exceeds max. upload file size (" + (maxUploadFileSize / (1024 * 1024)) +
" MB).");
return;
}
// Exception number constants. These constants are defined using values from winerror.h,
// and are compared against error.number in the exception handlers in this scenario.
// This file format does not support the requested operation; for example, metadata or thumbnails.
var WINCODEC_ERR_UNSUPPORTEDOPERATION = Helpers.convertHResultToNumber(0x88982F81);
// This file format does not support the requested property/metadata query.
var WINCODEC_ERR_PROPERTYNOTSUPPORTED = Helpers.convertHResultToNumber(0x88982F41);
// There is no codec or component that can handle the requested operation; for example, encoding.
var WINCODEC_ERR_COMPONENTNOTFOUND = Helpers.convertHResultToNumber(0x88982F50);
// Keep objects in-scope across the lifetime of the scenario.
var FileToken = "";
var DisplayWidthNonScaled = 0;
var DisplayHeightNonScaled = 0;
var ScaleFactor = 0;
var UserRotation = 0;
var ExifOrientation = 0;
var DisableExifOrientation = false;
// Namespace and API aliases
var FutureAccess = Windows.Storage.AccessCache.StorageApplicationPermissions.futureAccessList;
var LocalSettings = Windows.Storage.ApplicationData.current.localSettings.values;
//FileToken = FutureAccess.add(file);
FileToken = Windows.Storage.AccessCache.StorageApplicationPermissions.futureAccessList.add(file);
id("myImage").src = window.URL.createObjectURL(file, { oneTimeOnly: true });
id("myImage").alt = file.name;
// Use BitmapDecoder to attempt to read EXIF orientation and image dimensions.
return loadSaveFileAsync(file)
function resetPersistedState() {
LocalSettings.remove("scenario2FileToken");
LocalSettings.remove("scenario2Scale");
LocalSettings.remove("scenario2Rotation");
}
function resetSessionState() {
// Variables width and height reflect rotation but not the scale factor.
FileToken = "";
DisplayWidthNonScaled = 0;
DisplayHeightNonScaled = 0;
ScaleFactor = 1;
UserRotation = Windows.Storage.FileProperties.PhotoOrientation.normal;
ExifOrientation = Windows.Storage.FileProperties.PhotoOrientation.normal;
DisableExifOrientation = false;
}
function loadSaveFileAsync(file) {
// Keep data in-scope across multiple asynchronous methods.
var inputStream;
var outputStream;
var encoderId;
var pixels;
var pixelFormat;
var alphaMode;
var dpiX;
var dpiY;
var outputFilename;
var ScaleFactor = 0.5;
new WinJS.Promise(function (comp, err, prog) { comp(); }).then(function () {
// On Windows Phone, this call must be done within a WinJS Promise to correctly
// handle exceptions, for example if the file is read-only.
return FutureAccess.getFileAsync(FileToken);
}).then(function (inputFile) {
return inputFile.openAsync(Windows.Storage.FileAccessMode.read);
}).then(function (stream) {
inputStream = stream;
return Windows.Graphics.Imaging.BitmapDecoder.createAsync(inputStream);
}).then(function (decoder) {
var transform = new Windows.Graphics.Imaging.BitmapTransform();
// Scaling occurs before flip/rotation, therefore use the original dimensions
// (no orientation applied) as parameters for scaling.
// Dimensions are rounded down by BitmapEncoder to the nearest integer.
transform.scaledHeight = decoder.pixelHeight * ScaleFactor;
transform.scaledWidth = decoder.pixelWidth * ScaleFactor;
transform.rotation = Helpers.convertToBitmapRotation(UserRotation);
// Fant is a relatively high quality interpolation mode.
transform.interpolationMode = Windows.Graphics.Imaging.BitmapInterpolationMode.fant;
// The BitmapDecoder indicates what pixel format and alpha mode best match the
// natively stored image data. This can provide a performance and/or quality gain.
pixelFormat = decoder.bitmapPixelFormat;
alphaMode = decoder.bitmapAlphaMode;
dpiX = decoder.dpiX;
dpiY = decoder.dpiY;
// Get pixel data from the decoder. We apply the user-requested transforms on the
// decoded pixels to take advantage of potential optimizations in the decoder.
return decoder.getPixelDataAsync(
pixelFormat,
alphaMode,
transform,
Windows.Graphics.Imaging.ExifOrientationMode.respectExifOrientation,
Windows.Graphics.Imaging.ColorManagementMode.colorManageToSRgb
);
}).then(function (pixelProvider) {
pixels = pixelProvider.detachPixelData();
// The destination file was passed as an argument to loadSaveFileAsync().
outputFilename = file.name;
switch (file.fileType) {
case ".jpg":
encoderId = Windows.Graphics.Imaging.BitmapEncoder.jpegEncoderId;
break;
case ".bmp":
encoderId = Windows.Graphics.Imaging.BitmapEncoder.bmpEncoderId;
break;
case ".png":
default:
encoderId = Windows.Graphics.Imaging.BitmapEncoder.pngEncoderId;
break;
}
return file.openAsync(Windows.Storage.FileAccessMode.readWrite);
}).then(function (stream) {
outputStream = stream;
// BitmapEncoder expects an empty output stream; the user may have selected a
// pre-existing file.
outputStream.size = 0;
return Windows.Graphics.Imaging.BitmapEncoder.createAsync(encoderId, outputStream);
}).then(function (encoder) {
// Write the pixel data onto the encoder. Note that we can't simply use the
// BitmapTransform.ScaledWidth and ScaledHeight members as the user may have
// requested a rotation (which is applied after scaling).
encoder.setPixelData(
pixelFormat,
alphaMode,
DisplayWidthNonScaled * ScaleFactor,
DisplayHeightNonScaled * ScaleFactor,
dpiX,
dpiY,
pixels
);
return encoder.flushAsync();
}).then(function () {
WinJS.log && WinJS.log("Successfully saved a copy: " + outputFilename, "sample", "status");
}, function (error) {
WinJS.log && WinJS.log("Failed to update file: " + error.message, "sample", "error");
resetSessionState();
resetPersistedState();
}).then(function () {
// Finally, close each stream to release any locks.
inputStream && inputStream.close();
outputStream && outputStream.close();
}).then(function () {
var upload = new UploadOperation();
upload.start(uri, file);
// Persist the upload operation in the global array.
uploadOperations.push(upload);
});
}
But I am getting an error when I reach this line return
file.openAsync(Windows.Storage.FileAccessMode.readWrite);
saying that I do not have write access? How do I get write access or move it so that I can have write access?
To resize an image you can use the image encoding APIs in WinRT, namely that in Windows.Graphics.Imaging. I suggest you look at scenario 2 of the Simple Imaging Sample (http://code.msdn.microsoft.com/windowsapps/Simple-Imaging-Sample-a2dec2b0) which shows how to do all manners of transforms on an image. Changing the dimensions is included there, so it'll just be a matter of chopping out the parts you don't need.
I have a discussion about all this in my free ebook, Programming Windows Store Apps with HTML, CSS, and JavaScript, 2nd Edition, in Chapter 13, section "Image Manipulation and Encoding". In there I try to separate out the main steps in the process into something a little more digestible, and provide an additional sample.
The process of encoding can look rather involved (lots of chained promises), but it's quite straightforward and is exactly what an email program would do to reduce the size of attached images, for instance. In any case, you should end up with another StorageFile with a smaller image that you can then pass to the uploader. I would recommend using your Temporary app data for such files, and be sure to clean them up when the upload is complete.

Categories