We often need to store files in our databases, but MongoDB doesn’t let you store a file larger than 16 MB in a normal document. For larger files, MongoDB provides a dedicated feature called GridFS.
GridFS divides the file into chunks and then stores them in the database.
GridFS stores files in buckets. A bucket is a group of MongoDB collections that hold the file chunks and the file information. Each bucket has these collections:
Files - (this stores file metadata)
Chunks - (this stores binary file chunks)
When we create a GridFS bucket, it automatically creates the files and chunks collections within the bucket. By default, the GridFS bucket is named fs.
GridFS is a good fit in the following situations:
If your file system limits the number of files you can store in a directory.
When you need to access a portion of a large file without loading the whole file.
When you want to store and sync files and metadata across distributed systems.
In this article we will discuss
Creating GridFS bucket
Uploading files
Retrieving file information
Downloading files
Renaming Files
Deleting a file
Deleting a bucket
In this article, we will be utilizing the cloud version of MongoDB, also known as Atlas.
Create your Node.js project and get the MongoDB Atlas URI so that your Node.js client can connect to the database.
You can refer to this article on How to get MongoDB Atlas URI.
Create a node project and initialize npm in it,
mkdir gridfs-tutorial
cd gridfs-tutorial
npm init -y
Install npm modules needed for our app:
Express (framework to design API)
Mongoose (ODM for MongoDB)
Multer (module to ease file upload)
Multer-gridfs-storage (module to implement gridfs with multer)
npm i -S express mongoose dotenv multer multer-gridfs-storage
Install nodemon as dev-dependency
npm i --save-dev nodemon
Create app.js file and write down the code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
const express = require("express");
const app = express();
const mongoose = require("mongoose");
const multer = require("multer");
const {
GridFsStorage
} = require("multer-gridfs-storage");
require("dotenv")
.config();
// MongoDB Atlas connection string; the password is read from the MONGO_PASS environment variable.
const mongouri = `mongodb+srv://User1:${process.env.MONGO_PASS}@cluster0.wakey.mongodb.net/myFirstDatabase?retryWrites=true&w=majority`;
// Connect to MongoDB Atlas. mongoose.connect() returns a promise, so a failed
// connection shows up as a rejection, not a synchronous throw; a try/catch
// around the call never sees it. Handle the rejection on the promise itself.
mongoose
  .connect(mongouri, {
    useUnifiedTopology: true,
    useNewUrlParser: true,
  })
  .catch((error) => {
    console.error("MongoDB connection failed:", error.message);
  });

// Last-resort logger for promise rejections that nothing else handled.
process.on("unhandledRejection", (error) => {
  // A rejection reason is not guaranteed to be an Error instance.
  console.log("unhandledRejection", error instanceof Error ? error.message : error);
});
// GridFS bucket handle; it is assigned when the "connected" event fires.
let bucket;
mongoose.connection.on("connected", () => {
  // Take the db of the first entry in mongoose.connections.
  const [{ db }] = mongoose.connections;
  const bucketOptions = { bucketName: "newBucket" };
  bucket = new mongoose.mongo.GridFSBucket(db, bucketOptions);
  console.log(bucket);
});
// Parse JSON request bodies.
app.use(express.json());
// Parse URL-encoded request bodies.
app.use(
  express.urlencoded({
    extended: false,
  })
);

// Fall back to 8080 when the PORT environment variable is not set, so the
// server never starts on an unspecified port.
const PORT = process.env.PORT || 8080;
app.listen(PORT, function () {
  // `${...}` is required for interpolation; a bare `{...}` is printed literally.
  console.log(`Application live on localhost:${PORT}`);
});
Here we connect to MongoDB through Mongoose, and then create the bucket once the connection to MongoDB Atlas is established, using the following handler:
1
2
3
4
5
6
7
8
// GridFS bucket handle; it is assigned when the "connected" event fires.
let bucket;
mongoose.connection.on("connected", () => {
  // Take the db of the first entry in mongoose.connections.
  const [{ db }] = mongoose.connections;
  const bucketOptions = { bucketName: "newBucket" };
  bucket = new mongoose.mongo.GridFSBucket(db, bucketOptions);
  console.log(bucket);
});
We have also passed the option bucketName with the value newBucket, so our bucket is created with the name newBucket. Note: if you don’t pass bucketName, MongoDB gives the bucket the default name fs.
Let’s set up the multer-gridfs module, link it to our GridFS, and then use it in endpoint /upload to upload files to gridfs.
Add endpoint code to app.js as follows:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
const express = require("express");
const app = express();
const mongoose = require("mongoose");
const multer = require("multer");
const {
GridFsStorage
} = require("multer-gridfs-storage");
var crypto = require('crypto');
var path = require('path');
require("dotenv")
.config();
// MongoDB Atlas connection string; the password is read from the MONGO_PASS environment variable.
const mongouri = `mongodb+srv://User1:${process.env.MONGO_PASS}@cluster0.wakey.mongodb.net/myFirstDatabase?retryWrites=true&w=majority`;
// Connect to MongoDB Atlas. mongoose.connect() returns a promise, so a failed
// connection shows up as a rejection, not a synchronous throw; a try/catch
// around the call never sees it. Handle the rejection on the promise itself.
mongoose
  .connect(mongouri, {
    useUnifiedTopology: true,
    useNewUrlParser: true,
  })
  .catch((error) => {
    console.error("MongoDB connection failed:", error.message);
  });

// Last-resort logger for promise rejections that nothing else handled.
process.on("unhandledRejection", (error) => {
  // A rejection reason is not guaranteed to be an Error instance.
  console.log("unhandledRejection", error instanceof Error ? error.message : error);
});
// GridFS bucket handle. It stays undefined until the "connected" event below
// fires, so code that uses it must run after the connection is established.
let bucket;
mongoose.connection.on("connected", () => {
  // db of the first entry in mongoose.connections.
  const { db } = mongoose.connections[0];
  bucket = new mongoose.mongo.GridFSBucket(db, {
    bucketName: "newBucket",
  });
  console.log(bucket);
});
// Register the body parsers: JSON first, then URL-encoded form data.
[express.json(), express.urlencoded({ extended: false })].forEach((parser) => {
  app.use(parser);
});
// multer storage engine that writes uploads to GridFS, connecting with the
// same connection string that Mongoose uses.
const storage = new GridFsStorage({
  url: mongouri,
  // Called per uploaded file; describes how the file is stored. The
  // descriptor is built synchronously, so it is returned directly instead of
  // being wrapped in a hand-made Promise.
  file: (req, file) => ({
    filename: file.originalname,
    // Same bucket name that is passed to GridFSBucket when the bucket is created.
    bucketName: "newBucket",
  }),
});

// multer instance backed by the GridFS storage engine.
const upload = multer({
  storage,
});
// POST /upload: expects multipart form data with the file in the "file" field.
app.post("/upload", upload.single("file"), (req, res) => {
  // multer leaves req.file unset when the request carried no "file" part;
  // do not report success for an upload that never happened.
  if (!req.file) {
    return res.status(400).send("No file uploaded");
  }
  res.status(200).send("File uploaded successfully");
});
// Fall back to 8080 when the PORT environment variable is not set.
const PORT = process.env.PORT || 8080;
app.listen(PORT, () => {
  // Log the port actually bound (PORT, not process.env.PORT, which may be
  // unset). `${...}` is required for interpolation in a template literal.
  console.log(`Application live on localhost:${PORT}`);
});
Here we use the GridFsStorage class from the multer-gridfs-storage module to connect to GridFS storage. We then pass that storage engine to multer to create upload, and use upload.single("file") as middleware in our /upload endpoint, which listens for POST requests containing the file to be uploaded.
Let’s test this endpoint in Postman.
Fire up Postman and create a POST request to /upload with a file.
You should receive output to the above request as:
Now let’s create an endpoint for retrieving file information and download file.
Write code in app.js as follows:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
const express = require("express");
const app = express();
const mongoose = require("mongoose");
const multer = require("multer");
const {
GridFsStorage
} = require("multer-gridfs-storage");
require("dotenv")
.config();
// MongoDB Atlas connection string; the password is read from the MONGO_PASS environment variable.
const mongouri = `mongodb+srv://User1:${process.env.MONGO_PASS}@cluster0.wakey.mongodb.net/myFirstDatabase?retryWrites=true&w=majority`;
// Connect to MongoDB Atlas. mongoose.connect() returns a promise, so a failed
// connection shows up as a rejection, not a synchronous throw; a try/catch
// around the call never sees it. Handle the rejection on the promise itself.
mongoose
  .connect(mongouri, {
    useUnifiedTopology: true,
    useNewUrlParser: true,
  })
  .catch((error) => {
    console.error("MongoDB connection failed:", error.message);
  });

// Last-resort logger for promise rejections that nothing else handled.
process.on("unhandledRejection", (error) => {
  // A rejection reason is not guaranteed to be an Error instance.
  console.log("unhandledRejection", error instanceof Error ? error.message : error);
});
// GridFS bucket handle used by the route handlers. It stays undefined until
// the "connected" event below fires.
let bucket;
mongoose.connection.on("connected", () => {
  // db of the first entry in mongoose.connections.
  const { db } = mongoose.connections[0];
  bucket = new mongoose.mongo.GridFSBucket(db, {
    bucketName: "newBucket",
  });
  console.log(bucket);
});
// Register the body parsers: JSON first, then URL-encoded form data.
[express.json(), express.urlencoded({ extended: false })].forEach((parser) => {
  app.use(parser);
});
// multer storage engine that writes uploads to GridFS, connecting with the
// same connection string that Mongoose uses.
const storage = new GridFsStorage({
  url: mongouri,
  // Called per uploaded file; describes how the file is stored. The
  // descriptor is built synchronously, so it is returned directly instead of
  // being wrapped in a hand-made Promise.
  file: (req, file) => ({
    filename: file.originalname,
    // Same bucket name that is passed to GridFSBucket when the bucket is created.
    bucketName: "newBucket",
  }),
});

// multer instance backed by the GridFS storage engine.
const upload = multer({
  storage,
});
// GET /fileinfo/:filename: streams the stored file with the given name, or
// responds with a JSON error when it cannot.
app.get("/fileinfo/:filename", (req, res) => {
  // bucket is assigned in the "connected" handler; before that there is
  // nothing to query.
  if (!bucket) {
    return res.status(503).json({
      err: "database connection not ready",
    });
  }
  const { filename } = req.params;

  // Report a failure as JSON while that is still possible; once the download
  // has started writing the body, the only option left is to abort it.
  const fail = (error) => {
    if (res.headersSent) {
      res.destroy(error);
      return;
    }
    res.status(500).json({
      err: error.message,
    });
  };

  // Promise form of toArray(), so lookup errors reach fail() instead of
  // being silently ignored.
  bucket
    .find({ filename })
    .toArray()
    .then((files) => {
      if (!files || files.length === 0) {
        return res.status(404).json({
          err: "no files exist",
        });
      }
      // A read error on the GridFS stream would otherwise be an unhandled
      // "error" event and leave the response hanging.
      bucket.openDownloadStreamByName(filename).on("error", fail).pipe(res);
    })
    .catch(fail);
});
// POST /upload: expects multipart form data with the file in the "file" field.
app.post("/upload", upload.single("file"), (req, res) => {
  // multer leaves req.file unset when the request carried no "file" part;
  // do not report success for an upload that never happened.
  if (!req.file) {
    return res.status(400).send("No file uploaded");
  }
  res.status(200).send("File uploaded successfully");
});
// Fall back to 8080 when the PORT environment variable is not set.
const PORT = process.env.PORT || 8080;
app.listen(PORT, () => {
  // Log the port actually bound (PORT, not process.env.PORT, which may be
  // unset). `${...}` is required for interpolation in a template literal.
  console.log(`Application live on localhost:${PORT}`);
});
Here we have created the endpoint /fileinfo/:filename to search for a file and download it if it exists in the database. We pass the filename as a path parameter and use bucket.find to check whether the file exists in the database. If it exists, we create a download stream and pipe it to the response; otherwise we respond with an error.
Let’s test this endpoint by acquiring info about the file we stored in the previous step, by creating a GET request in Postman as:
And you must get output as follows:
Which shows the downloaded pdf.
Similarly, renaming a file can be done by calling:
bucket.rename(ObjectId("616ef2f1062f0a454f0379b6"), "newFileName");
Deleting a file can be done similarly by:
bucket.delete(ObjectId("616ef2f1062f0a454f0379b6"));
And we can also drop a bucket by running:
bucket.drop()
That’s how you can implement GridFS in your node/express application.
Even though GridFS is a great tool, it has a few limitations.
Processes a single file at a time.
Serving files from the same database as your application data can affect the performance of the database. It’s better to use a separate MongoDB server for storing files.
GridFS does not provide a way to update a file atomically. So, you have to implement versioning and manage it yourself.