Saturday, April 27, 2013
NodeJS - simulating a multi-file upload request
Story:
I was building a NodeJS client application that connects to a backend Rest API. One of the functionalities requires uploading multiple files to the server.
Concept:
We will need to upload the files via a POST request to the server. The request will look like the following:
POST /cgi-bin/qtest HTTP/1.1
Host: aram
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: en-us,en;q=0.5
Accept-Encoding: gzip,deflate
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive: 300
Connection: keep-alive
Referer: http://aram/~martind/banner.htm
Content-Type: multipart/form-data; boundary=---------------------------287032381131322
Content-Length: 582
-----------------------------287032381131322
Content-Disposition: form-data; name="datafile1"; filename="r.gif"
Content-Type: image/gif
GIF87a.............,...........D..;
-----------------------------287032381131322
Content-Disposition: form-data; name="datafile2"; filename="g.gif"
Content-Type: image/gif
GIF87a.............,...........D..;
-----------------------------287032381131322
Content-Disposition: form-data; name="datafile3"; filename="b.gif"
Content-Type: image/gif
GIF87a.............,...........D..;
-----------------------------287032381131322--
I took this above example from What should a Multipart HTTP request with multiple files look like?
The number 287032381131322 is a random number.
-----------------------------287032381131322 is a boundary that separates individual data fields.
Notice that there is always a "\r\n" appended after each -----------------------------287032381131322 line.
The last -----------------------------287032381131322 will contain an extra "--" at the end.
Implementation:
We will create a function to generate the above request. The files that will be uploaded are called thumbnail, bigThumbnail, and photo.
I will be using the NodeJS library "Sequence" to make the code follow a sequential order. Don't worry about it if you don't understand it.
Here's the code:
/**
 * Uploads the files attached to the incoming request (thumbnail and photo)
 * to the backend REST API as a single multipart/form-data POST request.
 *
 * @param {Object} req - Incoming request; `req.files` holds the uploaded
 *     files keyed by field name, `req.body.title` holds the title field,
 *     and `req.session.user.authCookie` is forwarded as the Cookie header.
 * @param {Function} callback - Invoked as callback(statusCode, parsedBody);
 *     parsedBody is the JSON-decoded upstream response, or null on failure.
 */
FileProvider.prototype.save = function(req, callback) {
  var files = req.files;
  var title = req.body.title;
  // req.files is an object keyed by field name, not an array, so test its
  // key count — the original `files.length == 0` compared undefined and
  // therefore never detected an empty upload.
  if (Object.keys(files).length === 0) {
    console.log("nothing uploaded");
    return callback(500, null);
  }
  var thumbnail = files.thumbnail;
  var photo = files.photo;
  // NOTE(review): files.bigThumbnail is mentioned in the write-up but is
  // never uploaded here — confirm whether a third part should be added.
  var boundary = Math.random();
  var post_data = [];
  post_data.push(new Buffer(httpRequest.encodeFieldPart(boundary, 'title', title), 'ascii'));
  var sequence = Futures.sequence();
  sequence.then(function(next) {
    post_data.push(new Buffer(httpRequest.encodeFilePart(boundary, thumbnail.type, 'thumbnail', thumbnail.name), 'ascii'));
    var file_reader = fs.createReadStream(thumbnail.path, {encoding: 'binary'});
    var file_contents = '';
    file_reader.on('data', function(data) {
      file_contents += data;
    });
    file_reader.on('end', function() {
      post_data.push(new Buffer(file_contents, 'binary'));
      // Terminate this part's binary payload with CRLF only; the next
      // part's encodeFilePart() emits its own leading "--boundary" line.
      // (The original pushed "--" + boundary here, duplicating the
      // boundary, and passed 'ascii' as a second argument to push(),
      // which appended the literal string "ascii" to the body.)
      post_data.push(new Buffer("\r\n", 'ascii'));
      next();
    });
  })
  .then(function(next) {
    post_data.push(new Buffer(httpRequest.encodeFilePart(boundary, photo.type, 'digitalImage', photo.name), 'ascii'));
    var file_reader = fs.createReadStream(photo.path, {encoding: 'binary'});
    var file_contents = '';
    file_reader.on('data', function(data) {
      file_contents += data;
    });
    file_reader.on('end', function() {
      post_data.push(new Buffer(file_contents, 'binary'));
      // Closing boundary: the leading CRLF ends the payload and the
      // trailing "--" marks the end of the whole multipart body.
      post_data.push(new Buffer("\r\n--" + boundary + "--\r\n", 'ascii'));
      next();
    });
  })
  .then(function(next) {
    // Content-Length must be the byte length of the assembled body.
    var dataLength = 0;
    for (var i = 0; i < post_data.length; i++) {
      dataLength += post_data[i].length;
    }
    var options = {
      host: constants.SERVER_HOST,
      port: constants.SERVER_PORT,
      path: constants.SERVER_UPLOAD_PATH,
      method: 'POST',
      headers: {
        'Content-Type': 'multipart/form-data; boundary=' + boundary,
        'Content-Length': dataLength,
        'Cookie': req.session.user.authCookie
      }
    };
    httpRequest.post(req, options, post_data, function(res, data) {
      var response = null;
      if (res.statusCode == 200) {
        try {
          response = JSON.parse(data);
        } catch (err) {
          // Best-effort parse: a malformed body still reports the 200
          // status, with a null payload.
          console.log("FileProvider.save error: " + err);
        }
        callback(res.statusCode, response);
      } else {
        callback(res.statusCode, null);
      }
    });
  });
};
Helper Library:
/**
 * Issues an HTTP POST with the given options and body, collecting the
 * response as a UTF-8 string.
 *
 * @param {Object} request - Originating request (unused here; kept for
 *     interface compatibility with existing callers).
 * @param {Object} options - Options for http.request (host, port, path,
 *     method, headers). Socket pooling is disabled.
 * @param {Buffer|Buffer[]|string} postData - Body to write; an Array is
 *     written chunk by chunk.
 * @param {Function} callback - Invoked exactly once as callback(res, data),
 *     where res always exposes a statusCode (408 on timeout, 599 on
 *     transport error) and data is the response body or null.
 */
HttpRequest.prototype.post = function(request, options, postData, callback) {
  // disable socket pooling
  options.agent = false;
  // Guard so the callback fires at most once: 'timeout', 'error' and the
  // normal response path can all trigger for the same request (the
  // original could invoke the callback twice).
  var finished = false;
  var done = function(res, data) {
    if (finished) {
      return;
    }
    finished = true;
    callback(res, data);
  };
  var req = http.request(options, function(res) {
    console.log('RESPONSE STATUS: ' + res.statusCode);
    console.log('RESPONSE HEADERS: ' + JSON.stringify(res.headers));
    res.setEncoding('utf8');
    var responseData = '';
    res.on('data', function (data) {
      responseData += data;
    });
    res.on('end', function () {
      done(res, responseData);
    });
  });
  req.setTimeout(100000, function() {
    console.log('problem with request: timeout');
    // Tear the connection down; the resulting 'error' event is absorbed
    // by the done() guard above.
    req.abort();
    // Callers read res.statusCode, so pass an object rather than a bare
    // number (the original callback(408, null) made statusCode undefined).
    done({statusCode: 408}, null);
  });
  req.on('error', function(e) {
    console.log('problem with request: ' + e.message);
    done({statusCode: 599}, null);
  });
  if (postData instanceof Array) {
    for (var i = 0; i < postData.length; i++) {
      req.write(postData[i]);
    }
  } else {
    req.write(postData);
  }
  req.end();
};
/**
 * Builds the multipart section for a plain form field: the boundary line,
 * a Content-Disposition header naming the field, a blank line, then the
 * field value — every line terminated by CRLF.
 *
 * @param {string|number} boundary - Multipart boundary token.
 * @param {string} name - Form field name.
 * @param {*} value - Field value; coerced to a string.
 * @returns {string} The encoded field part.
 */
HttpRequest.prototype.encodeFieldPart = function(boundary, name, value) {
  var lines = [
    "--" + boundary,
    'Content-Disposition: form-data; name="' + name + '"',
    "",
    "" + value
  ];
  return lines.join("\r\n") + "\r\n";
};
/**
 * Builds the header portion of a multipart file section: the boundary
 * line, a Content-Disposition header carrying the field and file names,
 * and a Content-Type header followed by the blank separator line. The
 * caller appends the file bytes after this string.
 *
 * @param {string|number} boundary - Multipart boundary token.
 * @param {string} type - MIME type of the file (e.g. "image/gif").
 * @param {string} name - Form field name.
 * @param {string} filename - Original file name reported to the server.
 * @returns {string} The encoded part headers, ending with "\r\n\r\n".
 */
HttpRequest.prototype.encodeFilePart = function(boundary, type, name, filename) {
  var headerLines = [
    "--" + boundary,
    'Content-Disposition: form-data; name="' + name + '"; filename="' + filename + '"',
    "Content-Type: " + type
  ];
  return headerLines.join("\r\n") + "\r\n\r\n";
};
Thursday, April 25, 2013
HTML CSS Sticky footer
If you are constantly developing websites, one thing that you may face constantly is sticky footer. Here it is:
Sticky Footer CSS
html {
height: 100%;
min-height: 100%;
}
#body_wrapper {
min-height: 100%;
height: auto !important;
margin-bottom: -150px;
}
#footer {
background: #000000;
color: #ffffff;
height: 150px;
}
You will want to change the height of your footer (150px above).
Sticky Footer HTML
<html>
<body>
<div id="body_wrapper">body</div>
<div id="footer">footer</div>
</body>
</html>
Sticky Footer CSS
html {
height: 100%;
min-height: 100%;
}
#body_wrapper {
min-height: 100%;
height: auto !important;
margin-bottom: -150px;
}
#footer {
background: #000000;
color: #ffffff;
height: 150px;
}
You will want to change the height of your footer (150px above).
Sticky Footer HTML
<html>
<body>
<div id="body_wrapper">body</div>
<div id="footer">footer</div>
</body>
</html>
Wednesday, April 24, 2013
MySQL - extract domain from url
Let's say you have a table called Post that has a column called "url" and you want to extract the domain component and save it in a new column called "domain".
update Post set domain = (select (SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX((url), '://', -1),'/',1),'www.', -1)))Note that the above will not get rid of the subdomain if it's not "www". For example, if the url is "blog.developer24hours.com/mysql-extract-domain-from-url", then the above SQL will give you "blog.developer24hours.com".
Thursday, April 18, 2013
MySQL generating a random integer between a min and a max
Say you want to generate a number between a and b, you can do the following:
select (FLOOR(a + RAND() * (b - a) ))
select (FLOOR(a + RAND() * (b - a) ))
Wednesday, April 17, 2013
Using Munin to monitor EC2 instances on Amazon
After playing around with CloudWatch, I find the interface very confusing to use. The biggest problem is that EC2 instances are described by AMI image ID rather than my pre-defined machine tag name (Let me know in the comments below if you can figure out how).
Restart the Munin node.
Now in the Master Munin node, edit vi /etc/munin/munin.conf. Search for
[localhost.localdomain]
address 127.0.0.1
use_node_name yes
Change it to
[api1.monetize24hours.com]
address ip-00-000-000-000.ec2.internal
This value must match the host name you defined in the Munin node above. The address is the ec2 private address of the Munin node. This is how Munin master will aggregate and report the data.
Showing Data on Webpages
Make sure the Munin master can connect to your Munin nodes.
If it doesn't connect, add port 4949 for the Munin node's security group.
You can find the Munin master's security group name by clicking on the Security Group and checking the Details tab. It looks something like "sg-e0000000".
Now edit /etc/munin/munin.conf to tweak the log and graph generation directories.
Wait for 5 to 10 minutes. The Perl cron will gather data.
Access the graphs by
We will use .htaccess in the following example.
Change AllowOverride None to AllowOverride All
Restart apache.
I was looking at a few monitoring tools (Nagis, Cacti, Munin, Zabbix) and decided to try to out Munin. The biggest motivator for me is that Instagram is also using Munin.
Let's begin by spinning an Ubuntu instance as the Munin master.
Installing Munin Master and Munin Nodes:
Install munin and munin-node
apt-get install munin
apt-get install munin-node
Install apache (for viewing reports from the Web)
apt-get install apache2
For all the instances you want to monitor, install Munin Node.
apt-get install munin-nodeFor these node instances, we will edit munin-node.conf
vi /etc/munin/munin-node.confChange the host_name. Name this to be something descriptive so you will know what this machine is. The master node will report using this name.
host_name {api1.monetize24hours.com}Change allow from
allow ^127\.0\.0\.1$to
allow ^.*$This is saying allow all internal IPs to connect to. Since AWS elastic address changes all the time, it's better to set it to allow all. Do NOT set it to the instance's external address else you will be charged for data transfer. Make sure all the machines are behind a firewall.
Restart the Munin node.
/etc/init.d/munin-node restartRepeat the settings above for all the Munin nodes.
Now in the Master Munin node, edit vi /etc/munin/munin.conf. Search for
[localhost.localdomain]
address 127.0.0.1
use_node_name yes
Change it to
[api1.monetize24hours.com]
address ip-00-000-000-000.ec2.internal
This value must match the host name you defined in the Munin node above. The address is the ec2 private address of the Munin node. This is how Munin master will aggregate and report the data.
Showing Data on Webpages
Make sure the Munin master can connect to your Munin nodes.
telnet {private_ec2_address} 4949Port 4949 is used for Munin internodes communication
If it doesn't connect, add port 4949 for the Munin node's security group.
You can find the Munin master's security group name by clicking on the Security Group and checking the Details tab. It looks something like "sg-e0000000".
Now edit /etc/munin/munin.conf to tweak the log and graph generation directories.
bdir /var/lib/muninChange the above directories. Create them if they don't exist. Make sure you set the appropriate permissions for the directories.
htmldir /var/www/munin
logdir /var/log/munin
rundir /var/run/munin
Wait for 5 to 10 minutes. The Perl cron will gather data.
Access the graphs by
{public_ec2_address}/muninYou will want to secure the webpages so no one else can access them. Either secure them by ip or username and password.
We will use .htaccess in the following example.
htpasswd -c /etc/apache2/.munin_htpasswd adminCreate /var/www/munin/.htaccess, and put the following:
AuthUserFile /etc/apache2/.munin_htpasswdEdit /etc/apache2/sites-available/default.
AuthGroupFile /dev/null
AuthName EnterPassword
AuthType Basic
Change AllowOverride None to AllowOverride All
Restart apache.
service apache2 restart
Tuesday, April 16, 2013
Using Amazon CloudWatch Command Line Tool to record metrics
Introduction
Amazon CloudWatch provides seamless integration for monitoring AWS resources like EC2 instances, RDS instances, EBS volumes, etc based on CPU utilization, data transfer and disk usage.
There are two types of monitoring: basic and detailed. Basic Monitoring reports at a five-minute frequency. Detailed Monitoring reports at a one-minute frequency while aggregating by AMI ID and instance type.
Monitoring data is retained for two weeks, even if your instance is terminated.
Below are the resources to metrics mapping: (For example, CloudWatch tracks request count and latency of Elastic Load Balancer)
ELB - request count, latency
EBS - read/write latency
RDS - freeable memory, available storage space
SQS - number of messages sent and received
You can also send custom metrics to CloudWatch by using the Put API.
You can view your stats in the AWS Management Console.
For overall status of all AWS services, check AWS Service Health Dashboard.
Setting up Amazon CloudWatch Command Line Tool
Spin up an EC2 instance. (Skip this if you are using your home computer).
Begin by downloading the CloudWatch CLI Tool.
Set AWS_CLOUDWATCH_HOME path in ~/.bashrc
chmod 600 credential-file-path.template
Rename credential-file-path.template to something else (ex. aws_credentials)
Move this file to somewhere else. You may be using this in some other service. For instance, move to /opt/tools/aws.
Add this entry to ~/.bashrc
Publish data points to CloudWatch
CloudWatch allows you to publish data points via PUT requests. CloudWatch only works with data that's in UTC timestamp and within the past two weeks (Only data within two weeks would be retained).
In this example, you will feed CloudWatch with some custom data points.
Execute the following data sets. But substitute the date below to be within a few hours before.
C
Set A (4 data points):
Let's get the data summary:
Login to CloudWatch.
Click on Metrics in the Left Panel. Select Test001 in the "Viewing" Dropdown box.
You can also create alarms based on this metric.
This concludes the tutorial. If you are interested in more advanced tools, check out this post - Using Munin to monitor EC2 instances on Amazon.
Amazon CloudWatch provides seamless integration for monitoring AWS resources like EC2 instances, RDS instances, EBS volumes, etc based on CPU utilization, data transfer and disk usage.
There are two types of monitoring: basic and detailed. Basic Monitoring reports at a five-minute frequency. Detailed Monitoring reports at a one-minute frequency while aggregating by AMI ID and instance type.
Monitoring data is retained for two weeks, even if your instance is terminated.
Below are the resources to metrics mapping: (For example, CloudWatch tracks request count and latency of Elastic Load Balancer)
ELB - request count, latency
EBS - read/write latency
RDS - freeable memory, available storage space
SQS - number of messages sent and received
You can also send custom metrics to CloudWatch by using the Put API.
You can view your stats in the AWS Management Console.
For overall status of all AWS services, check AWS Service Health Dashboard.
Setting up Amazon CloudWatch Command Line Tool
Spin up an EC2 instance. (Skip this if you are using your home computer).
Begin by downloading the CloudWatch CLI Tool.
mkdir /opt/tools/awsInstall zip and unzip the package.
cd /opt/tools/aws
wget http://ec2-downloads.s3.amazonaws.com/CloudWatch-2010-08-01.zip
sudo apt-get install zipCheck if you have JAVA installed
unzip CloudWatch-2010-08-01.zip
java -versionIf not, read Install Java OpenJDK 7 on Amazon EC2 Ubuntu.
Set AWS_CLOUDWATCH_HOME path in ~/.bashrc
export AWS_CLOUDWATCH_HOME=/opt/tools/aws/CloudWatch-1.0.13.4Enter your AWS Access Key ID and Secret Access Key in the file $AWS_CLOUDWATCH_HOME/bin/credential-file-path.template. You can find your credentials in the AWS Management Console.
export PATH=$PATH:$AWS_CLOUDWATCH_HOME/bin
AWSAccessKeyId=
AWSSecretKey=
chmod 600 credential-file-path.template
Rename credential-file-path.template to something else (ex. aws_credentials)
Move this file to somewhere else. You may be using this in some other service. For instance, move to /opt/tools/aws.
Add this entry to ~/.bashrc
export AWS_CREDENTIAL_FILE=/opt/tools/aws/aws_credentialsUpdate ~/.bashrc.
source ~/.bashrcTest the tool:
mon-cmdYou should see the following:
Command Name Description
------------ -----------
help
mon-delete-alarms Delete alarms
mon-describe-alarm-history Describe alarm history
mon-describe-alarms Describe alarms fully.
mon-describe-alarms-for-metric Describe all alarms associated with a single metric
mon-disable-alarm-actions Disable all actions for a given alarm
mon-enable-alarm-actions Enable all actions for a given alarm
mon-get-stats Get metric statistics
mon-list-metrics List user's metrics
mon-put-data Put metric data
mon-put-metric-alarm Create a new alarm or update an existing one
mon-set-alarm-state Manually set the state of an alarm
version Prints the version of the CLI tool and the API.
Publish data points to CloudWatch
CloudWatch allows you to publish data points via PUT requests. CloudWatch only works with data that's in UTC timestamp and within the past two weeks (Only data within two weeks would be retained).
In this example, you will feed CloudWatch with some custom data points.
Execute the following data sets. But substitute the date below to be within a few hours before.
C
Set A (4 data points):
mon-put-data -m RequestLatency -n "Test001" -t 2013-04-16T20:30:00Z -v 87 -u MillisecondsSet B (Instead of sending individual data points, send sum, min, max and sample count):
mon-put-data -m RequestLatency -n "Test001" -t 2013-04-16T20:30:00Z -v 51 -u Milliseconds
mon-put-data -m RequestLatency -n "Test001" -t 2013-04-16T20:30:00Z -v 125 -u Milliseconds
mon-put-data -m RequestLatency -n "Test001" -t 2013-04-16T20:30:00Z -v 235 -u Milliseconds
mon-put-data -m RequestLatency -n "Test001" -t 2013-04-16T21:30:00Z -s "Sum=577,Minimum=65,Maximum=189,SampleCount=5" -u MillisecondsSet C:
mon-put-data -m RequestLatency -n "Test001" -s "Sum=806,Minimum=47,Maximum=328,SampleCount=6" -u MillisecondsThe above are latency data points spread within three hours. Just think of them as some data points.
Let's get the data summary:
mon-get-stats -m RequestLatency -n "Test001" -s "Average" --start-time 2013-04-16T19:30:00Z --headersResults:
Time Average UnitYou can also see the Visual Representation in the AWS Management Console.
2013-04-16 20:30:00 124.5 Milliseconds
2013-04-16 21:30:00 115.4 Milliseconds
2013-04-16 22:29:00 134.33333333333334 Milliseconds
Login to CloudWatch.
Click on Metrics in the Left Panel. Select Test001 in the "Viewing" Dropdown box.
You can also create alarms based on this metric.
This concludes the tutorial. If you are interested in more advanced tools, check out this post - Using Munin to monitor EC2 instances on Amazon.
Scaling Pinterest from 0 to 1 billion
The following is a very informative link sharing how Pinterest scaled from 0 to 1 billion users in under two years. Throughout the years, they have tried different technologies and abandoned some.
Below are some key points:
Notice that they dropped Cassandra, and Rackspace.
Here's the link:
http://highscalability.com/blog/2013/4/15/scaling-pinterest-from-0-to-10s-of-billions-of-page-views-a.html?utm_source=feedly
Below are some key points:
- an architecture is good when growth can be handled by adding more of the same staff (machines)
- when you push a technology to the limit, it will fail in its own special way
- the stack used are MySQL with sharding, Python, Amazon EC2, S3, Akamai, elastic load balancer, memcache, Redis
Notice that they dropped Cassandra, and Rackspace.
Here's the link:
http://highscalability.com/blog/2013/4/15/scaling-pinterest-from-0-to-10s-of-billions-of-page-views-a.html?utm_source=feedly
Saturday, April 13, 2013
Scaling Instagram
The following slideshow gives very good insights on scaling instagram.
The primary technology used are
Take away:
http://www.scribd.com/doc/89025069/Mike-Krieger-Instagram-at-the-Airbnb-tech-talk-on-Scaling-Instagram
The primary technology used are
- Python with Django
- C
- AWS EC2
- Nginx
- Redis
- Postgres
Take away:
- Don't forget your favicons - 404s may cause you a lot of errors and drag down your applications
- Scaling = replacing all components of a car while driving it at 100mph
- Use http://highscalability.com.
- instrument everything
- having everyone know the code inside out
- horizontal sharding
http://www.scribd.com/doc/89025069/Mike-Krieger-Instagram-at-the-Airbnb-tech-talk-on-Scaling-Instagram
Subscribe to:
Posts (Atom)