Friday, 30 November 2012

HDFS Misc

HDFS configuration

LZO compression

Good overview re LZO set up here

LZO compression notes (on CentOS 6.1)


Install these packages:
lzo-2.03-3.1.el6.x86_64.rpm
lzop-1.02-0.9.rc1.el6.x86_64.rpm
Check:
$ rpm -qa | grep lzo
lzo-2.03-3.1.el6.x86_64
lzop-1.02-0.9.rc1.el6.x86_64

# Next list the contents of the tarball lzo-hadoop.tar.gz (it is unpacked further down)
cd /mnt/nfs/vol1/packages
tar tvfz lzo-hadoop.tar.gz
drwx------ root/root         0 2012-04-12 11:30 native/
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/lib/
-rwx------ root/root     67841 2012-04-12 11:40 native/Linux-amd64-64/lib/libgplcompression.so.0.0.0
lrwxrwxrwx root/root         0 2012-04-12 11:40 native/Linux-amd64-64/lib/libgplcompression.so.0 -> libgplcompression.so.0.0.0
lrwxrwxrwx root/root         0 2012-04-12 11:40 native/Linux-amd64-64/lib/libgplcompression.so -> libgplcompression.so.0.0.0
-rw------- root/root      1124 2012-04-12 11:40 native/Linux-amd64-64/lib/libgplcompression.la
-rw-r--r-- root/root    104238 2012-04-12 11:40 native/Linux-amd64-64/lib/libgplcompression.a
-rwx------ root/root    257940 2012-04-12 11:40 native/Linux-amd64-64/libtool
-rw------- root/root     18384 2012-04-12 11:40 native/Linux-amd64-64/Makefile
-rwx------ root/root     59681 2012-04-12 11:40 native/Linux-amd64-64/config.status
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/
-rw------- root/root       332 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/LzoDecompressor.lo
-rw------- root/root     49048 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/LzoDecompressor.o
-rw------- root/root     54400 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/LzoCompressor.o
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.libs/
-rw------- root/root     49048 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.libs/LzoDecompressor.o
-rw------- root/root     54400 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.libs/LzoCompressor.o
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.deps/
-rw------- root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.deps/.dirstamp
-rw------- root/root      3803 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.deps/LzoDecompressor.Plo
-rw------- root/root      3799 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.deps/LzoCompressor.Plo
-rw------- root/root         0 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/.dirstamp
-rw------- root/root       326 2012-04-12 11:40 native/Linux-amd64-64/impl/lzo/LzoCompressor.lo
-rw------- root/root        28 2012-04-12 11:40 native/Linux-amd64-64/impl/stamp-h1
-rw------- root/root      4324 2012-04-12 11:40 native/Linux-amd64-64/impl/config.h
drwx------ root/root         0 2012-04-12 11:40 native/Linux-amd64-64/.libs/
-rwx------ root/root     67841 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.so.0.0.0
lrwxrwxrwx root/root         0 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.so.0 -> libgplcompression.so.0.0.0
lrwxrwxrwx root/root         0 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.so -> libgplcompression.so.0.0.0
lrwxrwxrwx root/root         0 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.la -> ../libgplcompression.la
-rw------- root/root      1124 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.lai
-rw------- root/root    104238 2012-04-12 11:40 native/Linux-amd64-64/.libs/libgplcompression.a
drwx------ root/root         0 2012-04-12 11:30 native/Linux-amd64-64/src/
drwx------ root/root         0 2012-04-12 11:30 native/Linux-amd64-64/src/com/
drwx------ root/root         0 2012-04-12 11:30 native/Linux-amd64-64/src/com/hadoop/
drwx------ root/root         0 2012-04-12 11:30 native/Linux-amd64-64/src/com/hadoop/compression/
drwx------ root/root         0 2012-04-12 11:38 native/Linux-amd64-64/src/com/hadoop/compression/lzo/
-rw------- root/root      1423 2012-04-12 11:40 native/Linux-amd64-64/src/com/hadoop/compression/lzo/com_hadoop_compression_lzo_LzoDecompressor.h
-rw------- root/root      1398 2012-04-12 11:40 native/Linux-amd64-64/src/com/hadoop/compression/lzo/com_hadoop_compression_lzo_LzoCompressor.h
-rw------- root/root      1123 2012-04-12 11:40 native/Linux-amd64-64/libgplcompression.la
-rw------- root/root     34625 2012-04-12 11:40 native/Linux-amd64-64/config.log
-rw------- root/root     62240 2012-04-12 11:40 hadoop-lzo-0.4.15.jar

cd ${HADOOP_HOME}/lib
tar xvfz /mnt/nfs/vol1/packages/lzo-hadoop.tar.gz
# Get the permissions right
chmod 640 hadoop-lzo-0.4.15.jar
cd native
chown -R hadoop:hadoop Linux-amd64-64
cd Linux-amd64-64
find . -type f -exec chmod 640 {} \;
find . -type d -exec chmod 750 {} \;

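Note - this caught me out: symlinks to the library are also needed directly in the Linux-amd64-64 directory (not just in lib/)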
cd ${HADOOP_HOME}/lib/native/Linux-amd64-64
-bash-4.1$ ln -s ./lib/libgplcompression.so.0.0.0 libgplcompression.so
-bash-4.1$ ln -s ./lib/libgplcompression.so.0.0.0 libgplcompression.so.0
-bash-4.1$ ls -atl libg*
lrwxrwxrwx 1 hadoop hadoop   32 Nov 30 13:43 libgplcompression.so.0 -> ./lib/libgplcompression.so.0.0.0
lrwxrwxrwx 1 hadoop hadoop   32 Nov 30 13:42 libgplcompression.so -> ./lib/libgplcompression.so.0.0.0
-rw-r----- 1 hadoop hadoop 1123 Apr 12  2012 libgplcompression.la


# Add the following to ${HADOOP_HOME}/conf/core-site.xml
<property>
  <name>io.compression.codecs</name>
  <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
  <!-- added from build steps of lzo -->
</property>
<property>
  <name>io.compression.codec.lzo.class</name>
  <value>com.hadoop.compression.lzo.LzoCodec</value>
  <!-- added from build steps of lzo -->
</property>

Wednesday, 14 November 2012

Unix stuff - fsck usb h/d eg

Unix bits

fsck external hard disk (/mnt/usb) that lost power


# df -h


/dev/sdh1             1.8T  1.7T   57G  97% /mnt/usb2
/dev/sdf1             1.8T  196M  1.7T   1% /mnt/usb

# cd /mnt/usb
# ls 
ls: reading directory .: Input/output error
# fdisk -l
# fdisk -l | grep dev


Disk /dev/mapper/RootVolume-OptVol: 34.4 GB, 34359738368 bytes
Disk /dev/sdh: 2000.4 GB, 2000398934016 bytes
/dev/sdh1               1      243202  1953514583+  ee  GPT
Disk /dev/sdf: 2000.4 GB, 2000398934016 bytes
/dev/sdf1               1      243202  1953514583+  ee  GPT

# add device to fstab
# vi /etc/fstab
# cat /etc/fstab


/dev/sdh1             1.8T  1.7T   57G  97% /mnt/usb2
/dev/sdf1             1.8T  196M  1.7T   1% /mnt/usb

# mount /mnt/usb
# df -h


/dev/sdh1             1.8T  1.7T   57G  97% /mnt/usb2
/dev/sdf1             1.8T  196M  1.7T   1% /mnt/usb

# fsck.ext3 /dev/sdf1

e2fsck 1.41.12 (17-May-2010)
/dev/sdf1 is mounted.  

WARNING!!!  The filesystem is mounted.   If you continue you ***WILL***
cause ***SEVERE*** filesystem damage.

Do you really want to continue (y/n)? yes

/dev/sdf1: recovering journal
/dev/sdf1: clean, 11/122101760 files, 7713452/488378637 blocks

Tuesday, 13 November 2012

Throttling traffic via iptables


Useful link answer to: Throttle Traffic via iptables question

Note from Graham Hargreaves (prob based on someone else) ...


Not very accurate but definitely restricts the flow.
Change modemif variable to be the interface you need to throttle.
The example below never allowed a download to go above 130kbs and set to ambit it never went above 3Mbs (Probably needs some more testing)

To turn on:

#!/bin/bash
modemif=eth0

iptables -t mangle -A POSTROUTING -o $modemif -p tcp -m tos --tos Minimize-Delay -j CLASSIFY --set-class 1:10
iptables -t mangle -A POSTROUTING -o $modemif -p tcp --dport 80 -j CLASSIFY --set-class 1:10
iptables -t mangle -A POSTROUTING -o $modemif -p tcp --dport 443 -j CLASSIFY --set-class 1:10

tc qdisc add dev $modemif root handle 1: htb default 12
tc class add dev $modemif parent 1:1 classid 1:12 htb rate 50kbit ceil 50kbit



To turn the above off simply run:

tc qdisc del dev eth0 root
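iptables -t mangle -F POSTROUTING   # flushes the CLASSIFY rules added above (and any other rules in mangle/POSTROUTING)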


Tuesday, 17 July 2012

Friday, 13 July 2012

Thursday, 12 July 2012

AWS related - notes to self

Uploading files to AWS

A python interface to AWS

e.g. code using the interface

How to decrypt S3 data before running EMR

The below discussion demonstrates how to decrypt S3 data as a bootstrap action to the EMR cluster: https://forums.aws.amazon.com/thread.jspa?threadID=50189

Another example is to use the S3 Java client side encryption in Map/Reduce jobs: http://aws.typepad.com/aws/2011/04/client-side-data-encryption-using-the-aws-sdk-for-java.html

AWS streaming job flow

See link below re how to create a streaming job flow (note: can use gzip + password as part of the streaming job)

Wednesday, 11 July 2012

Installing HA - notes to self

c/o Graham H

Checking the HA


[root@dmmlw-r410-12 ~]# crm_mon


============
Last updated: Tue Jul 10 14:12:10 2012
Stack: openais
Current DC: myserver2 - partition with quorum
Version: 1.1.5-5.el6-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
1 Resources configured.
============


Online: [ myserver1 myserver2 ]


shared_ip_one   (ocf::heartbeat:IPaddr):        Started myserver1

Configuration


Install these packages:
cifs-utils-4.8.1-2.el6.x86_64.rpm
cluster-glue-1.0.5-2.el6.x86_64.rpm
cluster-glue-libs-1.0.5-2.el6.x86_64.rpm
corosync-1.2.3-36.el6.x86_64.rpm
corosynclib-1.2.3-36.el6.x86_64.rpm
corosynclib-devel-1.2.3-36.el6.x86_64.rpm
heartbeat-3.0.4-1.el6.x86_64.rpm  #from epel repo
heartbeat-libs-3.0.4-1.el6.x86_64.rpm  #from epel repo
keyutils-1.4-1.el6.x86_64.rpm
libibverbs-1.1.4-2.el6.x86_64.rpm
libmlx4-1.0.1-7.el6.x86_64.rpm
librdmacm-1.0.10-2.el6.x86_64.rpm
libtalloc-2.0.1-1.1.el6.x86_64.rpm
libtool-ltdl-2.2.6-15.5.el6.x86_64.rpm
lm_sensors-libs-3.1.1-10.el6.x86_64.rpm
net-snmp-libs-5.5-31.el6.x86_64.rpm
pacemaker-1.1.5-5.el6.x86_64.rpm
pacemaker-cts-1.1.5-5.el6.x86_64.rpm
pacemaker-libs-1.1.5-5.el6.x86_64.rpm
perl-TimeDate-1.16-11.1.el6.noarch.rpm
PyXML-0.8.4-19.el6.x86_64.rpm
resource-agents-3.0.12-22.el6.x86_64.rpm
net-snmp-5.5-31.el6.x86_64.rpm

sudo rpm -i --nodeps  libvirt-0.8.7-18.el6.x86_64.rpm  libvirt-client-0.8.7-18.el6.x86_64.rpm numactl-2.0.3-9.el6.x86_64.rpm gnutls-utils-2.8.5-4.el6.x86_64.rpm nc-1.84-22.el6.x86_64.rpm libxslt-1.1.26-2.el6.x86_64.rpm netcf-libs-0.1.7-1.el6.x86_64.rpm augeas-libs-0.7.2-6.el6.x86_64.rpm cyrus-sasl-md5-2.1.23-8.el6.x86_64.rpm qpid-cpp-client-0.10-3.el6.x86_64.rpm boost-1.41.0-11.el6.x86_64.rpm boost-1.41.0-11.el6.x86_64.rpm             boost-date-time-1.41.0-11.el6.x86_64.rpm   boost-python-1.41.0-11.el6.x86_64.rpm           boost-test-1.41.0-11.el6.x86_64.rpm  boost-regex-1.41.0-11.el6.x86_64.rpm            boost-graph-1.41.0-11.el6.x86_64.rpm       boost-serialization-1.41.0-11.el6.x86_64.rpm    boost-wave-1.41.0-11.el6.x86_64.rpm boost-iostreams-1.41.0-11.el6.x86_64.rpm   boost-signals-1.41.0-11.el6.x86_64.rpm ebtables-2.0.9-6.el6.x86_64.rpm iscsi-initiator-utils-6.2.0.872-21.el6.x86_64.rpm libicu-4.2.1-9.el6.x86_64.rpm dnsmasq-2.48-4.el6.x86_64.rpm radvd-1.6-1.el6.x86_64.rpm qemu-img-0.12.1.2-2.160.el6.x86_64.rpm yajl-1.0.7-3.el6.x86_64.rpm libcgroup-0.37-2.el6.x86_64.rpm libpciaccess-0.10.9-4.el6.x86_64.rpm 
sudo rpm -i fence-virtd-libvirt-0.2.1-8.el6.x86_64.rpm fence-virtd-0.2.1-8.el6.x86_64.rpm
sudo rpm -i libesmtp-1.0.4-15.el6.x86_64.rpm
sudo rpm -i clusterlib-3.0.12-41.el6.x86_64.rpm
sudo rpm -i openais-1.1.1-7.el6.x86_64.rpm openaislib-1.1.1-7.el6.x86_64.rpm
sudo rpm -i pexpect-2.3-6.el6.noarch.rpm
sudo rpm -i perl-Net-Telnet-3.03-11.el6.noarch.rpm
sudo rpm -i cman-3.0.12-41.el6.x86_64.rpm fence-virt-0.2.1-8.el6.x86_64.rpm fence-agents-3.0.12-23.el6.x86_64.rpm net-snmp-utils-5.5-31.el6.x86_64.rpm ricci-0.16.2-35.el6.x86_64.rpm sg3_utils-1.28-3.el6.x86_64.rpm  sg3_utils-libs-1.28-3.el6.x86_64.rpm oddjob-0.30-5.el6.x86_64.rpm nss-tools-3.12.9-9.el6.x86_64.rpm nss-tools-3.12.9-9.el6.x86_64.rpm modcluster-0.16.2-10.el6.x86_64.rpm
sudo rpm -i pacemaker-1.1.5-5.el6.x86_64.rpm pacemaker-cts-1.1.5-5.el6.x86_64.rpm pacemaker-libs-1.1.5-5.el6.x86_64.rpm

create /etc/corosync/corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {
version: 2
secauth: off
threads: 0
interface {
ringnumber: 0
bindnetaddr: 10.x.x.x
#mcastaddr: 226.94.1.1
broadcast: yes
mcastport: 5405
ttl: 1
}
}
logging {
fileline: off
to_stderr: no
to_logfile: yes
to_syslog: yes
logfile: /var/log/cluster/corosync.log
debug: on
timestamp: on
logger_subsys {
subsys: AMF
debug: off
}
}
amf {
mode: disabled
}
#end of file
############################
run: 
crm configure
paste the below into the new shell:
primitive shared_ip_one IPaddr params ip=10.x.x.0 cidr_netmask="255.255.254.0" nic="bond0"
property stonith-enabled="false"
location share_ip_one_master shared_ip_one 100: myserver1
monitor shared_ip_one 20s:10s
commit
exit