echo "Map Reduce"
echo "------------------"
echo "This section explains creates a minimalistic map reduce job."
echo ""
echo "First set required variables"
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar
echo ""
echo "Compile the accompanying wordcount example."
echo "We use the hadoop command instead of an direct call to javac because"
echo "the former sets adds the necessary jar files to the classpath."
hadoop com.sun.tools.javac.Main WordCount.java
echo "Package the example into a jar file"
jar cf wc.jar WordCount*.class
echo "Set output destination"
OUT=/tmp/fileout-$RANDOM
echo "Run the map reduce job invoking the main class of WordCount in the wc.jar file"
hadoop jar wc.jar WordCount $IN $OUT
echo "The output of a map reduce job is a directory with potentially many part-r-xxxx files"
echo "Each of these files contains the output of a reducer."
hdfs dfs -cat $OUT/part-r-00000 | tail -n 2
echo "Remove the test output directory"
hdfs dfs -rm -r $OUT
echo "Remove the test input file"
hdfs dfs -rm $IN