import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
public class WordCount {

    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(WordCount.class);

        // specify output types
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // specify input and output dirs
        String outputname = "output";
        FileInputFormat.setInputPaths(conf, new Path("input"));
        FileOutputFormat.setOutputPath(conf, new Path(outputname));

        // specify a mapper
        conf.setMapperClass(WordCountMapper.class);

        // specify a reducer, and reuse it as a combiner to pre-aggregate counts on the map side
        conf.setReducerClass(WordCountReducer.class);
        conf.setCombinerClass(WordCountReducer.class);

        client.setConf(conf);
        // Before running the job, delete any output left over from a previous run
        // (HadoopFS is a helper class, defined separately, that runs "hadoop fs" shell commands)
        HadoopFS hdfs_rmr = new HadoopFS("-rmr " + outputname);
        hdfs_rmr.callCommand();

        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // After the job finishes, print the result files to the console
        HadoopFS hdfs_cat = new HadoopFS("-cat " + outputname + "/*");
        hdfs_cat.callCommand();
        for (String line : hdfs_cat.stdout) {
            System.out.println(line);
        }
    }
}
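
// NOTE: HadoopFS is not part of the Hadoop API; its definition is not shown in this
// listing. The sketch below is one possible implementation, assuming it simply wraps
// org.apache.hadoop.fs.FsShell and captures whatever the shell command prints, which
// matches how callCommand() and the stdout field are used by the driver above.
class HadoopFS {

    private final String command;
    public java.util.List<String> stdout = new java.util.ArrayList<String>();

    public HadoopFS(String command) {
        this.command = command;
    }

    // Runs the equivalent of "hadoop fs <command>" in-process and records the lines
    // it prints so the caller can read them from the stdout field.
    public void callCommand() {
        java.io.PrintStream originalOut = System.out;
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        try {
            // Temporarily redirect System.out so FsShell's output can be captured
            System.setOut(new java.io.PrintStream(buffer, true));
            org.apache.hadoop.util.ToolRunner.run(
                    new org.apache.hadoop.fs.FsShell(new org.apache.hadoop.conf.Configuration()),
                    command.trim().split("\\s+"));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.setOut(originalOut);
        }
        for (String line : buffer.toString().split("\n")) {
            if (line.length() > 0) {
                stdout.add(line);
            }
        }
    }
}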