import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class WordCount {

    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(WordCount.class);

        // specify output types
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // specify input and output dirs
        String outputname = "output";
        FileInputFormat.setInputPaths(conf, new Path("input"));
        FileOutputFormat.setOutputPath(conf, new Path(outputname));

        // specify a mapper
        conf.setMapperClass(WordCountMapper.class);

        // specify a reducer (also used as the combiner)
        conf.setReducerClass(WordCountReducer.class);
        conf.setCombinerClass(WordCountReducer.class);

        client.setConf(conf);

        // Before running the job, delete any previous output directory
        HadoopFS hdfs_rmr = new HadoopFS("-rmr " + outputname);
        hdfs_rmr.callCommand();

        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Print the job output to the console
        HadoopFS hdfs_cat = new HadoopFS("-cat " + outputname + "/*");
        hdfs_cat.callCommand();
        for (String line : hdfs_cat.stdout) {
            System.out.println(line);
        }
    }
}
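
The driver refers to WordCountMapper and WordCountReducer, which are not shown in this section. A minimal sketch of what they might look like with the same old-style org.apache.hadoop.mapred API follows; the class names and key/value types come from the driver, but the tokenizing logic is an assumption, not necessarily the original code. Each class goes in its own .java file.

// WordCountMapper.java (sketch): emits (word, 1) for every token in each input line
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordCountMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        StringTokenizer tokenizer = new StringTokenizer(value.toString());
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            output.collect(word, ONE);
        }
    }
}

// WordCountReducer.java (sketch): sums the counts for each word; also usable as the combiner
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}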
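
The driver also depends on a HadoopFS helper whose definition is not shown here: its constructor takes the fs shell arguments, callCommand() runs them, and stdout holds the captured output lines. The sketch below is an assumption about how such a helper could work, simply shelling out to the hadoop command-line client; it is not necessarily the original implementation.

// HadoopFS.java (sketch): runs "hadoop fs <args>" and captures its standard output
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

public class HadoopFS {

    private final String fsArgs;
    public List<String> stdout = new ArrayList<String>();

    public HadoopFS(String fsArgs) {
        this.fsArgs = fsArgs;
    }

    public void callCommand() {
        try {
            // Requires the "hadoop" command to be on the PATH
            Process p = Runtime.getRuntime().exec("hadoop fs " + fsArgs);
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(p.getInputStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                stdout.add(line);
            }
            p.waitFor();
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}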