Hadoop之计算销售数据中国家出现的次数

Java Hadoop 的 Hello World 项目：计算销售数据中每个国家出现的次数。

准备工作

代码实现

  • 配置maven
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    <!--
      Hadoop artifacts needed to compile and run the MapReduce job in this article.
      Both are pinned to the same version (3.3.0).
      NOTE(review): App sets hadoop.home.dir to a hadoop-3.2.2 directory while these
      artifacts are 3.3.0; confirm the two versions are meant to differ.
    -->
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>3.3.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.0</version>
    </dependency>
  • 实现SalesMapper
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    package com.meekou;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    /**
     * Mapper that emits {@code (field, 1)} for every input line, where {@code field} is the
     * eighth comma-separated value of the line.
     *
     * <p>The line is split on every comma; quoted fields that themselves contain commas are
     * not handled and will shift the column positions.
     */
    public class SalesMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        /**
         * Zero-based index of the column used as the output key.
         * Presumably the country column of the sales CSV (the sample output lists countries).
         */
        private static final int KEY_COLUMN = 7;

        /** Constant count emitted for every record. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; avoids allocating a new Text for every input record. */
        private final Text outputKey = new Text();

        /**
         * Emits {@code (column 7, 1)} for one input line.
         *
         * @param key     input key supplied by the framework (not used)
         * @param value   one line of the input file
         * @param context sink for the emitted pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            // Blank or truncated lines do not have the key column; skip them instead of
            // failing the whole task with ArrayIndexOutOfBoundsException.
            if (fields.length <= KEY_COLUMN) {
                return;
            }
            outputKey.set(fields[KEY_COLUMN]);
            context.write(outputKey, ONE);
        }
    }
  • 实现SalesCountryReducer
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    package com.meekou;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    /**
     * Reducer that adds up all counts received for a key and emits {@code (key, total)}.
     */
    public class SalesCountryReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Sums the values grouped under {@code t_key} and writes the total once.
         *
         * @param t_key   the key shared by all {@code values}
         * @param values  the counts emitted for this key
         * @param context sink for the emitted pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text t_key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            IntWritable total = new IntWritable(sum(values));
            context.write(t_key, total);
        }

        /** Returns the sum of all counts in {@code counts}. */
        private static int sum(Iterable<IntWritable> counts) {
            int running = 0;
            for (IntWritable count : counts) {
                running = running + count.get();
            }
            return running;
        }
    }
  • 实现主程序
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    package com.meekou;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.Job;

    /**
     * Driver for the "SalePerCountry" MapReduce job.
     *
     * <p>Configures a job that runs {@link SalesMapper} and {@link SalesCountryReducer} over
     * an input path and writes {@code Text}/{@code IntWritable} pairs to an output path.
     */
    public final class App {
        /** Fallback for {@code hadoop.home.dir} when the property is not already set. */
        private static final String DEFAULT_HADOOP_HOME =
                "C:/Users/xx/Meekou/Meekou.hadoop/hadoop-3.2.2.tar/hadoop-3.2.2";

        /** Input path used when no first argument is given. */
        private static final String DEFAULT_INPUT = "./Java Hadoop Demo/SalesJan2009.csv";

        /** Output path used when no second argument is given. */
        private static final String DEFAULT_OUTPUT = "./Java Hadoop Demo/result.txt";

        private App() {
        }

        /**
         * Configures and runs the job, blocking until it finishes.
         *
         * <p>Exits the JVM with status 1 if the job reports failure, so that scripts
         * invoking this program can detect it; returns normally on success.
         *
         * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
         *             output path; each falls back to its built-in default when absent
         * @throws Exception if the job cannot be configured, submitted or monitored
         */
        public static void main(String[] args) throws Exception {
            System.out.println("Hello World!");
            // Respect a value passed with -Dhadoop.home.dir; only fall back to the
            // machine-specific default when nothing was provided.
            if (System.getProperty("hadoop.home.dir") == null) {
                System.setProperty("hadoop.home.dir", DEFAULT_HADOOP_HOME);
            }

            String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
            String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "SalePerCountry");
            job.setJarByClass(App.class);
            job.setMapperClass(SalesMapper.class);
            job.setReducerClass(SalesCountryReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            // waitForCompletion returns false when the job fails; surface that as a
            // non-zero exit status instead of silently discarding it.
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
        }
    }
  • 运行结果part-r-00000
    1
    2
    3
    4
    5
    Argentina	1
    Australia 38
    Austria 7
    Bahrain 1
    ....

引用

源代码