首页 / HADOOP / Hadoop Demo 倒排索引
Hadoop Demo 倒排索引
内容导读
互联网集市收集整理的这篇技术教程文章主要介绍了Hadoop Demo 倒排索引,小编现在分享给大家,供广大互联网技能从业者学习和参考。文章包含2826字,纯文字阅读大概需要5分钟。
内容图文
![Hadoop Demo 倒排索引](/upload/InfoBanner/zyjiaocheng/1070/6a4b5f49e4004c70a03b9e06f89bc3df.jpg)
package com.asin.hdp.inverted;
import java.io.IOException;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that builds an inverted index: for every word it lists the
 * input files containing that word together with the number of occurrences
 * in each file.
 *
 * <p>Data flow:
 * <ul>
 *   <li>map: {@code (offset, line)} -> {@code (word, "file:1")}</li>
 *   <li>combine: {@code (word, ["file:n", ...])} -> {@code (word, "file:sum")} per file</li>
 *   <li>reduce: {@code (word, ["file:n", ...])} -> {@code (word, "file:sum\tfile:sum\t...")}</li>
 * </ul>
 *
 * <p>The combiner consumes and produces records of exactly the same shape.
 * Hadoop treats a combiner as an optional optimisation that may be applied
 * zero, one, or several times to the map output, so the job must produce the
 * same result in all of those cases.
 */
public class InvertedIndexCombine {

    /** Separates the file name from its occurrence count inside a posting, e.g. {@code a.txt:3}. */
    private static final String POSTING_SEPARATOR = ":";

    /**
     * Configures and runs the job on the hard-coded local input files, writing
     * the index to {@code e:/outputCombine}. Exits with status 0 on success and
     * 1 on failure.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(InvertedIndexCombine.class);

        job.setMapperClass(invertedMapper.class);
        job.setCombinerClass(invertedCombine.class);
        job.setReducerClass(invertedReduce.class);

        // Map output and final output share the same types, so setting the
        // job-level output classes covers both.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("e:/a.txt"));
        FileInputFormat.addInputPath(job, new Path("e:/b.txt"));
        FileInputFormat.addInputPath(job, new Path("e:/c.txt"));
        FileOutputFormat.setOutputPath(job, new Path("e:/outputCombine"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Adds up the occurrence counts of a word per file.
     *
     * <p>Shared by the combiner and the reducer so that both interpret postings
     * identically. The result is sorted by file name, which makes the emitted
     * order deterministic.
     *
     * @param postings values of the form {@code file:count}
     * @return total count per file name, ordered by file name
     * @throws IOException if a posting is not of the form {@code file:count}
     */
    private static Map<String, Long> sumByFile(Iterable<Text> postings) throws IOException {
        Map<String, Long> totals = new TreeMap<String, Long>();
        for (Text posting : postings) {
            String entry = posting.toString();
            // Split on the LAST separator: the count never contains one, while
            // a file name might.
            int cut = entry.lastIndexOf(POSTING_SEPARATOR);
            if (cut < 0) {
                throw new IOException("Malformed posting (expected file:count): " + entry);
            }
            String file = entry.substring(0, cut);
            long count;
            try {
                count = Long.parseLong(entry.substring(cut + 1));
            } catch (NumberFormatException e) {
                throw new IOException("Malformed posting (expected file:count): " + entry, e);
            }
            Long soFar = totals.get(file);
            totals.put(file, soFar == null ? count : soFar + count);
        }
        return totals;
    }

    /**
     * Emits one {@code (word, "file:1")} record for every space-separated token
     * of the input line, where {@code file} is the name of the file the current
     * split belongs to.
     */
    public static class invertedMapper extends Mapper<LongWritable, Text, Text, Text> {

        // Reused across calls; context.write serialises the contents immediately.
        private final Text word = new Text();
        private final Text posting = new Text();

        /**
         * @param key byte offset of the line (not used)
         * @param value one line of input text
         * @param context task context used to look up the split and emit records
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            // getName() already returns only the final path component, so no
            // directory or drive prefix needs to be stripped.
            String name = split.getPath().getName();
            posting.set(name + POSTING_SEPARATOR + "1");

            StringTokenizer token = new StringTokenizer(value.toString(), " ");
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, posting);
            }
        }
    }

    /**
     * Pre-aggregates postings on the map side: for each word, collapses all
     * {@code file:n} values into a single {@code file:sum} value per file.
     *
     * <p>Input and output have the same key and value format, so the result of
     * the job does not depend on how many times this combiner runs.
     */
    public static class invertedCombine extends Reducer<Text, Text, Text, Text> {

        private final Text posting = new Text();

        /**
         * @param key the word
         * @param values postings of the form {@code file:count}
         * @param context task context used to emit the merged postings
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            for (Map.Entry<String, Long> total : sumByFile(values).entrySet()) {
                posting.set(total.getKey() + POSTING_SEPARATOR + total.getValue());
                context.write(key, posting);
            }
        }
    }

    /**
     * Produces the final index line for a word: every file that contains the
     * word followed by its total count, as {@code file:sum} entries each
     * terminated by a tab.
     */
    public static class invertedReduce extends Reducer<Text, Text, Text, Text> {

        /**
         * @param key the word
         * @param values postings of the form {@code file:count}, combined or not
         * @param context task context used to emit the index line
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            StringBuilder val = new StringBuilder();
            for (Map.Entry<String, Long> total : sumByFile(values).entrySet()) {
                // Each entry keeps its trailing tab so the output line format
                // stays the same as before.
                val.append(total.getKey())
                        .append(POSTING_SEPARATOR)
                        .append(total.getValue())
                        .append("\t");
            }
            context.write(new Text(key), new Text(val.toString()));
        }
    }
}
原文:http://www.cnblogs.com/datadev/p/7052895.html
内容总结
以上是互联网集市为您收集整理的Hadoop Demo 倒排索引全部内容,希望文章能够帮你解决Hadoop Demo 倒排索引所遇到的程序开发问题。 如果觉得互联网集市技术教程内容还不错,欢迎将互联网集市网站推荐给程序员好友。
内容备注
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件至 gblab@vip.qq.com 举报,一经查实,本站将立刻删除。
内容手机端
扫描二维码推送至手机访问。