=====Using an Oracle NoSQL store with Hadoop MapReduce=====

With the Hadoop-compatible InputFormat class **"oracle.kv.hadoop.KVInputFormat"** the keys and values of an Oracle NoSQL store can be read directly as the input of a MapReduce job.
+ | |||
+ | {{ : | ||
+ | |||
+ | |||

The only real difficulty is passing the correct classpath to Hadoop.
As a last resort, often the only thing that helps is to pack all required libraries directly into the job's JAR file.

Unfortunately, I did not manage to hand the classpath of the Oracle NoSQL client library over to the job via the Hadoop command line options.

This produced the following error in the mapper task:
<code>
Error: java.lang.RuntimeException:
</code>
+ | |||
+ | |||
+ | Daher die kvclient.jar mit meine Klassen in einen Jar File verpackt. | ||
+ | |||
+ | |||

===Example===

**Mapper class**:

<code java>
package gpi.hadoop;

import java.io.IOException;

import oracle.kv.Key;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// input key - input value - output key - output value
public class PWDStoreMapper extends Mapper<Text, Text, Text, IntWritable> {

    static IntWritable oneValue = new IntWritable(1);

    @Override
    public void map(Text KVKey, Text valueArg, Context context) throws IOException, InterruptedException {

        // read one key of the store
        Key key = Key.fromString(KVKey.toString());

        // convert back to the canonical format, keeping only the major path
        String keyName = Key.createKey(key.getMajorPath()).toString();

        // emit the major path of the key with a count of one
        context.write(new Text(keyName), oneValue);
    }
}
</code>
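
The mapper reduces every key of the store to its major path, so all records below the same major path are counted together. A small standalone sketch of that conversion; the key "/user/4711/-/passwd" is a made-up example:

<code java>
import oracle.kv.Key;

public class KeyDemo {
    public static void main(String[] args) {
        // hypothetical key: major path /user/4711, minor path /passwd
        Key key = Key.fromString("/user/4711/-/passwd");
        // keep only the major path
        Key major = Key.createKey(key.getMajorPath());
        // prints the canonical major path, e.g. "/user/4711"
        System.out.println(major.toString());
    }
}
</code>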
+ | |||
+ | |||
+ | Reducer Klasse: | ||
+ | |||
+ | <code java> | ||
+ | package gpi.hadoop; | ||
+ | |||
+ | import java.io.IOException; | ||
+ | |||
+ | import java.util.Iterator; | ||
+ | |||
+ | import org.apache.hadoop.io.IntWritable; | ||
+ | import org.apache.hadoop.io.Text; | ||
+ | import org.apache.hadoop.mapreduce.Reducer; | ||
+ | |||
+ | |||
+ | public class PWDStoreReducer extends Reducer< | ||
+ | |||
+ | private IntWritable totalWordCount = new IntWritable(); | ||
+ | |||
+ | @Override | ||
+ | public void reduce(Text KVKey, Iterable< | ||
+ | InterruptedException { | ||
+ | int keycount = 0; | ||
+ | for (IntWritable count : counts) { | ||
+ | keycount += 1; | ||
+ | } | ||
+ | context.write(KVKey, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | </ | ||
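
Since the reducer only adds up per-key counts, it could also be registered as a combiner in the job definition below, so partial sums are already built on the map side. This is an optional, standard MapReduce setting and not part of the original driver:

<code java>
// optional: pre-aggregate the counts on the map side
job.setCombinerClass(PWDStoreReducer.class);
</code>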
+ | |||
+ | **Job Definition ** | ||
+ | |||
+ | <code java> | ||
+ | package gpi.hadoop; | ||
+ | |||
+ | import oracle.kv.hadoop.KVInputFormat; | ||
+ | |||
+ | import org.apache.hadoop.conf.Configuration; | ||
+ | import org.apache.hadoop.conf.Configured; | ||
+ | import org.apache.hadoop.fs.Path; | ||
+ | import org.apache.hadoop.io.IntWritable; | ||
+ | import org.apache.hadoop.io.Text; | ||
+ | import org.apache.hadoop.mapreduce.Job; | ||
+ | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
+ | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; | ||
+ | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
+ | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; | ||
+ | import org.apache.hadoop.util.GenericOptionsParser; | ||
+ | import org.apache.hadoop.util.Tool; | ||
+ | import org.apache.hadoop.util.ToolRunner; | ||
+ | |||
+ | public class PWDStoreHadoop | ||
+ | |||
+ | |||
+ | public static void main(String[] args) throws Exception { | ||
+ | | ||
+ | Configuration conf = new Configuration(); | ||
+ | | ||
+ | String[] otherArgs = new GenericOptionsParser(conf, | ||
+ | | ||
+ | // create a new Configuration | ||
+ | Job job = Job.getInstance(conf); | ||
+ | job.setJobName(" | ||
+ | |||
+ | //main driver Class | ||
+ | job.setJarByClass(PWDStoreHadoopT.class); | ||
+ | |||
+ | // set the Input Format Classe | ||
+ | job.setInputFormatClass(KVInputFormat.class); | ||
+ | KVInputFormat.setKVStoreName(" | ||
+ | //Parameter for the Input Format Class | ||
+ | String [] kvhostList = {" | ||
+ | KVInputFormat.setKVHelperHosts(kvhostList); | ||
+ | |||
+ | //set the Mapper | ||
+ | job.setMapperClass(PWDStoreMapper.class); | ||
+ | |||
+ | // Reducer | ||
+ | job.setReducerClass(PWDStoreReducer.class); | ||
+ | job.setOutputFormatClass(TextOutputFormat.class); | ||
+ | FileOutputFormat.setOutputPath(job, | ||
+ | | ||
+ | //set Output Class | ||
+ | job.setOutputKeyClass(Text.class); | ||
+ | job.setOutputValueClass(IntWritable.class); | ||
+ | |||
+ | // Execute job and return status | ||
+ | job.submit(); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | |||
+ | </ | ||
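
With everything packed into one JAR, the job can be submitted directly; the JAR name and the HDFS output directory below are only examples:

<code bash>
hadoop jar pwdstore-job.jar gpi.hadoop.PWDStoreHadoop /user/gpipperr/pwdstore_out

# inspect the result: one line per major path with its key count
hdfs dfs -cat /user/gpipperr/pwdstore_out/part-r-00000
</code>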
+ | |||
+ | |||
+ | ====Quellen==== | ||
+ | |||
+ | |||
+ | * http:// | ||
+ | * http:// | ||
+ | |||