using System;
using System.IO;
using org.apache.hadoop.conf;
using org.apache.hadoop.fs;
using org.apache.hadoop.io;
using org.apache.hadoop.mapreduce;
using org.apache.hadoop.mapreduce.lib.input;
using org.apache.hadoop.mapreduce.lib.output;
public class WordCount { public static class TokenizerMapper extends Mapper
2.2 Apache Spark入门
2.2.1 安装Spark
下载Spark安装包。
解压安装包。
配置环境变量（例如将 SPARK_HOME 指向解压后的 Spark 目录，并把其 bin 子目录加入 PATH）。
2.2.2 编写Spark程序
using System;
using System.IO;
using Microsoft.Spark;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Dataframe;
/// <summary>
/// Word-count sample for .NET for Apache Spark: reads a text file, splits every
/// line on single spaces, and prints how often each word occurs.
/// </summary>
public class SparkWordCount
{
    /// <summary>
    /// Entry point. Counts the words of a text file and prints the resulting table.
    /// </summary>
    /// <param name="args">
    /// Optional command-line arguments. When at least one is supplied, the first
    /// is used as the path of the text file to read; otherwise the placeholder
    /// path <c>path/to/your/file.txt</c> is used.
    /// </param>
    public static void Main(string[] args)
    {
        string inputPath = (args != null && args.Length > 0)
            ? args[0]
            : "path/to/your/file.txt";

        // DataFrames are created through a SparkSession; SparkContext does not
        // offer a public text-file reader that yields a DataFrame.
        SparkSession spark = SparkSession
            .Builder()
            .AppName("SparkWordCount")
            .GetOrCreate();

        try
        {
            // The text reader produces a single string column named "value",
            // which is the column the SQL expression below refers to.
            DataFrame df = spark.Read().Text(inputPath);

            DataFrame wordCounts = df
                .SelectExpr("explode(split(value, \" \")) as word")
                .GroupBy("word")
                .Count();

            wordCounts.Show();
        }
        finally
        {
            // Stop the session even when reading or aggregating fails.
            spark.Stop();
        }
    }
}