51 lines
2 KiB
Java
51 lines
2 KiB
Java
package it.polimi.middleware.projects.flink;
|
|
|
|
import org.apache.flink.api.common.functions.MapFunction;
|
|
import org.apache.flink.api.common.functions.ReduceFunction;
|
|
import org.apache.flink.api.common.typeinfo.TypeHint;
|
|
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
|
import org.apache.flink.api.java.tuple.Tuple1;
|
|
import org.apache.flink.api.java.tuple.Tuple2;
|
|
|
|
import org.apache.flink.api.java.DataSet;
|
|
import org.apache.flink.api.java.ExecutionEnvironment;
|
|
import org.apache.flink.api.java.utils.ParameterTool;
|
|
|
|
|
|
public class KMeans {
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
final ParameterTool params = ParameterTool.fromArgs(args);
|
|
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
|
|
|
|
// Read CSV input
|
|
DataSet<Tuple1<Double>> csvInput = env.readCsvFile(params.get("input")).types(Double.class);
|
|
|
|
// Convert CSV to internal format
|
|
DataSet<Double> input = csvInput
|
|
.map(point -> point.f0);
|
|
|
|
// DEBUG Means all the points
|
|
DataSet<Tuple1<Double>> mean = input
|
|
.map(new MapFunction<Double, Tuple2<Double, Integer>>() {
|
|
public Tuple2<Double, Integer> map(Double value) {
|
|
return new Tuple2<Double, Integer>(value, 1);
|
|
}
|
|
})
|
|
.reduce(new ReduceFunction<Tuple2<Double, Integer>>() {
|
|
public Tuple2<Double, Integer> reduce(Tuple2<Double, Integer> a, Tuple2<Double, Integer> b) {
|
|
return new Tuple2<Double, Integer>(a.f0 + b.f0, a.f1 + b.f1);
|
|
}
|
|
})
|
|
.map(new MapFunction<Tuple2<Double, Integer>, Tuple1<Double>>() {
|
|
public Tuple1<Double> map(Tuple2<Double, Integer> value) {
|
|
return new Tuple1<Double>(value.f0 / value.f1);
|
|
}
|
|
});
|
|
|
|
mean.writeAsCsv(params.get("output", "output.csv"));
|
|
|
|
env.execute("K-Means clustering");
|
|
}
|
|
}
|