This repository has been archived on 2019-08-08. You can view files and clone it, but cannot push or open issues or pull requests.
s9-mtds-prj-flink/src/main/java/it/polimi/middleware/projects/flink/KMeans.java
Geoffrey Frogeye de6a881428 Basic scaffold
Generate, read, means, write all the points
2019-01-23 23:14:35 +01:00

51 lines
2 KiB
Java

package it.polimi.middleware.projects.flink;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.utils.ParameterTool;
/**
 * Scaffold of the K-Means Flink batch job.
 *
 * <p>At this stage no clustering is performed: the job reads a single-column CSV file of
 * doubles, computes the arithmetic mean of all the values and writes that mean out as CSV.
 *
 * <p>Parameters, parsed from the command line by {@link ParameterTool}:
 * <ul>
 *   <li>{@code input}: path of the CSV file to read (required);</li>
 *   <li>{@code output}: path of the CSV file to write (defaults to {@code output.csv}).</li>
 * </ul>
 */
public class KMeans {

    /**
     * Builds the data flow (read, average, write) and executes it.
     *
     * @param args command-line arguments holding the {@code input} and {@code output} parameters
     * @throws Exception if the Flink job fails; a missing {@code input} parameter is reported
     *     up front by {@link ParameterTool#getRequired(String)}
     */
    public static void main(String[] args) throws Exception {
        final ParameterTool params = ParameterTool.fromArgs(args);
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read CSV input. getRequired fails fast with an error naming the missing key,
        // instead of passing a null path down to the CSV reader.
        DataSet<Tuple1<Double>> csvInput =
                env.readCsvFile(params.getRequired("input")).types(Double.class);

        // Convert CSV to internal format
        DataSet<Double> input = csvInput
                .map(point -> point.f0);

        // DEBUG Compute the mean of all the points: pair every value with a count of one,
        // sum values and counts pairwise, then divide the total by the number of points.
        DataSet<Tuple1<Double>> mean = input
                .map(new MapFunction<Double, Tuple2<Double, Long>>() {
                    public Tuple2<Double, Long> map(Double value) {
                        // The count is a Long so that it cannot overflow on inputs
                        // holding more than Integer.MAX_VALUE points.
                        return new Tuple2<Double, Long>(value, 1L);
                    }
                })
                .reduce(new ReduceFunction<Tuple2<Double, Long>>() {
                    public Tuple2<Double, Long> reduce(Tuple2<Double, Long> a, Tuple2<Double, Long> b) {
                        return new Tuple2<Double, Long>(a.f0 + b.f0, a.f1 + b.f1);
                    }
                })
                .map(new MapFunction<Tuple2<Double, Long>, Tuple1<Double>>() {
                    public Tuple1<Double> map(Tuple2<Double, Long> value) {
                        return new Tuple1<Double>(value.f0 / value.f1);
                    }
                });

        mean.writeAsCsv(params.get("output", "output.csv"));

        // Sinks are lazy: nothing is read or written until the job is executed.
        env.execute("K-Means clustering");
    }
}