How do you remove duplicate lines from a CSV or TXT file?
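The answer is quite straightforward: read the file line by line with a BufferedReader, collect the distinct lines in a set, and write them back with a BufferedWriter. This also works well for large files, as long as the set of distinct lines fits in memory.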
/**
 * Removes duplicate lines from a text file, rewriting the file in place.
 *
 * The file is read completely, then overwritten with its distinct lines in the
 * order in which each line first appeared (so a CSV header row stays on top).
 * Lines are compared as exact strings. Every line written is followed by the
 * platform line separator, including the last one.
 *
 * All distinct lines are held in memory at once. The file is read and written
 * with the platform default charset, as `FileReader`/`FileWriter` do.
 *
 * @param fileName path of the file to de-duplicate; it is overwritten
 * @throws java.io.IOException if the file cannot be read or written
 */
def removeDuplicatesFromFile(fileName: String): Unit = {
  // LinkedHashSet (rather than HashSet) iterates in insertion order, so the
  // output keeps the first occurrence of each line in its original position.
  val lines = new mutable.LinkedHashSet[String]()

  val reader = new BufferedReader(new FileReader(fileName))
  try {
    // readLine() returns null at end of input; stop there.
    Iterator
      .continually(reader.readLine())
      .takeWhile(_ != null)
      .foreach(lines += _)
  } finally {
    // Always release the handle, even if reading fails part-way.
    reader.close()
  }

  // NOTE: opening the FileWriter truncates the file; if writing fails after
  // this point the original content is not recoverable from here.
  val writer = new BufferedWriter(new FileWriter(fileName))
  try {
    for (unique <- lines) {
      writer.write(unique)
      writer.newLine()
    }
  } finally {
    // close() also flushes the buffered output.
    writer.close()
  }
}