// Common Lines Finder (Java)
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Scans a text file for batches of consecutive lines that re-occur later in the same
 * file, either exactly or approximately, and reports them on the console and as an
 * HTML file.
 *
 * <p>Two pieces of text count as "similar" when their normalised Levenshtein similarity
 * (1 - distance / longer length) is at least the configured threshold.
 */
public class CommonLines {

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_INPUT_FILE = "C:\\Users\\Sri Vishnu\\Downloads\\testDupLines.txt";

    /** Report file used when no second path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_FILE = "C:\\Users\\Sri Vishnu\\Downloads\\commonlines.html";

    /**
     * Program entry point.
     *
     * @param args optional: {@code args[0]} is the file to scan and {@code args[1]} the HTML
     *             report to write; each falls back to its built-in default path when absent
     */
    public static void main(String[] args) {
        String fileName = args.length > 0 ? args[0] : DEFAULT_INPUT_FILE;
        String reportName = args.length > 1 ? args[1] : DEFAULT_OUTPUT_FILE;
        System.out.println(fileName);

        HashMap<String, ArrayList<BatchInfo>> map = new HashMap<>();
        int minBatchSize = 3;
        double threshold = 0.9; // Change this value to adjust the similarity threshold

        // Occurrences found for the batch currently held in currentBatch.
        ArrayList<BatchInfo> lastOccurence = new ArrayList<>();
        // Longest run of lines so far that still has a similar occurrence further down.
        StringBuilder currentBatch = new StringBuilder();
        // currentBatch plus the line under test.
        StringBuilder tempBatch = new StringBuilder();
        ArrayList<BatchInfo> lineOccurrences = new ArrayList<>();

        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            // Read the file once into a list so that looking up line i is O(1).
            ArrayList<String> fileLines = new ArrayList<>();
            String line;
            while ((line = br.readLine()) != null) {
                fileLines.add(line);
            }
            int totalLines = fileLines.size();

            for (int i = 1; i <= totalLines; i++) {
                tempBatch.append(fileLines.get(i - 1)).append("\n");

                // A batch never starts with blank lines.
                if (tempBatch.toString().trim().isEmpty()) {
                    tempBatch.setLength(0);
                    continue;
                }

                ArrayList<BatchInfo> occurrencePositions =
                        findSimilarOccurrence(tempBatch.toString(), fileLines, i + 1, threshold);
                if (!occurrencePositions.isEmpty()) {
                    // The extended batch still re-occurs below: keep growing it.
                    lastOccurence = occurrencePositions;
                    currentBatch.setLength(0);
                    currentBatch.append(tempBatch);
                    continue;
                }

                // Line i broke the match; the batch collected so far ends at line i - 1.
                String batchText = currentBatch.toString();
                int batchLineCount = (int) batchText.lines().count();
                lineOccurrences.add(new BatchInfo(i - batchLineCount, batchText, "While"));
                lineOccurrences.addAll(lastOccurence);

                // NOTE(review): "<= minBatchSize" in the original means a batch of exactly
                // minBatchSize lines is discarded; kept as-is, confirm this is intended.
                boolean worthReporting = !batchText.trim().isEmpty()
                        && lineOccurrences.size() >= 2
                        && batchLineCount > minBatchSize;
                if (worthReporting) {
                    boolean foundMatch = false;
                    for (String existingKey : map.keySet()) {
                        if (isSimilar(batchText, existingKey, threshold)) {
                            foundMatch = true;
                            break;
                        }
                    }
                    if (!foundMatch) {
                        map.computeIfAbsent(batchText, k -> new ArrayList<>()).addAll(lineOccurrences);
                    }
                }

                // NOTE(review): line i itself is dropped here and never becomes the first
                // line of the next batch; kept as-is, confirm this is intended.
                currentBatch.setLength(0);
                tempBatch.setLength(0);
                lineOccurrences.clear();
            }

            // Store a batch that is still open when the input ends (no size filter applied).
            if (currentBatch.length() > 0) {
                map.computeIfAbsent(currentBatch.toString(), k -> new ArrayList<>()).addAll(lineOccurrences);
            }

            for (Map.Entry<String, ArrayList<BatchInfo>> entry : map.entrySet()) {
                System.out.println("BATCH Starts:::: ");
                System.out.println("Total Occurances => " + entry.getValue().size());
                for (BatchInfo batchinfo : entry.getValue()) {
                    System.out.println("Lines: \n" + batchinfo.getSimilarOccurance());
                    System.out.println("At:: " + batchinfo.getLineNumber() + "\n\n\n\n\n");
                }
                System.out.println("BATCH ENDS:::: ");
            }
        } catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
        }

        // The report is written even when scanning failed; it is then simply empty.
        String htmlString = toHtmlTable(map);
        try (FileWriter file = new FileWriter(reportName)) {
            file.write(htmlString);
            System.out.println("Successfully wrote to the file.");
        } catch (IOException e) {
            System.out.println("An error occurred.");
            e.printStackTrace();
        }
    }

    /**
     * Computes the Levenshtein (edit) distance between two strings.
     *
     * <p>Only two rows of the dynamic-programming table are kept, so memory use is
     * proportional to {@code s2.length()} rather than to the product of both lengths.
     *
     * @param s1 first string
     * @param s2 second string
     * @return minimum number of single-character insertions, deletions and substitutions
     *         that turn {@code s1} into {@code s2}
     */
    private static int levenshteinDistance(String s1, String s2) {
        int m = s1.length();
        int n = s2.length();
        int[] previous = new int[n + 1];
        int[] current = new int[n + 1];
        for (int j = 0; j <= n; j++) {
            previous[j] = j;
        }
        for (int i = 1; i <= m; i++) {
            current[0] = i;
            for (int j = 1; j <= n; j++) {
                if (s1.charAt(i - 1) == s2.charAt(j - 1)) {
                    current[j] = previous[j - 1];
                } else {
                    current[j] = Math.min(previous[j], Math.min(current[j - 1], previous[j - 1])) + 1;
                }
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        // After the final swap (or when m == 0) the finished row is in "previous".
        return previous[n];
    }

    /**
     * Tells whether two strings are at least {@code threshold} similar.
     *
     * @param s1        first string
     * @param s2        second string
     * @param threshold minimum similarity, where 1.0 means identical
     * @return {@code true} when {@code 1 - distance / max(length)} is at least {@code threshold};
     *         two empty strings have similarity 1.0
     */
    private static boolean isSimilar(String s1, String s2, double threshold) {
        int longest = Math.max(s1.length(), s2.length());
        // Guard the 0/0 case: without it the similarity is NaN and never passes.
        double similarity = longest == 0
                ? 1.0
                : 1.0 - ((double) levenshteinDistance(s1, s2) / longest);
        return similarity >= threshold;
    }

    /**
     * Renders the collected batches as a standalone HTML page, with one paragraph and one
     * table per map entry.
     *
     * @param map batch text mapped to the occurrences recorded for it
     * @return the complete HTML document; all text taken from the map is HTML-escaped
     */
    public static String toHtmlTable(HashMap<String, ArrayList<BatchInfo>> map) {
        StringBuilder htmlTable = new StringBuilder();
        htmlTable.append("<html>\n<head>\n<style>\n");
        htmlTable.append("table {border-collapse: collapse; width: 100%; margin-bottom: 20px;}\n");
        htmlTable.append("th, td {border: 1px solid #ddd; padding: 8px;}\n");
        htmlTable.append("th {padding-top: 12px; padding-bottom: 12px; text-align: left; background-color: #4CAF50; color: white;}\n");
        htmlTable.append("</style>\n</head>\n<body>\n");
        for (Map.Entry<String, ArrayList<BatchInfo>> entry : map.entrySet()) {
            htmlTable.append("<p>").append(escapeHtml(entry.getKey())).append("</p><br>");
            htmlTable.append("<table>\n");
            htmlTable.append("<tr><th>Similar Occurrence</th><th>Line Number</th></tr>\n");
            for (BatchInfo batchInfo : entry.getValue()) {
                // Every data row needs its own opening <tr>.
                htmlTable.append("<tr><td>").append(escapeHtml(batchInfo.getSimilarOccurance())).append("</td>");
                htmlTable.append("<td>").append(batchInfo.getLineNumber()).append("</td></tr>\n");
            }
            htmlTable.append("</table>\n");
            htmlTable.append("<br><br><br><br>");
        }
        htmlTable.append("</body>\n</html>");
        return htmlTable.toString();
    }

    /**
     * Escapes the characters that have a special meaning in HTML text and attribute values.
     *
     * @param text raw text, may be {@code null}
     * @return escaped text, or an empty string for {@code null}
     */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Searches the file, from {@code startLine} downwards, for windows of lines that are
     * similar to {@code tempBatch}. Hits do not overlap each other.
     *
     * @param tempBatch batch text, lines separated by {@code '\n'}; one trailing
     *                  {@code '\n'} is ignored for the comparison
     * @param fileLines all lines of the file, without line terminators
     * @param startLine 1-based line number at which the search starts
     * @param threshold minimum similarity passed on to {@link #isSimilar}
     * @return one {@link BatchInfo} per hit, holding the 1-based start line and the matched
     *         text; empty when nothing matches or the batch has no lines
     */
    private static ArrayList<BatchInfo> findSimilarOccurrence(String tempBatch, ArrayList<String> fileLines,
            int startLine, double threshold) {
        ArrayList<BatchInfo> occurrences = new ArrayList<>();
        int numLines = (int) tempBatch.lines().count();
        if (numLines == 0) {
            return occurrences;
        }
        // Windows are joined without a trailing newline, so drop the batch's own trailing
        // newline; otherwise identical text never reaches similarity 1.0.
        String needle = tempBatch.endsWith("\n")
                ? tempBatch.substring(0, tempBatch.length() - 1)
                : tempBatch;
        int lastStart = fileLines.size() - numLines + 1;
        for (int i = Math.max(startLine, 1); i <= lastStart; i++) {
            String linesToCompare = String.join("\n", fileLines.subList(i - 1, i - 1 + numLines));
            if (isSimilar(needle, linesToCompare, threshold)) {
                occurrences.add(new BatchInfo(i, linesToCompare, "findSimilarOccurrence"));
                // Continue right after the matched window; the loop's i++ adds the last step.
                i += numLines - 1;
            }
        }
        return occurrences;
    }

    /** One occurrence of a batch: the 1-based line it starts on and the text found there. */
    static class BatchInfo {
        int lineNumber;
        String similarOccurance;

        /**
         * @param lineNumber       1-based line number at which the occurrence starts
         * @param similarOccurance text of the occurrence
         * @param calledFrom       label of the creating call site; not stored
         */
        BatchInfo(int lineNumber, String similarOccurance, String calledFrom) {
            this.lineNumber = lineNumber;
            this.similarOccurance = similarOccurance;
        }

        /** @return 1-based line number at which the occurrence starts */
        public int getLineNumber() {
            return lineNumber;
        }

        /** @param lineNumber 1-based line number at which the occurrence starts */
        public void setLineNumber(int lineNumber) {
            this.lineNumber = lineNumber;
        }

        /** @return text of the occurrence */
        public String getSimilarOccurance() {
            return similarOccurance;
        }

        /** @param similarOccurance text of the occurrence */
        public void setSimilarOccurance(String similarOccurance) {
            this.similarOccurance = similarOccurance;
        }
    }
}
// (Web-page footer captured with the listing, not part of the program: "Comments" / "Post a Comment")