Web-page Ranking in Java
PageRank.java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
/**
 * Loads a small collection of web pages from a text file, finds the pages that contain
 * all given search terms and prints them ordered by an iteratively computed page rank.
 *
 * <p>Input file format, as parsed by the constructor: a line consisting of {@code PAGE}
 * starts a record; the next line is the page URL, the line after that is the page
 * content, and every following line up to the next {@code PAGE} line is the URL of a
 * page this page links to.
 */
public class PageRank {

    private static final String TEST_FILE_NAME = "CS.txt";

    /** Number of rank-update passes performed per search. */
    private static final int ITERATIONS = 10;

    /** Maximum number of result rows printed per search. */
    private static final int SEARCH_RESULT_LIMIT = 7;

    /**
     * Weight given to the rank inherited from referring pages; {@code 1 - D} is spread
     * evenly over the hits.
     * NOTE(review): PageRank is customarily run with a damping factor of 0.85 - confirm
     * that 0.15 is intended here.
     */
    private static final double D = 0.15;

    /** All loaded pages, keyed by their URL. */
    private final Map<String, Page> pageByUrl = new HashMap<>();

    /**
     * Reads the page collection from {@code fileName} and wires up the link graph.
     * Links that point to a URL with no {@code PAGE} record in the file are ignored.
     *
     * @param fileName path of the page collection file
     * @throws IllegalArgumentException if the file ends in the middle of a PAGE record
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    public PageRank(String fileName) throws RuntimeException {
        Map<String, Set<String>> links = new HashMap<>();
        // try-with-resources closes the reader even when reading fails half-way.
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            Page currPage = null;
            String line;
            while ((line = br.readLine()) != null) {
                if ("PAGE".equals(line)) {
                    String url = br.readLine();
                    String content = br.readLine();
                    if (url == null || content == null) {
                        throw new IllegalArgumentException(
                                "Truncated PAGE record at end of file: " + fileName);
                    }
                    currPage = new Page();
                    currPage.url = url;
                    // Content is stored lower-cased so that matching is case-insensitive.
                    currPage.content = content.toLowerCase();
                    pageByUrl.put(url, currPage);
                    links.put(url, new HashSet<>());
                } else if (currPage != null) {
                    // Any non-PAGE line after a record header is an outgoing link.
                    links.get(currPage.url).add(line);
                }
            }
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }

        // Resolve URL strings to Page objects only after all pages are known,
        // so forward references inside the file work.
        for (Map.Entry<String, Set<String>> pair : links.entrySet()) {
            Page source = pageByUrl.get(pair.getKey());
            for (String targetUrl : pair.getValue()) {
                Page ref = pageByUrl.get(targetUrl);
                if (ref != null) {
                    ref.addReferencedBy(source);
                    source.addReference(ref);
                }
            }
        }
    }

    /**
     * Finds all pages containing every term, ranks them and prints the hit count followed
     * by a table of at most {@link #SEARCH_RESULT_LIMIT} pages in descending rank order.
     *
     * @param terms search terms; matching is case-insensitive substring matching
     */
    public void search(String[] terms) {
        List<Page> pages = hit(terms);
        System.out.println("There were " + pages.size() + " hits");
        computeRanks(pages);
        pages.sort(Comparator.comparingDouble((Page p) -> p.rank).reversed());
        printResults(pages);
    }

    /** Runs {@link #ITERATIONS} rank-update passes over the given hit pages. */
    private void computeRanks(List<Page> pages) {
        int hitCount = pages.size();
        // Clear every rank first: referrers that are not among the hits must contribute
        // nothing, rather than a stale value left over from a previous search.
        for (Page page : pageByUrl.values()) {
            page.rank = 0.0;
        }
        for (Page page : pages) {
            page.rank = 1.0 / hitCount;
        }
        for (int i = 0; i < ITERATIONS; i++) {
            // New ranks are staged so that every page in a pass sees the previous pass's values.
            Map<Page, Double> tempRank = new HashMap<>();
            for (Page page : pages) {
                double newRank = (1 - D) / hitCount;
                for (Page refBy : page.referencedBy) {
                    // refBy references at least this page, so the divisor is never zero.
                    newRank += D * refBy.rank / refBy.references.size();
                }
                tempRank.put(page, newRank);
            }
            for (Page page : pages) {
                page.rank = tempRank.get(page);
            }
        }
    }

    /** Prints the table header and the top-ranked rows of an already sorted hit list. */
    private static void printResults(List<Page> pages) {
        System.out.println(String.format("%-10s%-20s", "Rank","URL"));
        int shown = Math.min(pages.size(), SEARCH_RESULT_LIMIT);
        for (int i = 0; i < shown; i++) {
            Page page = pages.get(i);
            // NOTE(review): row format chosen to line up with the header columns - confirm.
            System.out.println(String.format("%-10.4f%-20s", page.rank, page.url));
        }
    }

    /** Returns the loaded pages whose content contains every one of {@code terms}. */
    private List<Page> hit(String[] terms) {
        List<Page> result = new ArrayList<>();
        for (Page page : pageByUrl.values()) {
            if (page.isHit(terms)) {
                result.add(page);
            }
        }
        return result;
    }

    /** A single page together with its incoming and outgoing links and its current rank. */
    class Page {
        String url;
        String content;
        /** Pages that link to this page. */
        Set<Page> referencedBy = new HashSet<>();
        /** Pages this page links to. */
        Set<Page> references = new HashSet<>();
        double rank;

        /**
         * Tells whether this page's content contains all of the given terms.
         *
         * @param terms terms to look for; each is lower-cased before the substring check
         * @return {@code true} if every term occurs in the content (also for no terms)
         */
        public boolean isHit(String[] terms) {
            for (String term : terms) {
                if (!content.contains(term.toLowerCase())) {
                    return false;
                }
            }
            return true;
        }

        /** Records that {@code page} links to this page. */
        public void addReferencedBy(Page page) {
            referencedBy.add(page);
        }

        /** Records that this page links to {@code page}. */
        public void addReference(Page page) {
            references.add(page);
        }

        /**
         * Misspelled predecessor of {@link #addReference(Page)}, kept for compatibility.
         *
         * @deprecated use {@link #addReference(Page)}
         */
        @Deprecated
        public void addRegerence(Page page) {
            addReference(page);
        }
    }

    /** Loads {@link #TEST_FILE_NAME}, reads one line of search terms from stdin and searches. */
    public static void main(String[] args) {
        PageRank pageRank = new PageRank(TEST_FILE_NAME);
        String[] terms;
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.println("Enter your search terms: ");
            terms = scanner.nextLine().split(" ");
        }
        pageRank.search(terms);
    }
}