Instructions
Requirements and Specifications
Source Code
package main
import (
"fmt"
"io"
"strings"
"os"
"time"
// "path"
"net/http"
"golang.org/x/net/html"
)
// processRefs writes every entry of refs to the file "foundUrls.txt" in the
// current working directory, one entry per line. An existing file with that
// name is truncated.
//
// Failures are reported on standard output instead of being returned, so the
// caller can carry on with the rest of the run. Writing stops at the first
// failed write because later writes to the same file are unlikely to succeed.
func processRefs(refs []string) {
	f, err := os.Create("foundUrls.txt")
	if err != nil {
		fmt.Printf("error creating references file: %s\n", err)
		return
	}
	defer func() {
		// Close can be the first place a write failure becomes visible,
		// so its error is reported rather than dropped.
		if err := f.Close(); err != nil {
			fmt.Printf("error closing references file: %s\n", err)
		}
	}()

	for _, s := range refs {
		if _, err := f.WriteString(s + "\n"); err != nil {
			fmt.Printf("error writing references file: %s\n", err)
			return
		}
	}
}
// processImages records every image URL in "foundImages.txt" (one per line,
// in the current working directory) and downloads each image next to it as
// "<index>.<ext>", where index is the URL's position in images and ext is
// derived from the URL by imageExt.
//
// The work is best effort: every failure is printed to standard output and
// processing continues with the next image.
func processImages(images []string) {
	f, err := os.Create("foundImages.txt")
	if err != nil {
		fmt.Printf("error creating images text file: %s\n", err)
		return
	}
	defer func() {
		if err := f.Close(); err != nil {
			fmt.Printf("error closing images text file: %s\n", err)
		}
	}()

	for index, s := range images {
		if _, err := f.WriteString(s + "\n"); err != nil {
			// The list is only a log; still try to fetch the image.
			fmt.Printf("error writing images text file: %s\n", err)
		}
		downloadImage(s, fmt.Sprintf("%v", index)+"."+imageExt(s))
	}
}

// downloadImage fetches rawURL and stores the response body in fileName.
// It lives in its own function so the deferred Close calls run once per image
// instead of piling up until processImages returns.
func downloadImage(rawURL, fileName string) {
	response, err := http.Get(rawURL)
	if err != nil {
		fmt.Printf("error downloading image file: %s\n", err)
		return
	}
	defer response.Body.Close()

	// A non-2xx body is an error page, not image data; do not save it.
	if response.StatusCode < 200 || response.StatusCode > 299 {
		fmt.Printf("error downloading image file: %s returned status %s\n", rawURL, response.Status)
		return
	}

	file, err := os.Create(fileName)
	if err != nil {
		fmt.Printf("error saving image file: %s\n", err)
		return
	}
	defer func() {
		if err := file.Close(); err != nil {
			fmt.Printf("error saving image file: %s\n", err)
		}
	}()

	if _, err := io.Copy(file, response.Body); err != nil {
		fmt.Printf("!!error saving image file: %s\n", err)
	}
}

// imageExt returns the file extension (without the dot) of the last path
// segment of rawURL. The query string, the fragment and the host name are
// ignored, so dots inside them are never mistaken for an extension. When the
// last segment has no extension, "img" is returned so the resulting file name
// is always valid.
func imageExt(rawURL string) string {
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}
	// Skip "scheme://host" so a URL without a path does not yield the
	// top-level domain as its extension.
	if i := strings.Index(rawURL, "//"); i >= 0 {
		rest := rawURL[i+2:]
		if j := strings.Index(rest, "/"); j >= 0 {
			rawURL = rest[j:]
		} else {
			rawURL = ""
		}
	}
	name := rawURL[strings.LastIndex(rawURL, "/")+1:]
	dot := strings.LastIndex(name, ".")
	if dot < 0 || dot == len(name)-1 {
		return "img"
	}
	return name[dot+1:]
}
// getHtml fetches url with an HTTP GET and returns the response body as a
// string.
//
// ok is false, and response is empty, when the request fails, when the server
// answers with a non-2xx status, or when the body cannot be read. The reason
// is printed to standard output in each case.
func getHtml(url string) (response string, ok bool) {
	res, err := http.Get(url)
	if err != nil {
		fmt.Printf("error making http request: %s\n", err)
		return "", false
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked.
	defer res.Body.Close()

	// An error page is not the content the caller asked for.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		fmt.Printf("error making http request: %s returned status %s\n", url, res.Status)
		return "", false
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Printf("error reading response: %s\n", err)
		return "", false
	}
	return string(body), true
}
// findRefs scans htmlContent and returns two lists, both in document order:
//
//   - every "href" attribute value (on any tag) that starts with "http";
//   - the "src" of every <img> tag, turned into a fetchable URL by
//     resolveImageURL using url as the base.
//
// url is the base that relative image sources are appended to; the caller in
// this file passes the scheme-and-host part of the page address.
func findRefs(url string, htmlContent string) ([]string, []string) {
	refs := []string{}
	images := []string{}

	tokenizer := html.NewTokenizer(strings.NewReader(htmlContent))
	for {
		// ErrorToken also signals the normal end of input.
		if tokenizer.Next() == html.ErrorToken {
			break
		}

		tag, hasAttr := tokenizer.TagName()
		isImage := string(tag) == "img"

		for hasAttr {
			var attrKey, attrValue []byte
			attrKey, attrValue, hasAttr = tokenizer.TagAttr()

			switch string(attrKey) {
			case "href":
				if href := string(attrValue); strings.HasPrefix(href, "http") {
					refs = append(refs, href)
				}
			case "src":
				if !isImage {
					continue
				}
				if imageURL, ok := resolveImageURL(url, string(attrValue)); ok {
					images = append(images, imageURL)
				}
			}
		}
	}
	return refs, images
}

// resolveImageURL converts the src attribute of an <img> tag into a URL that
// can be requested over HTTP. ok is false for sources that cannot be fetched:
// an empty src and inline "data:" URIs.
//
// Sources that already start with "http" are returned unchanged.
// Protocol-relative sources ("//host/path") receive the scheme of root.
// Anything else is appended to root with exactly one "/" in between.
func resolveImageURL(root, src string) (resolved string, ok bool) {
	switch {
	case src == "", strings.HasPrefix(src, "data:"):
		return "", false
	case strings.HasPrefix(src, "http"):
		return src, true
	case strings.HasPrefix(src, "//"):
		// Reuse the "scheme:" part of root; fall back to https when root
		// carries no scheme.
		scheme := "https:"
		if i := strings.Index(root, "//"); i > 0 {
			scheme = root[:i]
		}
		return scheme + src, true
	default:
		return strings.TrimSuffix(root, "/") + "/" + strings.TrimPrefix(src, "/"), true
	}
}
// main downloads a fixed list of pages, collects the links and image URLs
// found on them, writes both lists to disk via processRefs and processImages
// (the latter also downloads the images), and prints the total elapsed time.
// Pages that cannot be fetched are skipped.
func main() {
	start := time.Now()
	urlList := []string{
		"https://www.unlv.edu/cs",
		"https://www.unlv.edu/engineering",
		"https://www.unlv.edu/engineering/advising-center",
		"https://www.unlv.edu/engineering/about",
		"https://www.unlv.edu/engineering/academic-programs",
		"https://www.unlv.edu/ceec",
		"https://ece.unlv.edu/",
		"https://www.unlv.edu/me",
		"https://www.unlv.edu/rotc",
		"https://www.unlv.edu/afrotc",
		"https://www.unlv.edu/eed",
		"https://www.unlv.edu/engineering/mendenhall",
		"https://www.unlv.edu/engineering/uas",
		"https://www.unlv.edu/engineering/solar",
		"https://www.unlv.edu/engineering/techcommercialization",
		"https://www.unlv.edu/engineering/railroad",
		"https://www.unlv.edu/engineering/future-students",
		"https://www.physics.unlv.edu/",
	}

	var refs, images []string
	for _, pageURL := range urlList {
		content, ok := getHtml(pageURL)
		if !ok {
			continue
		}
		// Relative image paths are resolved against the site root.
		singleRefs, singleImages := findRefs(siteRoot(pageURL), content)
		refs = append(refs, singleRefs...)
		images = append(images, singleImages...)
	}

	processRefs(refs)
	processImages(images)

	fmt.Printf("Downloads completed in %s \n", time.Since(start))
}

// siteRoot returns the "scheme://host" prefix of pageURL, without a trailing
// slash. A URL that has no "/" after the host is returned unchanged, and a
// URL without "//" is cut at its first "/", so no input can cause an
// out-of-range slice.
func siteRoot(pageURL string) string {
	hostStart := 0
	if i := strings.Index(pageURL, "//"); i >= 0 {
		hostStart = i + 2
	}
	if j := strings.Index(pageURL[hostStart:], "/"); j >= 0 {
		return pageURL[:hostStart+j]
	}
	return pageURL
}