Files
NetNewsWire/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift

119 lines
2.7 KiB
Swift
Raw Normal View History

2024-09-21 12:16:09 -07:00
//
// HTMLLinkParser.swift
//
//
// Created by Brent Simmons on 9/21/24.
//
import Foundation
import SAX
/// Collects the `<a>` links found in an HTML document.
///
/// Use `htmlLinks(with:)` to parse a document and get its links in one call.
public final class HTMLLinkParser {

    /// The links gathered so far. A link is appended each time an `<a>` start tag is seen.
    public private(set) var links: [HTMLLink] = []

    /// The document being parsed.
    private let parserData: ParserData

    /// The document's own URL, used to resolve relative `href` values.
    /// `nil` when `parserData.url` cannot be parsed as a URL.
    private let baseURL: URL?

    /// Parses the document in `parserData` and returns every link found in it.
    ///
    /// - Parameter parserData: The HTML data to scan, plus the URL it came from.
    /// - Returns: The links collected during the parse.
    public static func htmlLinks(with parserData: ParserData) -> [HTMLLink] {
        let linkParser = HTMLLinkParser(parserData)
        linkParser.parse()
        return linkParser.links
    }

    /// Creates a parser for `parserData`. Nothing is parsed until `parse()` runs.
    init(_ parserData: ParserData) {
        baseURL = URL(string: parserData.url)
        self.parserData = parserData
    }
}
private extension HTMLLinkParser {

    /// Runs a SAX HTML parse over `parserData.data` with this object as the delegate.
    /// The delegate callbacks fill in `links` as the parse proceeds.
    func parse() {
        SAXHTMLParser(delegate: self, data: parserData.data).parse()
    }
}
2024-09-21 22:10:47 -07:00
// MARK: - SAXHTMLParserDelegate

extension HTMLLinkParser: SAXHTMLParserDelegate {

    /// Attribute names read from `<a>` start tags.
    private enum HTMLAttributeName {
        static let href = "href"
        static let title = "title"
    }

    /// Tag names this parser reacts to, in the C-string form handed to `SAXEqualTags`.
    private enum HTMLName {
        static let a = "a".utf8CString
    }

    /// The link most recently appended to `links`, i.e. the one currently being filled in.
    private var currentLink: HTMLLink? {
        links.last
    }

    /// Returns the value of the `title` attribute, if present (key lookup is case-insensitive).
    private func title(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? {
        attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title)
    }

    /// Returns the link target from the `href` attribute as a URL string.
    ///
    /// The href is resolved against `baseURL` when one is available. When `baseURL`
    /// is `nil` the href is parsed on its own, so absolute hrefs are still reported
    /// instead of being dropped.
    ///
    /// - Returns: The resolved URL string, or `nil` when the attribute is missing,
    ///   empty, or cannot be parsed as a URL.
    private func urlString(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? {
        guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href), !href.isEmpty else {
            return nil
        }

        // href values come straight from page markup, so an unparseable one is
        // ordinary bad input rather than a programmer error: return nil quietly.
        // `URL(string:relativeTo:)` accepts a nil base URL.
        return URL(string: href, relativeTo: baseURL)?.absoluteString
    }

    /// Copies the URL and title from a start tag's attributes onto the current link.
    private func handleLinkAttributes(_ attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) {
        guard let currentLink else {
            // The caller appends a link before calling this, so there must be one.
            assertionFailure("currentLink must not be nil")
            return
        }

        currentLink.urlString = urlString(with: attributesDictionary)
        currentLink.title = title(with: attributesDictionary)
    }

    /// Start-tag callback: on `<a>`, appends a new link, fills in its attributes,
    /// and asks the SAX parser to begin storing characters for the link text.
    public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {
        guard SAXEqualTags(name, HTMLName.a) else {
            return
        }

        // Append first so that `currentLink` refers to this link from here on.
        // A link is recorded even when the tag carries no attributes.
        links.append(HTMLLink())

        if let attributesDictionary = saxHTMLParser.attributesDictionary(attributes) {
            handleLinkAttributes(attributesDictionary)
        }

        saxHTMLParser.beginStoringCharacters()
    }

    /// End-tag callback: on `</a>`, stores the parser's current string
    /// (whitespace-trimmed) as the text of the current link.
    public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) {
        guard SAXEqualTags(name, HTMLName.a) else {
            return
        }
        guard let currentLink else {
            assertionFailure("currentLink must not be nil.")
            return
        }

        currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace
    }

    /// Characters callback. Intentionally empty: the link text is read from the
    /// SAX parser itself in the end-tag callback.
    public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
        // Nothing needed.
    }
}