gefavic/gefavic/getfavicon.py

68 lines
1.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
from requests_html import HTMLSession
from urllib.parse import urlparse
import re
class favfinder(HTMLParser):
    """HTML parser that remembers the attributes of a favicon ``<link>`` tag.

    Only ``<link>`` start tags seen between ``<head>`` and ``</head>`` are
    considered.  A link qualifies when its ``rel`` attribute contains the
    token ``icon`` (e.g. ``rel="icon"`` or ``rel="shortcut icon"``).  When
    several links qualify, the attributes of the last one fed to the parser
    are kept in ``attrs``.
    """

    # Class-level defaults; instances rebind these names and never mutate
    # the shared list in place.
    attrs = []
    in_head = False

    def handle_starttag(self, tag, attrs):
        """Track entry into ``<head>`` and record a matching icon link.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.
        """
        if tag == 'head':
            self.in_head = True
            return
        if tag != 'link' or not self.in_head:
            return
        for name, value in attrs:
            # ``value`` is None for a bare attribute such as ``<link rel>``,
            # so guard before splitting it into whitespace-separated tokens.
            if name == 'rel' and value and 'icon' in value.lower().split():
                self.attrs = attrs
                break

    def handle_endtag(self, tag):
        """Stop considering ``<link>`` tags once ``</head>`` is reached."""
        if tag == 'head':
            self.in_head = False

    def find_in(self, html):
        """Feed *html* to the parser and report whether an icon link was seen.

        Returns True when ``attrs`` holds the attributes of a matching
        ``<link>`` tag, False otherwise.
        """
        self.feed(html)
        return bool(self.attrs)
def get_html(session, url):
    """Fetch *url* through *session* and return the page markup.

    The value returned is the ``html.html`` attribute of the response
    object produced by ``session.get(url)``.
    """
    return session.get(url).html.html
def crop_url(url):
    """Reduce *url* to its ``scheme://netloc`` prefix.

    Path, parameters, query string and fragment are discarded.
    """
    parts = urlparse(url)
    origin = {"proto": parts.scheme, "domain": parts.netloc}
    return "{proto}://{domain}".format(**origin)
def complete_url(url, icon_url):
    """Resolve *icon_url* against the page *url* and return an absolute URL.

    Absolute icon URLs are returned unchanged.  Relative references
    (``/favicon.ico``, ``favicon.ico``, ``//cdn.example.com/f.ico``) are
    resolved against *url*, keeping any query string or fragment of the
    icon reference.  ``None`` (no icon found) is returned as ``None``.
    """
    # Imported locally so this function does not depend on a file-level
    # import that the module does not currently have.
    from urllib.parse import urljoin

    if icon_url is None:
        return None
    # urljoin implements standard relative-reference resolution, which the
    # former hand-built "scheme://netloc + path" concatenation got wrong for
    # slash-less paths, protocol-relative URLs, query strings and data: URIs.
    return urljoin(url, icon_url)
def get_icon_url(response):
    """Return the ``href`` of the favicon link found in *response*.

    *response* is HTML text; it is scanned with ``favfinder``.  Returns
    ``None`` when no icon link is found or the link has no ``href``
    attribute.
    """
    icon = favfinder()
    if not icon.find_in(response):
        return None
    # attrs holds (name, value) pairs: compare the name explicitly so that a
    # value which happens to equal "href" is not mistaken for the attribute.
    return next((value for name, value in icon.attrs if name == 'href'), None)
def main(options):
    """Print the absolute favicon URL for the site given in ``options.url``.

    The URL is reduced to ``scheme://netloc``, that page is downloaded, its
    markup is searched for a favicon link, and the resolved icon URL is
    printed to standard output.
    """
    url = crop_url(str(options.url))
    # Use the session as a context manager so it is closed once the page
    # has been fetched, even if the request raises.
    with HTMLSession() as session:
        response = get_html(session=session, url=url)
    icon_url = get_icon_url(response)
    print(complete_url(url, icon_url))