64 lines
1.9 KiB
Python
Executable File
64 lines
1.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# http://github.com/mitchweaver/bin
|
|
#
|
|
# get a given album cover from metal-archives.com
|
|
#
|
|
# Usage: ./album 'hellhammer - death fiend'
|
|
#
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import requests
|
|
import sys
|
|
import re
|
|
import urllib
|
|
|
|
def find_album_url(band, album):
    """Return the metal-archives.com page URL for *album* by *band*.

    The band page is fetched first and scanned for its "Complete
    discography" link: the discography URL embeds a numeric band ID, so it
    cannot be built from the band name alone.  The discography page is then
    scanned for the first non-review link that mentions the album name.

    Args:
        band: Band name; it is lower-cased and appended to the
            ``/bands/`` URL.
        album: Album name; matched case-insensitively against each link's
            serialized HTML (attributes and text).

    Returns:
        The absolute ``http(s)`` URL of the first matching link, or ``None``
        when no discography link or no matching album link is found.
    """

    def get_soup(url):
        # A timeout keeps an unresponsive server from hanging the script.
        data = requests.get(url, timeout=30).text
        return BeautifulSoup(data, "html5lib")

    band_url = 'https://www.metal-archives.com/bands/' + band.lower()
    wanted = album.lower()

    for band_link in get_soup(band_url).find_all('a'):
        markup = str(band_link)
        # The first letters are omitted from the needles so the match does
        # not depend on how "Complete discography" is capitalised.
        if 'iscography' not in markup or 'omplete' not in markup:
            continue

        # Read the attribute instead of stripping fixed text off the
        # serialized tag, which breaks as soon as the markup changes.
        discography_url = band_link.get('href')
        if not discography_url:
            continue

        for album_link in get_soup(discography_url).find_all('a'):
            markup = str(album_link)
            # Review links mention the album name too; skip them.
            if 'reviews' in markup:
                continue
            if wanted not in markup.lower():
                continue
            href = album_link.get('href')
            # Only absolute URLs are usable by the downloader; keep looking
            # rather than failing when a matching link has none.
            if href and href.startswith(('http://', 'https://')):
                return href

    return None
|
|
|
|
def get_album_art(url):
    """Download the cover image found on an album page to ``./cover.jpg``.

    The page at *url* is fetched and parsed, the ``src`` of every ``<img>``
    is collected, and the second one is downloaded and written to
    ``./cover.jpg`` in the current directory (overwriting any existing file).

    This is best-effort: any failure (bad or missing URL, network error,
    fewer than two images, an ``<img>`` without ``src``, unwritable file)
    prints an error message instead of raising.

    Args:
        url: Absolute URL of the album page.

    Returns:
        None.
    """
    # A plain ``import urllib`` does not load the ``request`` submodule;
    # import it explicitly rather than relying on another module having
    # loaded it as a side effect.
    import urllib.request

    header = {'User-Agent': 'Mozilla/5.0'}

    def fetch(target):
        # Send the same User-Agent for the page and the image, and close
        # the response as soon as its body has been read.
        request = urllib.request.Request(target, headers=header)
        with urllib.request.urlopen(request) as response:
            return response.read()

    try:
        soup = BeautifulSoup(fetch(url), "html5lib")
        image_urls = [img['src'] for img in soup.find_all("img")]

        # NOTE(review): index 1 presumably skips a site logo that comes
        # first on the page - confirm against the current page layout.
        image = fetch(image_urls[1])

        with open("./cover.jpg", 'wb') as cover:
            cover.write(image)
    except Exception:
        # Deliberately broad: every failure is reported the same way, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print("Error - can't find it?")
|
|
|
|
def main():
    """Parse ``'band - album'`` from the command line and fetch the cover.

    The first command-line argument is split on the first ``' - '`` so that
    album titles which themselves contain ``' - '`` stay intact.  The album
    page URL is looked up with ``find_album_url`` and handed to
    ``get_album_art``.

    Raises:
        SystemExit: With a usage message when the argument is missing or is
            not of the form ``'band - album'``.
    """
    usage = "Usage: ./album 'band - album'"

    if len(sys.argv) < 2:
        sys.exit(usage)

    band, separator, album = sys.argv[1].partition(' - ')
    band, album = band.strip(), album.strip()
    if not separator or not band or not album:
        sys.exit(usage)

    url = find_album_url(band, album)
    get_album_art(url)
|
|
|
|
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|