#!/usr/bin/env python3
#
# http://github.com/mitchweaver/metal-archives
#
# get a given album cover from metal-archives.com
#
# Usage: ./album 'hellhammer - death fiend'
#
import re
import sys
import urllib.request

import requests
from bs4 import BeautifulSoup


def find_album_url(band, album):
    def get_soup(url):
        data = requests.get(url).text
        return BeautifulSoup(data, "html5lib")

    url = 'https://www.metal-archives.com/bands/' + band.lower()
    soup = get_soup(url)

    # scrape the band page for the "Complete discography" link
    # we need to do this because MA hides it behind an ID#
    for link in soup.find_all('a'):
        strlink = str(link)
        if 'iscography' in strlink and 'omplete' in strlink:
            # pull the href out of the anchor tag
            url = link.get('href')

    # scrape the discography page for the album link
    for link in get_soup(url).find_all('a'):
        strlink = str(link)
        # skip review links
        if 'reviews' not in strlink:
            # look for the requested album
            if album.lower() in strlink.lower():
                urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+/.*."',
                                  strlink)
                return urls[0].replace('"', '')


def get_album_art(url):
    try:
        def get_soup(url, header):
            request = urllib.request.Request(url, headers=header)
            return BeautifulSoup(urllib.request.urlopen(request), "html5lib")

        # spoof a browser user agent so the request isn't rejected
        header = {'User-Agent': 'Mozilla/5.0'}
        image_urls = [a['src'] for a in get_soup(url, header).find_all("img")]

        # the second <img> on the album page is the cover art
        image = urllib.request.urlopen(image_urls[1]).read()
        with open("./cover.jpg", 'wb') as file:
            file.write(image)
    except Exception:
        print("Error - can't find it?")


def main():
    # split "band - album" into its two halves
    arg = sys.argv[1].split(' - ')
    url = find_album_url(arg[0], arg[1])
    get_album_art(url)


if __name__ == "__main__":
    main()
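
# Example (a minimal sketch, not part of the CLI flow): the two helpers can also
# be used programmatically. The module name "album" below is hypothetical and
# assumes this file is importable under that name; the band/album pair must
# exist on metal-archives.com.
#
#     from album import find_album_url, get_album_art
#
#     url = find_album_url('hellhammer', 'death fiend')
#     if url:
#         get_album_art(url)  # writes ./cover.jpg in the current directory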