#!/usr/bin/env python3
#
# http://github.com/mitchweaver/metal-archives
#
# get releases of a band from metal-archives
#
# Usage: ./releases 'iron maiden'
#

import http.client
import re
import sys
import urllib.parse
import urllib.request

BASE_URL = "https://www.metal-archives.com"

# Release types that get printed. Rows of any other type found on the page
# (singles, boxed sets, ...) are skipped.
RELEASE_TYPES = frozenset(
    {"Full-length", "EP", "Demo", "Promo", "Compilation"}
)

# A line consisting of nothing but a four-digit year.
_YEAR_RE = re.compile(r"[1-3][0-9]{3}")

# Matches an assignment of the form "bandId = <id>;" in the band page's text
# and captures the id.
_BAND_ID_RE = re.compile(r"\bbandId\b\s*=\s*([^;\s]+)")


class Line:
    """One release: its title, year and release type."""

    # Class-level defaults, kept so the attributes exist on the class itself.
    type = ""
    title = ""
    year = ""

    def __init__(self, title, year, type):
        self.type = type
        self.title = title
        self.year = year

    def __repr__(self):
        return "Line(title={!r}, year={!r}, type={!r})".format(
            self.title, self.year, self.type
        )

    def concat(self):
        """Return the release formatted as ``title - year - (type)``."""
        return f"{self.title} - {self.year} - ({self.type})".strip()


def error():
    """Report a failed lookup on stderr and exit with status 1."""
    print("Error: can't find it?", file=sys.stderr)
    # sys.exit rather than quit(): quit() is an interactive helper installed
    # by the site module and would exit with status 0.
    sys.exit(1)


def band_url(band):
    """Return the band-page URL for the band name *band*.

    Spaces become underscores and the name is lower-cased; everything else
    is percent-encoded so non-ASCII names and stray slashes are safe in the
    URL path.
    """
    slug = band.replace(" ", "_").lower()
    return f"{BASE_URL}/bands/{urllib.parse.quote(slug, safe='')}"


def extract_band_id(text):
    """Return the band id found in the band page's *text*, or None."""
    match = _BAND_ID_RE.search(text)
    return match.group(1) if match is not None else None


def parse_releases(lines):
    """Turn the text lines of a discography page into a list of ``Line``.

    Each release is expected to appear as a title line, then a type line,
    then a year line (assumed from the page layout -- confirm if the site
    changes). The year line closes the row: the release is kept when its
    type is one of ``RELEASE_TYPES`` and dropped otherwise, and the state is
    reset either way so nothing leaks into the next row.

    Lines shorter than two characters and lines containing ``%`` (review
    ratings) are ignored.
    """
    releases = []
    title = ""
    release_type = ""
    for raw in lines:
        line = raw.strip()
        # skip whitespace / garble, and get rid of ratings
        if len(line) < 2 or "%" in line:
            continue
        if title and _YEAR_RE.fullmatch(line):
            # End of a row. Requiring a title first lets a release whose
            # title is itself a four-digit number be read as a title.
            if release_type:
                releases.append(Line(title, line, release_type))
            title = release_type = ""
        elif title and line in RELEASE_TYPES:
            release_type = line
        else:
            # Anything else is a title; this includes the type of a row we
            # do not print, which is discarded at the next year line.
            title = line
    return releases


def _fetch_text(url):
    """Download *url* and return the text content of the page."""
    # Imported here rather than at module level so the pure helpers above
    # stay importable without the scraping dependency installed.
    from bs4 import BeautifulSoup

    with urllib.request.urlopen(url) as response:
        html = response.read()
    return BeautifulSoup(html, "html5lib").get_text()


def get_releases(band):
    """Print the releases of *band*, one per line.

    Looks up the band page to find the band id, fetches the discography for
    that id and prints every release whose type is in ``RELEASE_TYPES``.
    Calls ``error()`` (which exits) when the band id cannot be found or a
    page cannot be fetched or parsed.
    """
    band_id = None
    discography = ""
    try:
        band_id = extract_band_id(_fetch_text(band_url(band)))
        if band_id is not None:
            quoted_id = urllib.parse.quote(band_id, safe="")
            discography = _fetch_text(
                f"{BASE_URL}/band/discography/id/{quoted_id}/tab/all"
            )
    except (OSError, ValueError, http.client.HTTPException):
        # Network failure, bad URL or unparsable response. Deliberately not
        # a bare except: that would also swallow SystemExit and real bugs.
        band_id = None
    if band_id is None:
        error()
    for release in parse_releases(discography.splitlines()):
        print(release.concat())


def main():
    """Command-line entry point: print releases for the band in argv[1]."""
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} 'iron maiden'", file=sys.stderr)
        sys.exit(2)
    get_releases(sys.argv[1])
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()