initial commit

This commit is contained in:
Mitch Weaver
2018-04-23 17:56:32 +02:00
commit 39eb9205cf
18 changed files with 523 additions and 0 deletions

111
releases Executable file
View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3
#
# http://github.com/mitchweaver/bin
#
# get releases of a band from metal-archives
#
# Usage: ./releases 'iron maiden'
#
from bs4 import BeautifulSoup
import requests
import urllib
import re
import sys
class Line():
type = ""
title = ""
year = ""
def __init__(self, title, year, type):
self.type = type
self.title = title
self.year = year
def concat(self):
line = self.title + " - " + self.year + " - (" \
+ self.type + ")"
return(line.strip())
def error():
print("Error: can't find it?")
quit()
def get_releases(band):
try:
ID = ""
url = ("https://www.metal-archives.com/bands/" + band.replace(" ", "_").lower())
html = urllib.request.urlopen(url).read()
soup = BeautifulSoup(html, "html5lib")
match = re.search(r'\bbandId\b.*', soup.get_text())
if match is not None:
tmp = match.group(0).split(" = ")
tmp = tmp[1][:-1]
ID = tmp
else:
error()
url = "https://www.metal-archives.com/band/discography/id/" \
+ ID + "/tab/all"
html = urllib.request.urlopen(url).read()
soup = BeautifulSoup(html, "html5lib")
text = soup.get_text()
raw_lines = (line.strip() for line in text.splitlines())
lines = []
# get rid of ratings in lines
for line in raw_lines:
if "%" not in line:
lines.append(line)
# temps
title = ""
year = ""
type = ""
formatted_lines = []
for line in lines:
# skip whitespace / garble
if len(line) < 2: continue
# check if year
match = re.match(r'.*[1-3][0-9]{3}', line)
if match is not None:
year = match.group(0)
# get type
elif line == "Full-length": type = line
elif line == "EP": type = line
elif line == "Demo": type = line
elif line == "Promo": type = line
elif line == "Compilation": type = line
# elif line == "Single": type = line
# elif line == "Boxed set": type = line
# else this must be our title
else: title = line
if title != "" and year != "" and type != "":
formatted_lines.append(Line(title, year, type))
title = year = type = ""
for line in formatted_lines:
print(line.concat())
except:
error()
def main():
get_releases(sys.argv[1])
if __name__ == "__main__": main()