mtsRhea's Blog

Feb 26, 2017 6:46 PM
Anime Relations: Muramasa
This Python code creates a .txt file of all the unique, case-sensitive tags in your exported MAL data.
It can be used to find long-forgotten tags, misspellings, or typos.
To use it, run the script in a terminal from the folder containing the exported .gz file.
Update 28/02/17: it now also outputs a second .txt file with BBCode-compatible URLs [Example]
### Mutsuto, 30/12/16. Update, 07/01/17###
import os
import gzip
import html.parser
# Kept only so the module-level name still exists. HTMLParser.unescape() was
# deprecated in Python 3.4 and removed in 3.9, so it is no longer called.
html_parser = html.parser.HTMLParser()
def html(my_string):
    """Return *my_string* with HTML character references decoded.

    For example ``"&amp;"`` becomes ``"&"``.  Text without any character
    references is returned unchanged.
    """
    # Imported here because this function's own name shadows the ``html``
    # module at file level once the ``def`` statement has run.
    from html import unescape
    return unescape(my_string)

# Buckets the parsed tags are sorted into; a tag is stored in at most one.
unique_tags = list()  # tags that match none of the rules below
day_tags = list()     # tags containing "day"
date_tags = list()    # tags containing "Summer", "Winter", "Fall" or "Spring"
year_tags = list()    # tags made up of digits only

# Step 1: unpack every export archive (name containing ".xml.gz") found in the
# current folder into data.txt.  data.txt is reopened in 'w' mode for each
# archive, so when several are present only the one processed last is kept.
for entry in os.listdir('.'):
    if os.path.isfile(entry) and ".xml.gz" in entry:
        with gzip.open(entry, 'rb') as archive:
            # Write the decoded text back out explicitly as UTF-8 instead of
            # the platform default encoding, so the step below reads exactly
            # what was written here.
            with open('data.txt', 'w', encoding='utf-8') as extracted:
                for raw_line in archive:
                    extracted.write(raw_line.decode('UTF-8'))

# Step 2: scan data.txt line by line, remembering the user name and sorting
# every new tag into one of the module-level bucket lists.
season_words = ("Summer", "Winter", "Fall", "Spring")
with open('data.txt', 'r', encoding='utf-8') as mal_data:
    # Use the loop variable itself: calling readline() inside the loop would
    # consume a second line per iteration and skip every other line.
    for line in mal_data:
        if '<user_name>' in line:
            username = line.split("<user_name>")[1].split("</user_name>")[0]
        elif '<my_tags>' in line:
            # Skip entries with an empty tag list, and lines that carry no
            # CDATA section at all (nothing to split on).
            if '<![CDATA[]]></my_tags>' in line or '<![CDATA[' not in line:
                continue
            tag_field = line.split('<![CDATA[')[1].split(']]></my_tags>')[0]
            for raw_tag in tag_field.split(', '):
                tag = raw_tag.strip('\n')
                if tag in unique_tags:
                    continue
                # First matching rule wins: season word, then all digits,
                # then "day"; anything else is an ordinary tag.
                if any(word in tag for word in season_words):
                    bucket = date_tags
                elif tag.isdigit():
                    bucket = year_tags
                elif "day" in tag:
                    bucket = day_tags
                else:
                    bucket = unique_tags
                if tag not in bucket:
                    bucket.append(tag)

def mal_format(item):
    """Wrap *item* in a BBCode ``[url=...]`` tag linking to the user's anime list.

    The link target is the anime-list URL of the module-level ``username``
    with ``html(item)`` appended as the value of the ``tag`` query parameter.
    The visible link text is *item* itself, unmodified.
    """
    link_target = "".join((
        "https://myanimelist.net/animelist/",
        username,
        "?status=7&tag=",
        html(item),
    ))
    return "".join(("[url=", link_target, "]", item, "[/url]"))

def print_to_txt(a,b,c):
    """Append the entries of *a*, sorted, to the file *b* as one line.

    Parameters:
        a: iterable of tag strings.
        b: path of the file to append to.
        c: output mode.  ``0`` writes each tag as is, ``1`` writes
           ``mal_format(tag)``; for any other value only the separators
           are written.

    Every entry is followed by ``', '`` (including the last one) and the
    line is terminated by a blank line.
    """
    # UTF-8 is requested explicitly: with the platform default encoding,
    # tags containing characters outside that encoding cannot be written.
    with open(b, 'a', encoding='utf-8') as out:
        for item in sorted(a):
            if c == 0:
                out.write(item)
            elif c == 1:
                out.write(mal_format(item))
            out.write(', ')
        out.write('\n\n')

def output_to_txt(destination,boolean):
    """Rewrite *destination* from scratch with all four tag buckets.

    The buckets are written in the order ``unique_tags``, ``date_tags``,
    ``year_tags``, ``day_tags``; *boolean* is passed straight through to
    ``print_to_txt`` as its output mode.
    """
    # print_to_txt only appends, so empty the file first by opening it in
    # 'w' mode and closing it again.
    open(destination, 'w').close()
    for bucket in (unique_tags, date_tags, year_tags, day_tags):
        print_to_txt(bucket, destination, boolean)

# Write the plain tag list first, then the variant with BBCode links.
output_to_txt(destination='output.txt', boolean=0)
output_to_txt(destination='output_data_BBClinks.txt', boolean=1)
Posted by mtsRhea | Feb 26, 2017 6:46 PM | Add a comment
It’s time to ditch the text file.
Keep track of your anime easily by creating your own list.
Sign Up Login