Categories
Python Snippets

Image ALT Scraping for Accessibility Reporting

In my last job we had someone whose job it was to update the alt text on images, and shareholders and management needed reports on progress. So I built a small script to do just that, as well as another that suggests alt text using the Google Vision API.


import requests
import json
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options

# Report which images on a list of pages have no usable alt text.
#
# Reads page URLs (one per line) from URLS_FILE, loads each page in headless
# Firefox and appends one entry per page to REPORT_FILE.

URLS_FILE = 'urls'      # input: one page URL per line
REPORT_FILE = 'images'  # output: opened in append mode, so runs accumulate

# Executed inside the loaded page. Returns the `src` of every image whose alt
# text is empty (a missing alt attribute also reads as '') or is the literal
# string 'null'. Note the logical `||`: a bitwise `|` here would silently
# drop the second comparison. Doing the filtering in the browser needs a
# single round trip per page instead of one per image.
MISSING_ALT_SCRIPT = (
    "return Array.prototype.filter.call(document.images, function (img) {"
    "  return img.alt === '' || img.alt === 'null';"
    "}).map(function (img) { return img.src; });"
)


def read_url_list(path):
    """Return the non-blank lines of the file at *path*, whitespace-stripped."""
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def format_report_entry(page_url, image_sources):
    """Return the report text for one page.

    The entry is the page URL, ': ', then the image sources separated by
    newlines, terminated by a newline.
    """
    return page_url + ': ' + '\n'.join(image_sources) + '\n'


def find_images_missing_alt(driver, page_url):
    """Load *page_url* in *driver* and return the srcs of images lacking alt text.

    Only this page's images are returned; nothing is carried over from
    previously visited pages.
    """
    driver.get(page_url)
    # `or []` keeps the return type a list even if the driver yields None.
    return driver.execute_script(MISSING_ALT_SCRIPT) or []


def report_missing_alts(urls_file=URLS_FILE, report_file=REPORT_FILE):
    """Scan every URL listed in *urls_file* and append findings to *report_file*.

    A page that fails to load or evaluate is reported on stdout and skipped so
    one bad URL does not abort the whole run. The browser is always shut down,
    even if an unexpected error escapes.
    """
    urls = read_url_list(urls_file)
    print(urls)

    options = Options()
    # Passing the flag directly works across Selenium versions; the
    # set_headless() helper is not available in current releases.
    # Remove this line to watch the browser while debugging.
    options.add_argument('-headless')
    driver = webdriver.Firefox(options=options)

    try:
        for page_url in urls:
            print(page_url)
            try:
                sources = find_images_missing_alt(driver, page_url)
            except Exception as err:  # per-page best effort: log and move on
                print('Failed', err)
                continue

            for src in sources:
                print(src)

            with open(report_file, 'a') as report:
                report.write(format_report_entry(page_url, sources))
    finally:
        driver.quit()


if __name__ == '__main__':
    report_missing_alts()

import requests
import json
import sys



# Suggest alt text for images using Google Cloud Vision label detection.
#
# Reads image URLs (one per line) from IMAGE_URLS_FILE, asks the Vision API
# for up to MAX_LABELS labels per image and appends the result to ALTS_FILE.

IP_API = 'https://api.ipify.org'  # retained for compatibility; not queried

API_KEY = 'PASTE GOOGLE API KEY HERE'

POST_URL = 'https://vision.googleapis.com/v1/images:annotate'

IMAGE_URLS_FILE = 'urls'  # input: one image URL per line
ALTS_FILE = 'alts'        # output: opened in append mode
MAX_LABELS = 3            # labels requested from the API and used per image
REQUEST_TIMEOUT = 30      # seconds; stops a stalled request hanging the run
ERROR_TEXT = "An Error Occurreed"  # spelling kept: it is part of the output


def read_image_urls(path):
    """Return the non-blank lines of the file at *path*, whitespace-stripped."""
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def build_annotate_payload(image_url, max_labels=MAX_LABELS):
    """Return the JSON body requesting label detection for *image_url*."""
    return {
        "requests": [{
            "image": {"source": {"imageUri": image_url}},
            "features": [{
                "type": "LABEL_DETECTION",
                "maxResults": max_labels,
            }],
        }],
    }


def describe_labels(data):
    """Return the label descriptions in *data* joined by single spaces.

    *data* is the decoded JSON response. Up to MAX_LABELS labels are used, so
    an image for which fewer labels come back still gets a description.
    ERROR_TEXT is returned when the response carries no labels at all or does
    not have the expected structure.
    """
    try:
        annotations = data["responses"][0]["labelAnnotations"]
        words = [label["description"] for label in annotations[:MAX_LABELS]]
    except (KeyError, IndexError, TypeError):
        return ERROR_TEXT
    return " ".join(words) if words else ERROR_TEXT


def format_alt_entry(image_url, description):
    """Return the two-line text appended to the output file for one image."""
    return image_url + '\n' + 'Description: ' + description + '\n'


def suggest_alt_text(image_url):
    """Query the Vision API for *image_url* and return a description string.

    Network failures and non-JSON replies are printed and turned into
    ERROR_TEXT so that one bad image does not abort the run.
    """
    try:
        resp = requests.post(
            POST_URL,
            params={'key': API_KEY},  # let requests build and encode the query
            json=build_annotate_payload(image_url),
            timeout=REQUEST_TIMEOUT,
        )
        data = resp.json()
    except (requests.RequestException, ValueError) as err:
        print(err)
        return ERROR_TEXT

    print(data)
    return describe_labels(data)


def suggest_alts(urls_file=IMAGE_URLS_FILE, alts_file=ALTS_FILE):
    """Write an alt-text suggestion for every image URL in *urls_file*."""
    image_urls = read_image_urls(urls_file)
    print(image_urls)

    for image_url in image_urls:
        print(image_url)
        description = suggest_alt_text(image_url)
        print(description)

        with open(alts_file, 'a') as out:
            out.write(format_alt_entry(image_url, description))


if __name__ == '__main__':
    suggest_alts()