Я получаю сообщение HTTPError: HTTP Error 404: Not Found error при реализации easyOCR для URL-адресов изображений из электронной таблицы Excel. Идея состоит в том, чтобы перебрать каждый URL-адрес в файле Excel и создать выходной файл Excel с извлеченным текстом.
Входной файл выглядит следующим образом: входные данные
Помимо iterrows(), я также использовал range(len()) и столкнулся с той же проблемой. Подскажите, пожалуйста, как решить эту проблему. Заранее спасибо!
def modelling(url):
reader = easyocr.Reader(['en'])
bounds = reader.readtext(url, detail = 0)
return bounds
new_col = []
for index, row in df.iterrows():
img = row['url']
col = modelling(img)
new_col.append(col)
df['new_col'] = new_col
filename = "Output_Data.xlsx"
df.to_excel(filename)
print("Data frame is written to Excel file successfully.")
Журналы ошибок указаны ниже.
<ipython-input-10-013fb5c72447> in modelling(url)
1 def modelling(url):
2 reader = easyocr.Reader(['en'])
----> 3 bounds = reader.readtext(url, detail = 0)
4 return bounds
/usr/local/lib/python3.6/dist-packages/easyocr/easyocr.py in readtext(self, image, decoder, beamWidth, batch_size, workers, allowlist, blocklist, detail, paragraph, min_size, contrast_ths, adjust_contrast, filter_ths, text_threshold, low_text, link_threshold, canvas_size, mag_ratio, slope_ths, ycenter_ths, height_ths, width_ths, add_margin)
345 image: file path or numpy-array or a byte stream object
346 '''
--> 347 img, img_cv_grey = reformat_input(image)
348
349 horizontal_list, free_list = self.detect(img, min_size, text_threshold,\
/usr/local/lib/python3.6/dist-packages/easyocr/utils.py in reformat_input(image)
645 if type(image) == str:
646 if image.startswith('http://') or image.startswith('https://'):
--> 647 tmp, _ = urlretrieve(image , reporthook=printProgressBar(prefix = 'Progress:', suffix = 'Complete', length = 50))
648 img_cv_grey = cv2.imread(tmp, cv2.IMREAD_GRAYSCALE)
649 os.remove(tmp)
/usr/lib/python3.6/urllib/request.py in urlretrieve(url, filename, reporthook, data)
246 url_type, path = splittype(url)
247
--> 248 with contextlib.closing(urlopen(url, data)) as fp:
249 headers = fp.info()
250
/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
/usr/lib/python3.6/urllib/request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
/usr/lib/python3.6/urllib/request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/usr/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 404: Not Found