利用 OpenCV + ConvNets 檢測幾何圖形
[導讀] 作者 | 小白 來源 | 小白學視覺 導讀:人工智能領域中增長最快的子領域之一是自然語言處理(NLP),它處理計算機與人類(自然)語言之間的交互,特別是如何編程計算機以處理和理解大量自然語言數據。自然語言處理通常涉及語音識別、自然語言理解和自然語言生成等。其中,命名實體識別(NER)等信息...
作者 | 小白
來源 | 小白學視覺
# Detect quadrilateral regions in every input image and collect the cropped
# regions per image. This is the body of a method (it uses `self`); the `def`
# line is not shown in the article. `image_arr` holds the images and
# `name_arr` the matching names, indexed in parallel.
if len(image_arr) > 0:
    for index, original_image in enumerate(image_arr):
        # to store extracted images
        extracted_quad = []
        image = original_image.copy()

        # grayscale only if its not already
        if len(image.shape) > 2:
            gray = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        # image preprocessing for quadrilaterals
        img_dilate = self.do_quad_imageprocessing(
            gray, self.blocksize, self.thresh_const, self.kernelsize
        )

        if len(img_dilate) > 0:
            try:
                # detect contours
                cnts = cv2.findContours(
                    img_dilate.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
                )
                cnts = imutils.grab_contours(cnts)

                # loop through detected contours
                for c in cnts:
                    peri = cv2.arcLength(c, True)
                    approx = cv2.approxPolyDP(c, (self.epsilon) * peri, True)

                    # bounding rec cordinates
                    (x, y, w, h) = cv2.boundingRect(approx)

                    # get the aspect ratio
                    aspectratio = float(w / h)
                    area = cv2.contourArea(c)
                    if (
                        area < self.rec_max_area
                        and area > self.rec_min_area
                        and (
                            aspectratio >= self.aspect_ratio[0]
                            and aspectratio <= self.aspect_ratio[1]
                        )
                    ):
                        # check if there are 4 corners in the polygon
                        if len(approx) == 4:
                            cv2.drawContours(original_image, [c], 0, (0, 255, 0), 2)
                            # crop the bounding rectangle: rows y..y+h, cols x..x+w
                            roi = original_image[y:y + h, x:x + w]
                            extracted_quad.append(roi)
            except Exception as e:
                print('The following exception occured during quad shape detection: ', e)

        # NOTE(review): indentation was lost in the source; this append is placed
        # once per image because `extracted_quad` is reset per image — confirm.
        self.extracted_img_data.append([original_image, extracted_quad, name_arr[index]])
else:
    print('No image is found during the extraction process')
images = []
#get the pdf file for x in os.listdir(dirname): if (dirname.split('.')[1]) == 'pdf': pdf_filename = x images_from_path = convert_from_path(os.path.join(dirname),dpi=300, poppler_path = r'C:\Program Files (x86)\poppler-0.68.0_x86\poppler-0.68.0\bin')for image in images_from_path: images.append(np.array(image))
return images
# Train a small ConvNet on 32x32 single-channel crops with two classes.
# Statement boundaries were restored; the model and hyperparameters are unchanged.

# One-hot encode the class labels (2 classes).
Y_test_orig = to_categorical(Y_test_orig, num_classes=2)
Y_train_orig = to_categorical(Y_train_orig, num_classes=2)

# 3 layer ConvNet
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# dense layer
model.add(layers.Flatten())

# add the regulizer
model.add(layers.Dense(128, activation='linear', activity_regularizer=l2(0.0003)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(2, activation='sigmoid'))

model.summary()

# NOTE(review): `Adam(lr=...)` and `fit_generator` are deprecated in recent
# Keras releases — confirm against the installed version before running.
from keras.optimizers import Adam
opt = Adam(lr=0.001)
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

ntrain = len(X_train_orig)
nval = len(X_test_orig)
# Add the trailing channel axis expected by the first Conv2D layer.
X_train_orig = X_train_orig.reshape((len(X_train_orig), 32, 32, 1))
X_test_orig = X_test_orig.reshape((len(X_test_orig), 32, 32, 1))

# Augment the training data only; validation data is just rescaled.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=.2, height_shift_range=.2, shear_range=.2, zoom_range=.2, horizontal_flip=True)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(X_train_orig, Y_train_orig, batch_size=32)
val_generator = val_datagen.flow(X_test_orig, Y_test_orig, batch_size=32)

# X_train_orig, X_test_orig, Y_train_orig,Y_test_orig
history = model.fit_generator(train_generator, steps_per_epoch=ntrain/32, epochs=64, validation_data=val_generator, validation_steps=nval/32)
第3步中,我們將把所有內容整合在一個Sklearn pipeline中,并通過predict函數將其公開。我們沒有介紹的一個重要功能是將復選框或單選按鈕與文檔中相應的文本相關聯。在實際應用中,僅僅檢測沒有關聯的元素是毫無用處的。
來源 | 小白學視覺
# Detect quadrilateral regions in every input image and collect the cropped
# regions per image. This is the body of a method (it uses `self`); the `def`
# line is not shown in the article. `image_arr` holds the images and
# `name_arr` the matching names, indexed in parallel.
if len(image_arr) > 0:
    for index, original_image in enumerate(image_arr):
        # to store extracted images
        extracted_quad = []
        image = original_image.copy()

        # grayscale only if its not already
        if len(image.shape) > 2:
            gray = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        # image preprocessing for quadrilaterals
        img_dilate = self.do_quad_imageprocessing(
            gray, self.blocksize, self.thresh_const, self.kernelsize
        )

        if len(img_dilate) > 0:
            try:
                # detect contours
                cnts = cv2.findContours(
                    img_dilate.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
                )
                cnts = imutils.grab_contours(cnts)

                # loop through detected contours
                for c in cnts:
                    peri = cv2.arcLength(c, True)
                    approx = cv2.approxPolyDP(c, (self.epsilon) * peri, True)

                    # bounding rec cordinates
                    (x, y, w, h) = cv2.boundingRect(approx)

                    # get the aspect ratio
                    aspectratio = float(w / h)
                    area = cv2.contourArea(c)
                    if (
                        area < self.rec_max_area
                        and area > self.rec_min_area
                        and (
                            aspectratio >= self.aspect_ratio[0]
                            and aspectratio <= self.aspect_ratio[1]
                        )
                    ):
                        # check if there are 4 corners in the polygon
                        if len(approx) == 4:
                            cv2.drawContours(original_image, [c], 0, (0, 255, 0), 2)
                            # crop the bounding rectangle: rows y..y+h, cols x..x+w
                            roi = original_image[y:y + h, x:x + w]
                            extracted_quad.append(roi)
            except Exception as e:
                print('The following exception occured during quad shape detection: ', e)

        # NOTE(review): indentation was lost in the source; this append is placed
        # once per image because `extracted_quad` is reset per image — confirm.
        self.extracted_img_data.append([original_image, extracted_quad, name_arr[index]])
else:
    print('No image is found during the extraction process')
images = []
#get the pdf file for x in os.listdir(dirname): if (dirname.split('.')[1]) == 'pdf': pdf_filename = x images_from_path = convert_from_path(os.path.join(dirname),dpi=300, poppler_path = r'C:\Program Files (x86)\poppler-0.68.0_x86\poppler-0.68.0\bin')for image in images_from_path: images.append(np.array(image))
return images
-
勾選復選框
-
空復選框
-
其他
# Train a small ConvNet on 32x32 single-channel crops with two classes.
# Statement boundaries were restored; the model and hyperparameters are unchanged.

# One-hot encode the class labels (2 classes).
Y_test_orig = to_categorical(Y_test_orig, num_classes=2)
Y_train_orig = to_categorical(Y_train_orig, num_classes=2)

# 3 layer ConvNet
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# dense layer
model.add(layers.Flatten())

# add the regulizer
model.add(layers.Dense(128, activation='linear', activity_regularizer=l2(0.0003)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(2, activation='sigmoid'))

model.summary()

# NOTE(review): `Adam(lr=...)` and `fit_generator` are deprecated in recent
# Keras releases — confirm against the installed version before running.
from keras.optimizers import Adam
opt = Adam(lr=0.001)
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

ntrain = len(X_train_orig)
nval = len(X_test_orig)
# Add the trailing channel axis expected by the first Conv2D layer.
X_train_orig = X_train_orig.reshape((len(X_train_orig), 32, 32, 1))
X_test_orig = X_test_orig.reshape((len(X_test_orig), 32, 32, 1))

# Augment the training data only; validation data is just rescaled.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=.2, height_shift_range=.2, shear_range=.2, zoom_range=.2, horizontal_flip=True)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(X_train_orig, Y_train_orig, batch_size=32)
val_generator = val_datagen.flow(X_test_orig, Y_test_orig, batch_size=32)

# X_train_orig, X_test_orig, Y_train_orig,Y_test_orig
history = model.fit_generator(train_generator, steps_per_epoch=ntrain/32, epochs=64, validation_data=val_generator, validation_steps=nval/32)
第3步中,我們將把所有內容整合在一個Sklearn pipeline中,并通過predict函數將其公開。我們沒有介紹的一個重要功能是將復選框或單選按鈕與文檔中相應的文本相關聯。在實際應用中,僅僅檢測沒有關聯的元素是毫無用處的。





