Jade Carter

Neural Networks: Natural Language Processing





 


In recent years, technologies that work with human language have moved from research labs into everyday products, and natural language processing (NLP) has become one of the most visible branches of artificial intelligence. Voice assistants, translation services, search engines, and chatbots all rely on it.

Behind these products are neural networks: models that learn the patterns of language from large collections of text instead of relying on hand-written rules.

This book looks at how neural networks are applied to natural language. The first chapter surveys the main areas where NLP is used today; the following chapters examine the network architectures behind these applications, from recurrent and convolutional networks to Transformer-based models.

The material is aimed at readers who want a practical introduction: the concepts are explained in plain terms and illustrated with small code examples in Python. Let's begin!




Chapter 1: The role of natural language processing in the modern world


The role of natural language processing (NLP)

Natural language processing (NLP) is a field of artificial intelligence concerned with the interaction between computers and human language: how machines analyze, understand, and generate text and speech. It draws on linguistics, machine learning, and computer science, and it underlies many technologies we use every day.

NLP plays an important role in a wide range of products and services. The key areas are described below:

1. Human-computer interaction:

Natural language processing (NLP) lets people interact with computers in ordinary language rather than through rigid commands and menus. Instead of learning a special syntax, users can simply speak or type, and the system interprets the request and responds.

Thanks to NLP, voice interfaces, chatbots, and translation services have become part of everyday life, and interacting with software feels closer to a conversation with another person.

NLP systems analyze the user's words, determine the intent behind them, and produce an appropriate answer, action, or translation.

Examples:

Voice assistants: systems such as Siri, Google Assistant, and Amazon Alexa use NLP to recognize spoken requests, answer questions, and carry out commands. They make devices usable even for people with no technical background.

Chatbots: NLP-based chatbots handle customer requests, answer common questions, and pass complex cases on to human operators, which speeds up service.

Machine translation: translation services analyze the structure and meaning of a sentence in one language and produce an equivalent sentence in another, making information accessible across language barriers.

In short, NLP turns natural language into a practical interface between people and machines, which makes technology simpler and more convenient to use.

2. Processing and analysis of large volumes of text:

Organizations accumulate enormous amounts of unstructured text every day: e-mails, documents, support tickets, news articles, social-media posts, and customer reviews. Reading all of it manually is impossible, and this is where natural language processing (NLP) becomes indispensable: it turns raw text into structured, usable information.

NLP gives machines the tools to perform tasks such as the following automatically:

Information extraction. Systems pull facts, names, dates, and relations out of documents, so the important details can be found without reading every page. NLP makes this possible at a scale no team of analysts could match.

Text classification. NLP models sort documents into categories, for example routing incoming messages to the right department or separating relevant articles from irrelevant ones.

Sentiment analysis. Models estimate the emotional tone of reviews and posts, letting companies track how customers feel about a product or service.

Automatic summarization. Long documents are condensed into short summaries that preserve the key points, saving readers time.

Search and retrieval. NLP improves search by understanding the meaning of queries and documents rather than matching keywords literally.

By automating this kind of analysis, NLP helps organizations make decisions based on information that would otherwise remain buried in text.

3. Automation of routine tasks:

Many everyday processes in organizations revolve around reading, writing, and routing text: answering standard questions, filling in forms, sorting incoming requests. Done by hand, this work is repetitive and time-consuming.

Natural language processing (NLP) makes it possible to automate a large share of it. Systems built on NLP can understand an incoming message, decide what it is about, and either answer it directly or pass it to the right person. As a result, employees spend less time on mechanical work, and customers receive answers faster.

Automation with NLP also reduces the number of errors that creep in during manual processing and makes workflows easier to scale: handling ten times more requests does not require ten times more staff.

Chatbots and automated e-mail processing are typical examples: they take over the simple, frequent requests and escalate only the difficult cases to people.

Overall, NLP-based automation saves time and money and lets organizations respond to people more quickly and more consistently.

It is worth noting that automation works best for well-defined, repetitive requests; unusual or sensitive cases still need human attention, and good systems are designed to recognize when to hand a conversation over.

4. Personalization of content and recommendations:

Users today expect services to adapt to them: to suggest relevant articles, products, or answers rather than showing the same content to everyone. Natural language processing (NLP) plays a central role here, because most of the signals about what a person is interested in are expressed in language.

By analyzing queries, reviews, messages, and reading history, NLP systems build a picture of a user's interests and preferences.

This picture is then used to personalize what the user sees: news feeds, product recommendations, search results, and even the wording of notifications. Streaming services, online stores, and news platforms all rely on this kind of analysis.

Personalization benefits both sides: users find what they need faster, and services see higher engagement and loyalty.

At the same time, NLP-driven personalization raises questions about privacy and the responsible use of personal data, which systems of this kind have to take into account.

5. Education:

Natural language processing (NLP) is also changing how people learn and how educational materials are created, delivered, and assessed. Several directions stand out.

Personalized learning.

NLP systems analyze how a student writes and answers questions and adapt the material accordingly: they suggest what to review, pick exercises of suitable difficulty, and track progress over time, giving each student something closer to individual tutoring.

Automated assessment of written work.

NLP models can check essays and free-form answers, evaluate grammar and structure, and give immediate feedback, freeing teachers from part of the routine grading.

Language learning.

Applications for learning foreign languages use NLP for pronunciation feedback, grammar correction, dialogue practice with chatbots, and automatically generated exercises.

Accessibility of learning materials.

Speech recognition, text-to-speech, and automatic translation make lectures and textbooks accessible to students with disabilities and to speakers of other languages.

In addition, educational chatbots and question-answering tools let students get help at any time, not only when a teacher is available.

Overall, NLP makes education more personal, more accessible, and easier to scale, although it supplements rather than replaces the work of teachers.

6. Business and social media analytics:

Natural language processing (NLP) is widely used to analyze what people write about companies, products, and events. Social networks, review sites, forums, and news produce a constant stream of text, and NLP turns that stream into signals a business can act on.

Typical applications include:

Customer feedback analysis: reviews and support messages are processed automatically to find what customers praise and what they complain about, so that recurring problems are spotted and fixed earlier. NLP surfaces the common themes without anyone having to read every message.

Brand monitoring: companies track mentions of their brand across social media and news, and NLP classifies those mentions by topic and tone.

Trend detection: by analyzing large volumes of posts and queries, NLP systems notice emerging topics and shifts in public interest, which helps with planning and market research.

Content moderation: NLP models flag offensive, fraudulent, or otherwise unwanted content on websites and platforms, supporting human moderators.

Market research: analysis of open text sources complements traditional surveys and gives a faster, cheaper picture of how the market reacts to products and campaigns.

Together these applications let organizations base decisions on what people actually say, at a scale no manual analysis could reach.

7. Named Entity Recognition (NER):

Named entity recognition (NER) is one of the key tasks of natural language processing (NLP): finding the fragments of a text that name specific things and assigning each of them a type. It turns unstructured text into structured information that can be stored, counted, and searched. The main points are the following:

What is named entity recognition (NER)?

NER systems scan a text and mark the spans that refer to entities of predefined categories. Typical categories include:

Person: for example, the name of a specific person.

Organization: for example, "Google" or the name of a government agency.

Location: for example, the name of a country or a city.

Date: for example, "10 May 1990".

Money: for example, "$100" or "1,000 dollars".

Percent: for example, "20%" or "50 percent".

Event: for example, the name of a conference or a festival.

Product: for example, "iPhone" or "Coca-Cola".

Named entity recognition matters for many other NLP applications:

Information extraction: NER identifies which people, companies, places, dates, and amounts a document talks about.

Search: recognized entities make search more precise, because a query can be matched against entities rather than raw word forms.

Question answering: many answers are entities (a person, a date, a place), so question-answering systems rely on NER to locate candidate answers.

Analytics and monitoring: counting entity mentions over time supports tasks such as media monitoring and trend analysis.

NER is usually implemented with models trained on annotated corpora; modern systems are built on neural networks and cope with new texts, unusual spellings, and previously unseen names far better than hand-written rules. A short illustration with an off-the-shelf NER model follows below.
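As a quick practical illustration of NER (not code from this book), the sketch below uses the open-source spaCy library; it assumes spaCy and its small English model en_core_web_sm are installed (`pip install spacy`, then `python -m spacy download en_core_web_sm`).

```python
import spacy

# Load a small pretrained English pipeline that includes an NER component
nlp = spacy.load("en_core_web_sm")

text = "Apple opened a new office in London for $100 million on 10 May 2023."
doc = nlp(text)

# Print every detected entity together with its label (ORG, GPE, MONEY, DATE, ...)
for ent in doc.ents:
    print(ent.text, "->", ent.label_)
```

The pretrained model does all the work here; training your own NER model requires a corpus in which the entities have been annotated by hand.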

8. Machine translation:

Machine translation is the task of automatically translating text from one language into another. It is one of the oldest and most visible applications of NLP, and the quality of translation systems has grown dramatically with the adoption of neural networks. Three generations of approaches are usually distinguished:

1. Rule-based translation: early systems relied on hand-written linguistic rules and bilingual dictionaries. They were predictable but brittle, required enormous manual effort, and coped poorly with the variability of real language.

2. Statistical machine translation: later systems learned translation correspondences from large collections of parallel texts and chose the most probable translation of each phrase. Quality improved, but translations often remained literal and ungrammatical.

3. Neural machine translation: modern systems use neural networks that read the whole sentence and generate the translation as a sequence, taking context into account. This approach produces far more fluent and accurate translations and is the basis of today's translation services.

Neural machine translation has made information in foreign languages accessible to millions of people, although complex, specialized, or culturally sensitive texts still benefit from review by a human translator.

9. Question-answering systems:

Question-answering (QA) systems take a question formulated in natural language and return a specific answer rather than a list of documents. They are a natural evolution of search: instead of making the user read through results, the system extracts or generates the answer itself, and NLP is what makes this possible. The key aspects of such systems:

1. Models used in question-answering systems:

Recurrent neural networks (RNN): RNNs process the question and the supporting text as sequences, which historically made them a common choice for QA models.

Convolutional neural networks (CNN): CNNs are used to extract local features and informative phrases from the question and the candidate answers.

Transformer models, such as BERT and GPT, currently give the best results for question answering and are the basis of most modern QA systems.

2. Approaches to building question-answering systems:

Retrieval-based approach: the system searches a collection of documents for a passage that contains the answer and extracts the relevant span of text. Its quality depends on the quality of the search and of the extraction model.

Generative approach: the system generates the answer word by word instead of copying it from a document, which lets it answer questions whose exact wording never appears in the source texts.

3. Applications of question-answering systems:

Virtual assistants: QA systems are at the core of assistants that answer user questions, look up facts, and help with everyday tasks.

Chatbots: QA components let chatbots answer customer questions directly instead of only following scripted dialogues.

Search engines: modern search increasingly answers simple questions right on the results page, using QA models under the hood.

Education: QA systems help students find answers in textbooks and course materials and can power automated tutoring tools.

Customer support: QA systems answer frequent questions automatically, reducing the load on human operators and shortening response times.

Question-answering is one of the most practically important NLP tasks, and progress in it closely follows progress in the underlying language models: the better a model understands text, the more accurate and natural its answers become. A small illustration with a pretrained QA model follows below.
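As an illustration of an extractive question-answering system (a sketch, not code from this book), the example below uses the Hugging Face transformers library; it assumes the library is installed and will download a default pretrained QA model on first use.

```python
from transformers import pipeline

# A ready-made extractive QA pipeline: it finds the answer span inside the given context
qa = pipeline("question-answering")

context = (
    "Natural language processing (NLP) is a field of artificial intelligence "
    "that studies how computers can understand and generate human language."
)
result = qa(question="What does NLP study?", context=context)

# The pipeline returns the answer text and a confidence score
print(result["answer"], round(result["score"], 3))
```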

In summary, natural language processing has become a key technology: it changes how we interact with computers, how organizations work with information, and how services are delivered in business, education, and many other fields.

In the next chapter we turn from applications to mechanisms and look at the neural network architectures that make these applications possible, along with their strengths and limitations.






Chapter 2: Neural network architectures for NLP

2.1. Architectures used in NLP, their strengths and limitations

Natural language processing (NLP) relies on neural network architectures designed for sequential data. Different architectures suit different tasks, and each has its own advantages and drawbacks. Two classic families are considered first: recurrent neural networks (RNN) and convolutional neural networks (CNN).

Recurrent neural networks (RNN)

An RNN is a network designed for sequences: it reads its input one element at a time and carries information from earlier elements forward to later ones. This makes RNNs a natural fit for text, where the meaning of a word depends on the words that came before it. The key property of an RNN is that its output at each step depends not only on the current input but also on what the network has already seen.

Key aspects of how an RNN works:

1. Hidden state: at every time step the RNN keeps a hidden state, a vector that summarizes the sequence processed so far. It acts as the network's short-term memory and is what distinguishes an RNN from an ordinary feed-forward network (a small numeric sketch of how this state is updated is given at the end of this subsection).

How it works:

At each step the RNN receives the current input element (for example, the embedding of the current word) together with its previous hidden state, combines the two, and produces a new hidden state, which is passed on to the next step. In this way information can, in principle, travel from the beginning of the sequence to its end.

How the hidden state is used depends on the task. The typical options are:

Sequence classification: only the final hidden state is used; it summarizes the whole sequence and is fed to a classifier.

Sequence labelling: the hidden state at every step is used, so that each input element (for example, each word) receives its own prediction.

Sequence generation: the hidden state is used to predict the next element, which is then fed back as input, step by step.

Short-term memory:

The hidden state gives the network a form of memory, but this memory is limited: information from many steps back gradually fades as new inputs arrive.

Parameter sharing:

The same weights are applied at every time step. This keeps the number of parameters small and lets the network handle sequences of any length, but it also forces one set of weights to serve all positions.

Practical limitations:

In practice, simple RNNs struggle to retain information over long distances because of the vanishing gradient problem discussed later in this chapter. Improved variants such as LSTM and GRU were designed specifically to ease this limitation.

The basic RNN is nevertheless the conceptual foundation: once the hidden state and its update are clear, LSTM, GRU, and bidirectional networks are much easier to understand.

Recurrent neural networks (RNN) are applied to text classification, language modelling, machine translation, speech recognition, and other tasks where the order of the elements matters.
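To make the idea of a hidden state concrete, here is a minimal numpy sketch of the update an RNN performs at every step. The weight matrices are random and untrained, and the shapes and names are illustrative rather than taken from any library.

```python
import numpy as np

hidden, inputs = 4, 3
rng = np.random.default_rng(42)
W_x = rng.normal(size=(hidden, inputs))   # input-to-hidden weights
W_h = rng.normal(size=(hidden, hidden))   # hidden-to-hidden weights (the "memory" connection)
b = np.zeros(hidden)                      # bias

h = np.zeros(hidden)                      # initial hidden state
sequence = rng.normal(size=(5, inputs))   # a toy sequence of 5 input vectors

for t, x_t in enumerate(sequence):
    # The new state mixes the current input with the previous state
    h = np.tanh(W_x @ x_t + W_h @ h + b)
    print(f"step {t}: h = {np.round(h, 3)}")
```

The same weights are reused at every step, and the printed vector is the network's running memory of everything seen so far.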

As an example, consider how an RNN might process the sentence "I love machine learning" for sentiment classification (the sentence is illustrative). The network handles it roughly as follows:

1. Input representation:

Each word of the sentence is first converted into a numeric vector, for example with an embedding layer. The sentence becomes a sequence of vectors that are fed to the network one by one.

2. Sequential processing:

The RNN reads the vectors in order, updating its hidden state after every word. By the time it has read "love", the hidden state already reflects both "I" and "love", and each subsequent word is interpreted in the context of everything read before it.

3. Final state:

After the last word, the hidden state contains a summary of the whole sentence; this vector is what the rest of the model works with.

4. Output:

Finally, a classification layer takes this summary and predicts the sentiment of the sentence, for example "positive".

The quality of the word vectors (step 1) and the network's ability to keep relevant information in its hidden state (steps 2 and 3) largely determine how well the RNN performs; these two ingredients are what make RNNs useful in NLP.

Note that in practice improved recurrent cells such as LSTM and GRU are usually preferred over the basic RNN, because they handle long sentences and long-range dependencies much better.

Recurrent networks are usually implemented in Python with a framework such as TensorFlow. Below is a simple example of text sentiment classification with an RNN in TensorFlow/Keras:

```python

import numpy as np

import tensorflow as tf

from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

from tensorflow.keras.models import Sequential

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample texts and their sentiment labels (illustrative only)
texts = ["I really enjoyed this film.",
         "The plot was boring and predictable.",
         "Great acting, I loved every minute."]
labels = np.array([1, 0, 1])  # 1 - positive, 0 - negative

# Tokenize the texts and convert them to integer sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad all sequences to the same length
max_sequence_length = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_sequence_length))
model.add(SimpleRNN(32))
model.add(Dense(1, activation='sigmoid'))  # binary sentiment output

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(sequences, labels, epochs=10, batch_size=1)

# Prepare new texts for prediction
test_texts = ["What a wonderful movie.", "I did not like it at all."]
test_sequences = tokenizer.texts_to_sequences(test_texts)
test_sequences = pad_sequences(test_sequences, maxlen=max_sequence_length)

# Predict the sentiment of the new texts
predictions = model.predict(test_sequences)
for i, text in enumerate(test_texts):
    sentiment = "positive" if predictions[i][0] > 0.5 else "negative"
    print(f"Text: {text} -> predicted sentiment: {sentiment}")

```

This example shows how to create and train a simple RNN for classifying the sentiment of short texts. Keep in mind that with only three training sentences the result is purely illustrative; real tasks require much larger datasets and more careful preprocessing and evaluation.

2. Feedback loops:

Feedback loops are what make a recurrent neural network (RNN) recurrent: the state produced at one time step is fed back and used as part of the input at the next step. This loop is how information about the past is carried forward through the sequence. The main points:

1. Sequential input:

The network receives the sequence one element at a time: one word, one character, or one measurement per step.

2. Hidden state as memory:

At every step the feedback loop passes the previous hidden state back into the network, so the new hidden state is computed from both the current input and the accumulated context.

3. Repetition over the sequence:

The same operation is repeated for every element, so the network can be unrolled into a chain of identical layers, one per time step.

4. An example:

Take the text: "The weather is great today. We are going to the park." (the sentences are illustrative). While processing the second sentence, the RNN still carries information from the first one in its hidden state; when it reads "going", that word is interpreted in the context of "the weather is great", which makes the continuation "to the park" more predictable. Without the feedback loop each word would be processed in isolation and this context would be lost.

5. Limits of this memory:

In practice the influence of early elements fades as the sequence grows, because the gradients shrink as they are propagated back through many steps. This is the vanishing gradient problem; gated architectures such as LSTM and GRU were designed to cope with it.

The feedback loop is therefore both the strength of RNNs, giving them memory and context, and the source of their main weakness on long sequences.

To build intuition for the feedback loop, imagine a short dialogue between a teacher and a student. Suppose the "memory" in this analogy is the student's recollection of the conversation so far:

Teacher: asks the first question: "What is 2 + 2?" The student answers "4" and remembers the exchange.

Student: having answered, the student keeps in mind both the question and the answer; this is the analogue of the hidden state.

Teacher: asks "And 3 + 3?" The student recalls the previous exchange, recognizes that the new question is of the same kind, and answers "6" with confidence.

The memory of the conversation: each new answer is produced in the context of everything said before.

In the same way, the output of an RNN at one step (the analogue of the answer) is fed back through the hidden state (the analogue of the memory of the conversation) and shapes the processing of the next input. This is exactly what the feedback loop does inside the network.

3. Weights and biases:

Weights and biases are the learnable parameters of a recurrent neural network (RNN). Training a network means adjusting these numbers so that its predictions on the training data become as accurate as possible. The main ingredients are listed below (a small numeric sketch of one training loop follows this list):

1. Parameters:

Weights: the coefficients applied to the inputs and to the previous hidden state at every step. They determine how strongly each signal influences the result and are shared across all time steps.

Biases: additional constants added at each step; they shift the activations and give the network extra flexibility.

2. Forward pass: the RNN processes the input sequence with the current values of the weights and biases and produces its predictions.

3. Loss function: the predictions are compared with the true answers using a loss function, which measures how wrong the network currently is. Training aims to minimize this loss, typically with gradient descent.

4. Backpropagation: the gradients of the loss with respect to every weight and bias are computed by propagating the error backwards through the network (backwards through time, in the case of an RNN).

5. Update: each parameter is shifted a small step in the direction that reduces the loss; the size of the step is controlled by the learning rate.

6. Epochs: the forward pass, loss computation, backpropagation, and update are repeated over the training data many times, and the parameters gradually converge to values that make the predictions accurate.

7. Generalization: a well-trained set of weights and biases should work not only on the training data but also on new, unseen sequences; this is checked on a separate validation or test set.

In short, weights and biases are where everything the RNN has learned is stored, and training is nothing more than the systematic adjustment of these numbers.
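Before moving to a real recurrent example, here is the same loop of forward pass, loss, gradients, and updates stripped down to a single linear neuron in numpy. It is only a sketch of the training procedure described above, not part of an RNN.

```python
import numpy as np

# Toy data: y = 2*x + 1 plus a little noise
rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 2.0 * x + 1.0 + 0.1 * rng.normal(size=100)

w, b = 0.0, 0.0   # weight and bias, both start at zero
lr = 0.1          # learning rate

for epoch in range(100):
    y_pred = w * x + b                      # forward pass
    loss = np.mean((y_pred - y) ** 2)       # mean squared error
    grad_w = np.mean(2 * (y_pred - y) * x)  # dLoss/dw
    grad_b = np.mean(2 * (y_pred - y))      # dLoss/db
    w -= lr * grad_w                        # gradient-descent update
    b -= lr * grad_b

print(round(w, 2), round(b, 2))  # ends up close to 2 and 1
```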

As a practical illustration, let us train a recurrent network on a simple synthetic time series in Python using TensorFlow. The example below builds a small RNN that learns to predict the next value of the series:

```python

import numpy as np

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import SimpleRNN, Dense

import matplotlib.pyplot as plt

# Generate a simple synthetic time series: a linear trend plus a sine wave
np.random.seed(0)
n_points = 100
time = np.linspace(0, 10, n_points)
series = 0.1 * time + np.sin(time)

# Cut the series into overlapping windows for the RNN:
# each sample is n_steps consecutive values, the target is the next value
n_steps = 30
n_samples = len(series) - n_steps
X = [series[i:i + n_steps] for i in range(n_samples)]
y = series[n_steps:]
X = np.array(X).reshape(-1, n_steps, 1)
y = np.array(y)

# Build a small RNN model
model = Sequential()
model.add(SimpleRNN(10, activation="relu", input_shape=[n_steps, 1]))
model.add(Dense(1))

# Compile the model
model.compile(optimizer="adam", loss="mse")

# Train the model
model.fit(X, y, epochs=10)

# Forecast future values autoregressively, feeding each prediction back in
future_steps = 10
future_x = X[-1, :, :]
future_predictions = []
for _ in range(future_steps):
    future_pred = model.predict(future_x.reshape(1, n_steps, 1), verbose=0)
    future_predictions.append(future_pred[0, 0])
    future_x = np.roll(future_x, shift=-1)
    future_x[-1] = future_pred[0, 0]

# Plot the last observed window and the forecast
plt.plot(np.arange(n_steps), X[-1, :, 0], label="Last observed window")
plt.plot(np.arange(n_steps, n_steps + future_steps), future_predictions, label="Forecast")
plt.xlabel("Time step")
plt.ylabel("Value")
plt.legend()
plt.show()

```




Explanation of the code and its results:

In this example a simple RNN is trained to predict the next value of a time series from the previous values.

The model is compiled with the "adam" optimizer and the "mse" (Mean Squared Error) loss.

After training, the model is used to forecast future values, which are then plotted together with the last observed window.

The resulting plot contains two curves:

1. The observed data (first curve): the synthetic series the model was trained on, a linear trend (0.1 * time) plus a sinusoid (np.sin(time)).

2. The forecast (second curve): the values predicted by the RNN, continuing the series for the requested number of future steps (future_steps).

The plot gives a visual impression of how well the RNN has captured the structure of the series; the quality of the forecast depends on the model and on the amount of training data.

In a real task you would evaluate the forecast on held-out data rather than judging it by eye.

This example illustrates the basic recipe for sequence forecasting: a recurrent layer followed by a Dense output layer. For more complex series, deeper models or other architectures may be needed.

The same recipe applies to any sequential data, but its success depends on how far back in the sequence the relevant information lies.

Simple RNNs have a serious limitation known as the vanishing gradient problem. When an RNN is trained on long sequences (that is, through many time steps), the gradients propagated backwards through time become smaller and smaller, so the influence of early inputs on the learning signal effectively disappears and the network "forgets" long-range dependencies. A tiny numeric illustration follows.
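The gradient that reaches the early steps of an unrolled RNN is roughly a product of one factor per step, and if each factor is slightly below 1 the product collapses. The factor 0.9 below is made up purely for illustration.

```python
# Product of per-step factors for sequences of different lengths
per_step_factor = 0.9
for length in (5, 20, 50, 100):
    print(length, per_step_factor ** length)
# 5 -> ~0.59, 20 -> ~0.12, 50 -> ~0.005, 100 -> ~0.00003: the signal from early steps vanishes
```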

To address these problems, improved recurrent architectures were developed on top of the basic RNN:

1. Long Short-Term Memory (LSTM):

Long Short-Term Memory (LSTM) is an improved type of recurrent neural network designed to mitigate the vanishing gradient problem and to capture long-term dependencies. The key aspects of how an LSTM works (a minimal numeric sketch of a single LSTM step is given after this list):

The LSTM cell: in addition to the hidden state, each cell carries a separate cell state, and a set of gates controls the flow of information (what is written to memory, what is forgotten, and what is exposed as output).

Forget gate: decides which information from the previous cell state should be discarded. It outputs a value between 0 and 1 for each element of the state; 0 means forget completely, 1 means keep.

Input gate: decides which new information will be written to the cell state, based on the current input and the previous hidden state.

Cell state update: the old cell state is multiplied by the forget-gate output and combined with the new candidate values scaled by the input gate.

Output gate: decides which part of the cell state will be exposed as the hidden state, that is, as the output at this time step.

Long-term dependencies: thanks to this gating, an LSTM can carry information across many time steps and handle long sequences where a simple RNN fails.

Applications of LSTM: language modelling, machine translation, speech recognition, time-series forecasting, and other sequence tasks where distant context matters.
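To make the gate descriptions above concrete, here is a minimal numpy sketch of a single LSTM step with random, untrained weights. The weight shapes and names are illustrative and do not correspond to the internals of any particular library.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

hidden, inputs = 4, 3
rng = np.random.default_rng(0)
# One weight matrix per gate; each sees the previous hidden state and the input concatenated
W_f, W_i, W_o, W_c = (rng.normal(size=(hidden, hidden + inputs)) for _ in range(4))
b_f = b_i = b_o = b_c = np.zeros(hidden)  # biases (kept at zero here)

def lstm_step(x_t, h_prev, c_prev):
    z = np.concatenate([h_prev, x_t])
    f = sigmoid(W_f @ z + b_f)       # forget gate: what to erase from the cell state
    i = sigmoid(W_i @ z + b_i)       # input gate: what new information to write
    c_hat = np.tanh(W_c @ z + b_c)   # candidate values
    c = f * c_prev + i * c_hat       # cell state update
    o = sigmoid(W_o @ z + b_o)       # output gate: what part of the cell to expose
    h = o * np.tanh(c)               # new hidden state
    return h, c

h, c = np.zeros(hidden), np.zeros(hidden)
for x_t in rng.normal(size=(5, inputs)):  # a toy sequence of 5 steps
    h, c = lstm_step(x_t, h, c)
print(np.round(h, 3))
```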

Let us look at how an LSTM can be used for a concrete task: forecasting a time series. Below is an example of training an LSTM on a synthetic series in Python using TensorFlow and pandas:

```python

import numpy as np

import tensorflow as tf

import pandas as pd

import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import LSTM, Dense

# Generate a synthetic time series (a sine wave)
timesteps = np.linspace(0, 100, 400)
series = np.sin(timesteps)

# Put the series into a DataFrame (not used further, kept only for inspection)
df = pd.DataFrame({'timesteps': timesteps, 'series': series})

window_size = 10   # how many past values the model sees
batch_size = 32    # training batch size

# Build a tf.data pipeline of (window, next value) pairs
def create_sequences(series, window_size, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))
    # Inputs must be 3D for the LSTM: (batch, time steps, features)
    dataset = dataset.shuffle(1000).map(
        lambda window: (tf.expand_dims(window[:-1], axis=-1), window[-1]))
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset

train_dataset = create_sequences(series, window_size, batch_size)

# Build the LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=[None, 1]),
    LSTM(50),
    Dense(1)
])

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Train the model
model.fit(train_dataset, epochs=10)

# Forecast future values autoregressively: each prediction is fed back as input
future_steps = 40
step = timesteps[1] - timesteps[0]
future_timesteps = timesteps[-1] + step * np.arange(1, future_steps + 1)
window = list(series[-window_size:])
future_series = []
for _ in range(future_steps):
    x = np.array(window[-window_size:]).reshape(1, window_size, 1)
    prediction = model.predict(x, verbose=0)
    future_series.append(prediction[0, 0])
    window.append(prediction[0, 0])

# Plot the original series and the forecast
plt.figure(figsize=(10, 6))
plt.plot(timesteps, series, label="Original series", linewidth=2)
plt.plot(future_timesteps, future_series, label="Forecast", linewidth=2)
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.show()

```

In this example an LSTM model is trained on a synthetic sine-wave series: it learns the shape of the curve and is then used to forecast how the series continues. For a real task you would need far more data and proper validation.

The resulting plot shows two curves:

1. The original series (first curve): the sine wave the model was trained on.

2. The forecast (second curve): the values the LSTM predicts beyond the end of the training data, generated step by step by feeding each prediction back into the model.

Such a plot makes it easy to judge how well the LSTM has captured the long-term structure of the series. LSTM networks are particularly useful when the relevant dependencies stretch over many time steps, exactly where simple RNNs struggle.

2. Gated Recurrent Unit (GRU):

GRU (Gated Recurrent Unit) is another type of recurrent neural network that, like the LSTM, was designed to mitigate the vanishing gradient problem and capture long-term dependencies. A GRU achieves much the same effect with a simpler structure and fewer parameters than an LSTM.

Key aspects of the GRU (a small numeric sketch of one GRU step follows this list):

1. Gating mechanisms: a GRU uses gates, as the LSTM does, to control the flow of information, but it has only two of them: the reset gate and the update gate.

2. Reset gate: decides how much of the previous hidden state to take into account when forming the new candidate state. A value near 1 keeps the previous state, a value near 0 effectively ignores it.

3. Update gate: decides how much of the hidden state to replace with the new candidate. A value near 1 means the state is largely rewritten with new information, a value near 0 means the old state is mostly kept.

4. Hidden state: unlike the LSTM, the GRU does not maintain a separate cell state; a single hidden state plays both roles, which makes the unit simpler.

5. Efficiency: with fewer gates and parameters, a GRU is usually faster to train than an LSTM while still handling long-range dependencies far better than a simple RNN.

6. Applications: GRUs are used in the same kinds of tasks as LSTMs: language modelling, machine translation, speech processing, and time-series forecasting.

The choice between GRU and LSTM is largely empirical: on many tasks they perform similarly, and the GRU's speed advantage matters more on smaller datasets, while the LSTM's extra capacity can help on larger, more complex ones.
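Here is the matching numpy sketch of one GRU step, using one common formulation of the gates; again the weights are random and the names are illustrative only.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

hidden, inputs = 4, 3
rng = np.random.default_rng(1)
W_z, W_r, W_h = (rng.normal(size=(hidden, hidden + inputs)) for _ in range(3))

def gru_step(x_t, h_prev):
    v = np.concatenate([h_prev, x_t])
    z = sigmoid(W_z @ v)   # update gate: how much of the state to replace
    r = sigmoid(W_r @ v)   # reset gate: how much of the old state feeds the candidate
    h_hat = np.tanh(W_h @ np.concatenate([r * h_prev, x_t]))  # candidate state
    return (1 - z) * h_prev + z * h_hat                        # blend old and new state

h = np.zeros(hidden)
for x_t in rng.normal(size=(5, inputs)):  # a toy sequence of 5 steps
    h = gru_step(x_t, h)
print(np.round(h, 3))
```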

Here is a small example of using a GRU for time-series forecasting. It follows the same structure as the RNN example above and is written with TensorFlow:

```python

import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt

# Generate a noisy synthetic series: a linear trend plus a sine wave plus noise
np.random.seed(0)
n_points = 100
time = np.linspace(0, 10, n_points)
series = 0.1 * time + np.sin(time) + np.random.randn(n_points) * 0.1

# Cut the series into windows for the GRU: n_steps values in, the next value out
n_steps = 30
n_samples = len(series) - n_steps
X = [series[i:i + n_steps] for i in range(n_samples)]
y = series[n_steps:]
X = np.array(X).reshape(-1, n_steps, 1)
y = np.array(y)

# Build a small GRU model
model = tf.keras.Sequential([
    tf.keras.layers.GRU(10, activation="relu", input_shape=[n_steps, 1]),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer="adam", loss="mse")

# Train the model
model.fit(X, y, epochs=10)

# Forecast future values autoregressively
future_steps = 10
future_x = X[-1, :, :]
future_predictions = []
for _ in range(future_steps):
    future_pred = model.predict(future_x.reshape(1, n_steps, 1), verbose=0)
    future_predictions.append(future_pred[0, 0])
    future_x = np.roll(future_x, shift=-1)
    future_x[-1] = future_pred[0, 0]

# Plot the last observed window and the forecast
plt.plot(np.arange(n_steps), X[-1, :, 0], label="Last observed window")
plt.plot(np.arange(n_steps, n_steps + future_steps), future_predictions, label="Forecast")
plt.xlabel("Time step")
plt.ylabel("Value")
plt.legend()
plt.show()

```




After running this code you will see a plot in which the GRU's forecast continues the original series beyond the last observed window.

The plot contains two curves:

1. The observed data (first curve): the synthetic series with a linear trend, a sinusoid, and added noise.

2. The forecast (second curve): the values predicted by the GRU for the requested number of future steps (future_steps).

Forecast quality depends on the model, the noise level, and the amount of training data; in practice you would compare the forecast against held-out values rather than judging it visually. With noisy data the forecast will follow the overall shape of the series rather than the noise itself.

A GRU is a good default choice when you want LSTM-like behaviour with fewer parameters and faster training.

3. Bidirectional RNN (BiRNN):

A bidirectional RNN (BiRNN) is an extension of the recurrent network in which the sequence is processed in two directions at once: from the beginning to the end and from the end to the beginning. The representation of each element therefore contains information about both its left and its right context, which is often exactly what a language task needs.

Key aspects of a bidirectional RNN:

1. Bidirectionality: the model contains two recurrent chains, one reading the sequence forward and one reading it backward. Each chain maintains its own hidden state.

2. Combining the states: at every position the hidden states of the forward and backward chains are combined (usually concatenated), giving a representation of the element in its full context.

3. Long-range context: because each position sees both directions, a BiRNN can use information from the end of a sentence to interpret its beginning. This is particularly useful in NLP tasks such as tagging and text classification.

4. Cost: a BiRNN roughly doubles the amount of computation and the number of parameters, and it needs the whole sequence before it can start, so it is not suited to strictly online, step-by-step prediction.

To make this concrete, consider how a BiRNN could be used for sentiment analysis of movie reviews.

Task: classify reviews as positive or negative.

Input data: the texts of reviews (for example, collected from a review site), each labelled as positive or negative.

Expected output: for a new review, the probability that it is positive.

Steps of the solution with a BiRNN:

1. Data preparation: the reviews are tokenized and converted into sequences of word indices or embeddings (for example, with Word2Vec or a trainable embedding layer), then padded to a common length.

2. Building the BiRNN: the model reads each review with two recurrent chains, one forward and one backward, and combines their hidden states.

3. Training: the combined representation is passed to a classification layer, and the whole model is trained on the labelled reviews.

4. Evaluation: quality is measured on a held-out set using metrics such as accuracy, precision, recall, or the F1-score.

5. Application: the trained model is used to classify new, unseen reviews.

Why a BiRNN helps in this task:

Sentiment often depends on words that appear late in a sentence; a negation or a contrast at the end can flip the meaning of everything before it, and reading in both directions captures this.

The combined context makes the representation of each word richer than in a one-directional RNN.

As a result, BiRNNs tend to give more accurate sentiment predictions than ordinary RNNs, at the cost of extra computation.

In short, a BiRNN is a good choice for text classification tasks where the whole sentence is available and both the left and the right context matter.

Below is an example of a BiRNN for sentiment classification of short texts in Python with TensorFlow and Keras. The texts and labels are tiny and purely illustrative; a real model would be trained on a large labelled corpus. The model uses an embedding layer, a bidirectional LSTM, and a dense output layer:

```python

import numpy as np

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data (illustrative only)
texts = ["This film was a complete waste of time.",
         "A wonderful, touching story.",
         "I enjoyed it from start to finish."]
labels = [0, 1, 1]  # 0 - negative review, 1 - positive review

# Tokenize the texts and convert them to integer sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index

# Pad all sequences to the same length
max_sequence_length = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Build the BiRNN model
model = Sequential()
model.add(Embedding(len(word_index) + 1, 128, input_length=max_sequence_length))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
X = np.array(sequences)
y = np.array(labels)
model.fit(X, y, epochs=5)

# Predict the sentiment of new texts
new_texts = ["What a great movie, I loved it!",
             "The acting was terrible and the plot made no sense.",
             "A pleasant surprise."]
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_sequences = pad_sequences(new_sequences, maxlen=max_sequence_length)
predictions = model.predict(new_sequences)

for i, text in enumerate(new_texts):
    sentiment = "positive" if predictions[i][0] > 0.5 else "negative"
    print(f"Text: '{text}' -> predicted sentiment: {sentiment}")

```




In this example a small BiRNN is trained to classify reviews as positive or negative using a handful of sample texts. The texts are tokenized, padded to the same length, and passed through an embedding layer followed by a bidirectional LSTM.

After training, the model is applied to new texts and prints the predicted sentiment for each one; predictions above 0.5 are treated as positive and the rest as negative.

A bidirectional network is useful here because the sentiment of a phrase often depends on words that come later in the sentence: reading the text in both directions gives the model more context than a one-directional RNN.

Keep in mind that with only three training examples the output is purely illustrative; a real sentiment model needs thousands of labelled reviews.

In short, a BiRNN improves over a plain RNN whenever information from both the left and the right context matters.

RNN, LSTM, and GRU remain classic NLP workhorses for tasks such as classification, language modelling, and sequence labelling, although in recent years they have increasingly been displaced by Transformer models, which are discussed at the end of this chapter.



Here is one more example of a task where a bidirectional RNN (BiRNN) is a natural fit: determining the language of a text.

Task: language identification.

Input data: short texts in several languages.

Expected output: for each text, the language it is written in (for example, English, French, or Spanish).

Steps of the solution with a BiRNN:

1. Data preparation: the texts are tokenized and converted to sequences of indices, then padded to a common length.

2. Building the BiRNN: the model reads each text in both directions, so that its representation reflects the whole structure of the phrase, and combines the two hidden states.

3. Training: the combined representation is passed to a softmax layer with one output per language, and the model is trained on labelled examples.

4. Evaluation: quality is measured on held-out texts with metrics such as accuracy or the F1-score.

5. Application: the trained model is used to detect the language of new texts.

Below is a small example in Python using TensorFlow and Keras that builds and trains such a BiRNN. The three sample phrases are only a placeholder for a real multilingual dataset:

```python

import numpy as np

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Bidirectional, LSTM, Embedding, Dense

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score

# Sample data: short phrases and their languages (a real model needs far more data)
texts = ["Bonjour, comment ça va?", "Hello, how are you?", "¿Hola, cómo estás?"]
labels = ["French", "English", "Spanish"]

# Encode the string labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Tokenize the texts and convert them to integer sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)

# Pad all sequences to the same length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Split the data into training and test parts
x_train, x_test, y_train, y_test = train_test_split(padded_sequences, y, test_size=0.2, random_state=42)

# Build the BiRNN model
model = Sequential()
model.add(Embedding(input_dim=len(word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(Bidirectional(LSTM(50)))
model.add(Dense(len(set(y)), activation="softmax"))  # one output per language

# Compile the model
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, epochs=10, validation_split=0.2)

# Predict the languages of the test texts and measure accuracy
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

```








After running this code you will see the following:

1. The BiRNN model is trained on the sample phrases to recognize the language of a text.

2. During training, Keras prints the loss and the accuracy for each epoch on the training and validation data. Both normally improve from epoch to epoch.

3. After training, the model is evaluated on the held-out phrase and the accuracy of its prediction is computed.

4. The final line, `print(f"Accuracy: {accuracy:.4f}")`, prints that accuracy. With such a tiny dataset the number is not meaningful in itself.

5. To build a real language-identification model you would extend the `texts` list to thousands of phrases per language and use a proper train/validation/test split.

Note that the skeleton is the same as in the sentiment example; only the labels and the output layer change (a softmax over several classes instead of a single sigmoid unit).

This shows that a BiRNN can be reused for different text classification tasks with minimal changes to the code, as long as suitable labelled data is available.

Convolutional neural networks (CNN):

CNNs are best known from computer vision, but they are also used in NLP. Applied to text, a convolutional network slides small filters over the sequence of word embeddings and learns to detect informative local patterns, such as characteristic words and phrases; this text-oriented variant is often called a CNN for text (CNN-text). The typical NLP uses of CNNs are described below:

Text classification:

One of the most common uses of a CNN in NLP is text classification, for example deciding whether a movie review is positive or negative. The filters of the network learn to respond to indicative fragments such as "great", "boring", "waste of time", or "highly recommend", and the classifier combines these signals into a final decision.

The main steps of using a CNN for text classification:

Data preparation:

The texts are collected and cleaned, and each text is labelled with its class (for example, positive or negative). The texts are then tokenized and padded to a common length.

Word embeddings:

Each word is converted into a numeric vector, either with a trainable embedding layer or with pre-trained word embeddings such as Word2Vec or GloVe. The text becomes a matrix of these vectors.

Convolution:

Filters of a fixed width slide over the sequence of embeddings and produce feature maps that record where in the text each learned pattern occurs.

CNN architecture:

The model stacks the convolutional layers with a pooling layer that keeps the strongest responses, followed by one or more dense layers that produce the class prediction.

Training:

The model is trained on the labelled texts, typically with a cross-entropy loss, and its progress is monitored with a metric such as accuracy.

Hyperparameter tuning:

The number and width of the filters, the embedding size, and the learning rate are adjusted to improve quality on a validation set.

Evaluation:

The final model is evaluated on a held-out test set, using accuracy and, where appropriate, precision, recall, and the F1-score.

Application:

The trained model is then used to classify new texts automatically.

Here is an example of such a model in Python using TensorFlow and Keras, this time classifying short news headlines into categories:

```python

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score

# Sample data: news headlines and their categories (illustrative only)
texts = ["Sport: the local team won the championship final",
         "Politics: parliament approved the new budget",
         "Technology: a new smartphone model was announced",
         "Science: astronomers discovered a distant exoplanet"]
labels = ["Sport", "Politics", "Technology", "Science"]

# Encode the string labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Tokenize the texts
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)

# Pad the sequences to the same length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Split the data into training and test parts
x_train, x_test, y_train, y_test = train_test_split(padded_sequences, y, test_size=0.2, random_state=42)

# Build the CNN model
model = Sequential()
model.add(Embedding(input_dim=len(word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(Conv1D(128, 3, activation="relu"))  # 128 filters, each spanning 3 consecutive words
model.add(GlobalMaxPooling1D())
model.add(Dense(len(set(y)), activation="softmax"))

# Compile the model
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, epochs=10, validation_split=0.2)

# Evaluate the model on the test part
y_pred = model.predict(x_test)
y_pred = tf.argmax(y_pred, axis=1).numpy()
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

```






In this example, after training, the accuracy of the model on the test data is computed and printed. The key line is:

```python

accuracy = accuracy_score(y_test, y_pred)

```

`accuracy` here is the fraction of test examples the model classified correctly. It always lies between 0 and 1:

If it equals 1.0, the model predicted every test example correctly.

If it equals 0.0, the model got every test example wrong.

Values in between give the share of correct predictions; for example, 0.8 means the model was right for 80% of the test examples.

Accuracy is a convenient first metric, but on its own it can be misleading, especially when the classes are imbalanced. In practice it is usually complemented with precision, recall, the F1-score, and a confusion matrix, which describe the quality of the classifier in more detail (a short sketch follows below).
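For completeness, here is a small sketch of how those additional metrics are computed with scikit-learn. The labels and predictions below are made up and are not the output of the model above.

```python
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

y_true = [0, 1, 1, 0, 1, 0, 1, 1]  # reference labels (hypothetical)
y_pred = [0, 1, 0, 0, 1, 1, 1, 1]  # model predictions (hypothetical)

print("precision:", precision_score(y_true, y_pred))
print("recall:   ", recall_score(y_true, y_pred))
print("F1 score: ", f1_score(y_true, y_pred))
print("confusion matrix:\n", confusion_matrix(y_true, y_pred))
```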

Together these metrics give a much better picture of how well the CNN classifier actually performs on new texts; a single accuracy number computed on four examples says very little.

A perfect score (1.0) on such a tiny dataset usually means that the model has simply memorized the training examples rather than learned anything general. To illustrate the same point with a different architecture, here is another small example, this time with an LSTM:

```python

import numpy as np

import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Embedding, LSTM, Dense

from sklearn.model_selection import train_test_split

# Sample texts and their sentiment labels (illustrative only)
texts = np.array(["I absolutely loved this film.",
                  "Great story and brilliant acting.",
                  "The movie was okay, nothing special.",
                  "A dull and disappointing picture."])
labels = np.array([1, 1, 2, 0])  # 0 - negative, 1 - positive, 2 - neutral

# Text vectorization layer (tokenizes the raw strings inside the model itself)
tokenizer = tf.keras.layers.TextVectorization()
tokenizer.adapt(texts)

# Build the LSTM model
model = Sequential()
model.add(tokenizer)
model.add(Embedding(input_dim=len(tokenizer.get_vocabulary()), output_dim=16))
model.add(LSTM(16))
model.add(Dense(3, activation="softmax"))  # three classes: negative, positive, neutral

# Compile the model
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Split the data into training and test parts
x_train, x_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Train the model (verbose=0 hides the per-epoch output)
model.fit(x_train, y_train, epochs=10, verbose=0)

# Evaluate the accuracy on the test part (evaluate returns [loss, accuracy])
accuracy = model.evaluate(x_test, y_test)[1]
print(f"Accuracy: {accuracy:.4f}")

```




With such a small dataset the LSTM also reaches an accuracy close to 1.0 almost immediately, which again reflects memorization rather than genuine understanding. Real projects need substantially more data and a proper train/validation/test split before accuracy numbers become meaningful.

How a convolutional network processes text:

Convolutional neural networks (Convolutional Neural Networks, CNN) were originally designed for images, but the same idea transfers to text: instead of sliding filters over pixels, the network slides them over a matrix of word embeddings and learns which local patterns matter for the task.

The main steps are the following:

1. Text representation: each word is mapped to a vector (an embedding), and the whole text becomes a matrix in which row i is the embedding of word i. Words with similar meaning end up with similar vectors, which helps the network generalize.

2. Convolution: filters of a fixed width (for example, three words) slide over this matrix and respond to particular local patterns, in effect detecting informative n-grams and phrases regardless of where they occur in the text.

3. Pooling: the feature maps produced by the convolution are compressed by a pooling operation. Max-pooling, for example, keeps only the strongest response of each filter, so the network records that a pattern occurred without caring exactly where.

4. Fully connected layers and classification: the pooled features are passed to one or more dense layers that combine them and produce the final prediction, for example the probability that a review is positive.
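The first three steps can be shown with a toy numpy sketch: a "text" of 6 words represented by 4-dimensional embeddings, one convolutional filter of width 3 sliding over the word axis, and max-pooling over the resulting feature map. All values are random and purely illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)
text_matrix = rng.normal(size=(6, 4))  # 6 words, each a 4-dimensional embedding
kernel = rng.normal(size=(3, 4))       # one filter spanning 3 consecutive words

# Convolution over the word axis: one activation per window of 3 words
feature_map = np.array([
    np.sum(text_matrix[i:i + 3] * kernel)
    for i in range(text_matrix.shape[0] - 3 + 1)
])
print("feature map:", np.round(feature_map, 3))  # 4 values for the 4 possible windows

# Max-pooling keeps only the strongest response of this filter
print("max-pooled feature:", round(float(feature_map.max()), 3))
```

A real model uses many filters of several widths, so the pooled features form a vector that the dense layers then classify.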

Here is an example implementation of such a model in Python using TensorFlow/Keras:

```python

import tensorflow as tf

from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

from tensorflow.keras.models import Sequential

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.preprocessing.sequence import pad_sequences

import numpy as np

# Sample texts for training (illustrative only)
texts = ["This movie was terrible and boring!",
         "I really liked it, a great film.",
         "A complete disappointment, do not watch."]

# Sentiment labels (0 - negative, 1 - positive)
labels = [0, 1, 0]

# Tokenize the texts
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad the sequences to the same length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Build a small CNN model for text classification
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(Conv1D(32, 3, activation='relu'))  # kernel reduced from 5 to 3 and filters from 128 to 32 for this tiny dataset
model.add(GlobalMaxPooling1D())
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
x_train = padded_sequences
y_train = np.array(labels)
model.fit(x_train, y_train, epochs=10)

# Classify a new text
test_text = ["I enjoyed the film, it was something special!"]
test_sequence = tokenizer.texts_to_sequences(test_text)
padded_test_sequence = pad_sequences(test_sequence, maxlen=max_sequence_length)
result = model.predict(padded_test_sequence)
print("Predicted probability of a positive review:", result)

```








The model outputs a number between 0 and 1, which can be read as the probability that the text belongs to class 1 (a positive review). For example, an output of 0.85 means the model is about 85% confident the review is positive; a value close to 0 means it considers the review negative, and values near 0.5 mean the model is unsure.

This example shows the overall pipeline of a CNN text classifier. Remember that with only three training sentences the prediction is purely illustrative; meaningful results require a much larger and more varied training set.

Spam filtering:

Convolutional neural networks (CNN), although originally created for image processing, also work well for text filtering tasks. The filters slide over the sequence of word embeddings and learn to detect characteristic fragments, for example phrases that are typical of unwanted messages (such as promises of prizes or urgent calls to action).

The main stages of such a model:

Tokenization: each message is split into words and converted into a sequence of indices. Messages of different length are padded to a common size so they can be processed together.

Word embeddings: each index is mapped to a vector; the message becomes a matrix of embeddings in which similar words have similar vectors, which helps the filters generalize beyond exact word matches.

Convolution: the convolutional layer slides its filters over the message, and each filter produces a feature map showing where in the text it found the pattern it has learned to detect.

Pooling: the feature maps are compressed; max-pooling (MaxPooling) keeps only the strongest response of each filter, recording that a pattern is present without caring about its exact position.

Fully connected layers: the pooled features are combined by one or more dense layers into a final score for the message.

A CNN built this way learns which combinations of words distinguish unwanted messages from ordinary ones directly from labelled examples, without hand-written rules.

In the example below, each training message is labelled 1 (spam) or 0 (not spam); the messages themselves are made up for illustration.

The code does the following:

1. Imports TensorFlow and the other libraries needed to build the convolutional network (CNN) and plot the results.

2. Defines a small set of training texts `texts` and their labels `labels`.

3. Tokenizes the texts, turning each of them into a sequence of integer word indices.

4. Pads the sequences to a common length `max_sequence_length` so they can be fed to the network as a single tensor.

5. Builds the CNN model from Embedding, Conv1D, GlobalMaxPooling1D, and Dense layers.

6. Compiles the model with the "adam" optimizer and the "binary_crossentropy" loss.

7. Trains the model on the training texts for 10 epochs.

8. Evaluates the model on a separate set of test texts (4 examples) and plots the training curves.

Because the dataset is so small, the model memorizes it quickly; as noted above, an accuracy of 1.0 here does not mean the model would perform well on real data.

```python

import tensorflow as tf

import numpy as np

import matplotlib.pyplot as plt

# Training data: short messages and their labels (illustrative only)
texts = ["You have won a free prize, click the link now.",
         "Let's meet for lunch tomorrow at noon.",
         "Earn money fast with this limited offer.",
         "Please send me the report when it is ready."]
labels = [1, 0, 1, 0]  # 1 - spam, 0 - not spam

# Tokenize the texts and convert them to integer sequences
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad the sequences so the CNN receives inputs of equal length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_sequence_length)

# Convert the labels to a numpy array
labels = np.array(labels)

# Build the CNN model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=1000, output_dim=16, input_length=max_sequence_length),
    tf.keras.layers.Conv1D(128, 3, activation='relu'),  # filters spanning 3 consecutive words
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(padded_sequences, labels, epochs=10, verbose=1)

# Evaluate the model on a small test set
test_texts = ["Claim your free bonus today.",
              "The meeting is moved to Friday.",
              "Win cash instantly, act now.",
              "Thanks for your help yesterday."]
test_labels = [1, 0, 1, 0]  # reference labels for the test messages
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = tf.keras.preprocessing.sequence.pad_sequences(test_sequences, maxlen=max_sequence_length)
test_labels = np.array(test_labels)
test_loss, test_accuracy = model.evaluate(padded_test_sequences, test_labels)
print(f"Accuracy on the test data: {test_accuracy:.4f}")

# Plot the training curves
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

```

After training, the script plots two curves that show how the model behaved during training. They are worth reading carefully:

1. Training accuracy (left plot): how the share of correctly classified training examples changes from epoch to epoch. It normally grows as the model fits the data; on a tiny dataset it quickly reaches 1.0, which signals memorization rather than generalization.

2. Training loss (right plot): how the value of the loss function decreases from epoch to epoch. A steadily falling loss means the optimization is working; if it stalls or grows, something is wrong with the model or the learning rate.

Looking at these curves you can judge:

whether the model is still improving or has already converged;

whether it is overfitting (training metrics keep improving while test metrics do not);

whether training should be stopped earlier or, on the contrary, run for more epochs.

Such curves are a standard diagnostic tool, and it is good practice to plot them for any model you train.








It is important to note that the architectures discussed so far, RNN and CNN, are not the only options. In recent years the Transformer architecture has become the dominant approach in NLP and is the basis of models such as BERT and GPT.

The Transformer was designed specifically for working with sequences and differs from recurrent networks in several important ways (a small sketch of the central mechanism follows this list):

1. Self-attention: instead of processing tokens one after another, the Transformer lets every position in the sequence look directly at every other position and decide how relevant it is. This makes it easy to capture dependencies between distant words.

2. No recurrence: the Transformer does not carry a hidden state through time. Information about word order is added explicitly through positional encodings, so the model still knows where each token stands in the sequence.

3. Attention weights: for each token the model computes a set of "attention" weights that say how much every other token contributes to its new representation; these weights are learned during training.

4. Multi-head attention: the Transformer uses multi-head attention, running several attention mechanisms in parallel so that different heads can focus on different kinds of relations between words.

5. Parallelism: because there is no recurrence, all positions of a sequence can be processed in parallel (unlike in an RNN), which makes training on large datasets much faster.

Transformer-based models (for example, BERT and GPT) have set the state of the art in many NLP tasks: machine translation, text classification, question answering, summarization, and text generation. Pre-trained on huge text corpora, they can then be fine-tuned for a specific task with relatively little labelled data.
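The self-attention mechanism at the heart of the Transformer can be sketched in a few lines of numpy: every position produces a query, a key, and a value vector, and the output for each position is a weighted mix of all values, with weights given by a softmax over the scaled query-key dot products. The matrices here are random and untrained, purely for illustration.

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
seq_len, d_model = 5, 8                  # 5 tokens, 8-dimensional embeddings
X = rng.normal(size=(seq_len, d_model))  # token embeddings (plus positional encoding in a real model)

W_q, W_k, W_v = (rng.normal(size=(d_model, d_model)) for _ in range(3))
Q, K, V = X @ W_q, X @ W_k, X @ W_v

scores = Q @ K.T / np.sqrt(d_model)  # how strongly each token attends to every other token
weights = softmax(scores, axis=-1)   # attention weights: each row sums to 1
output = weights @ V                 # each token's new representation mixes all value vectors

print(np.round(weights, 2))
print(output.shape)  # (5, 8): same sequence length, contextualized representations
```

Multi-head attention simply repeats this computation several times with different learned matrices and concatenates the results.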

BERT (Bidirectional Encoder Representations from Transformers) and GPT (Generative Pre-trained Transformer) are the two best-known families of such models in natural language processing (Natural Language Processing, NLP). Both are built on the Transformer architecture and are pre-trained on large text corpora, but they are used differently: BERT produces bidirectional representations of text that work well for understanding tasks such as classification and question answering, while GPT generates text left to right and is the basis of modern generative language models.






