Browse Source

Fixed installation

19-0009Cse 4 years ago
parent
commit
e4bc17c2b6

+ 5 - 1
.gitignore

@@ -1 +1,5 @@
-test
+test
+*.egg-info/
+dist/
+pandasToBrat/*.egg-info
+pandasToBrat/extract_tools/*.egg-info

+ 2 - 1
CHANGES.txt

@@ -1,6 +1,7 @@
 1.1.1 (11-10-2020)
     Adding the option to filter export for specified entities.
+    Fixed installation.
 1.1 (11-10-2020)
     Adding : export feature to ConLL format
     Adding : LICENSE
-1.0 Initial release
+1.0 Initial release

+ 1 - 1
README.md

@@ -249,4 +249,4 @@ You can also use Spacy tokenizer, in that case you should import the spacy_token
 You can restrict the export to a limited set of entities. To do so, specify the list of entities in the entities parameter. If set to None, which is the default value, all entities will be considered. If a word contains several entities, the last one is kept.
 
 Finally, the keep_empty option is set to False by default. This means that all empty tokens will be removed from the exported data.
-You can set it as True if you want to keep empty tokens.
+You can set it as True if you want to keep empty tokens.

BIN
__pycache__/extract_tools.cpython-38.pyc


BIN
__pycache__/pandasToBrat.cpython-38.pyc


+ 35 - 5
pandasToBrat.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: pandasToBrat
-Version: 1.0
+Version: 1.1.1
 Summary: Function for Brat folder administration from Python and Pandas object.
 Home-page: UNKNOWN
 Author: Ali BELLAMINE
@@ -15,7 +15,7 @@ Description: # pandasToBrat
         
         pandasToBrat is a library to manage brat configuration and brat data from a Python interface.
         
-        ### What can it do ?
+        ### What can it do ?
         
         - Reading brat annotations and relations configuration to python dictionary
         - Writing brat annotations and relations configuration from python dictionary
@@ -23,6 +23,7 @@ Description: # pandasToBrat
         - Writing brat text file from python pandas Series
         - Reading brat annotations and relations 
         - Writing brat annotations and relations from python pandas DataFrame
+        - Export data to ConLL-2003 format
         
         ### What it doesn't support ?
         
@@ -36,7 +37,7 @@ Description: # pandasToBrat
         
         Clone the current repository :
         ```
-            git clone [LIBRARY_PATH]
+            git clone https://gogs.alibellamine.me/alibell/pandasToBrat
         ```
         
         Install dependencies with pip.
@@ -105,7 +106,7 @@ Description: # pandasToBrat
         Each relation has a relation name and is defined by a sub-dictionary containing an args entry.
         The args entry contains a list of entities that are concerned by the relation.
         
-        #### Read and write parameters
+        ####  Read and write parameters
         
         ##### Getting parameters
         
@@ -190,7 +191,7 @@ Description: # pandasToBrat
         - relation : The relation Name
         - ArgX : The annotated entities which are linked by the relation; each column refers to an entity, and the entity id corresponds to the annotations DataFrame "type_id" column
         
-        #### Read and write annotations
+        #### Read and write annotations
         
         
         ##### Getting annotations data
@@ -228,4 +229,33 @@ Description: # pandasToBrat
         The other columns should contain the type_id of related entities, as output by the read_annotation method.
         
         The overwrite option can be set as True to overwrite existing annotations, otherwise the dataframe's data are added to existing annotations data.
+        
+        ### Export data to standard format
+        
+        The only currently supported format is ConLL-2003.
+        
+        To export data, you can use the export method.
+        
+        ```
+            bratData.export(export_format = EXPORT_FORMAT, tokenizer = TOKENIZER, entities = ENTITIES_OPTION, keep_empty = KEEP_EMPTY_OPTION)
+        ```
+        
+        The export_format parameter is used to specify the export format. The only supported format, which is also the default, is ConLL-2003.
+        The tokenizer parameter contains the tokenizer function. Tokenizer functions are stored in pandasToBrat.extract_tools. The aim of the function is to generate tokens and pos tags from text. The default one, _default_tokenizer_, is the simplest one, which splits on space and new line characters.
+        You can also use Spacy tokenizer, in that case you should import the spacy_tokenizer functions as demonstrated in this example :
+        
+        ```
+            from pandasToBrat.extract_tools import spacy_tokenizer
+            import spacy
+        
+            nlp = spacy.load(SPACY_MODEL)
+            spacy_tokenizer_loaded = spacy_tokenizer(nlp)
+        
+            bratData.export(tokenizer = spacy_tokenizer_loaded)
+        ```
+        
+        You can restrict the export to a limited set of entities. To do so, specify the list of entities in the entities parameter. If set to None, which is the default value, all entities will be considered. If a word contains several entities, the last one is kept.
+        
+        Finally, the keep_empty option is set to False by default. This means that all empty tokens will be removed from the exported data.
+        You can set it as True if you want to keep empty tokens.
 Platform: UNKNOWN

+ 1 - 0
pandasToBrat.egg-info/SOURCES.txt

@@ -2,6 +2,7 @@ CHANGES.txt
 MANIFEST.in
 README.md
 setup.py
+pandasToBrat/__init__.py
 pandasToBrat.egg-info/PKG-INFO
 pandasToBrat.egg-info/SOURCES.txt
 pandasToBrat.egg-info/dependency_links.txt

+ 1 - 1
pandasToBrat.egg-info/top_level.txt

@@ -1 +1 @@
-
+pandasToBrat

+ 4 - 4
pandasToBrat.py → pandasToBrat/__init__.py

@@ -2,7 +2,7 @@ import re
 import os
 import pandas as pd
 import numpy as np
-from pandasToBrat.extract_tools import default_tokenizer
+from pandasToBrat.extract_tools import default_tokenizer as _default_tokenizer
 
 def _getDictionnaryKeys(dictionnary):
     """
@@ -540,7 +540,7 @@ class pandasToBrat:
         
         return(connll_str)
 
-    def _get_tokenized_data(self, text_data, annotations_data, tokenizer = default_tokenizer, keep_empty = False):
+    def _get_tokenized_data(self, text_data, annotations_data, tokenizer = _default_tokenizer, keep_empty = False):
         
         '''
             Internal function that process text and annotation data to calculate token, pos and chunks.
@@ -662,7 +662,7 @@ class pandasToBrat:
         
         return(output_df)
 
-    def export(self, export_format = "conll-2003", tokenizer = default_tokenizer, keep_empty = False, entities = None):
+    def export(self, export_format = "conll-2003", tokenizer = _default_tokenizer, keep_empty = False, entities = None):
 
         '''
             Function that generate an export file.
@@ -710,4 +710,4 @@ class pandasToBrat:
         # Execute the export format associated function
         data_str = supported_export_format[export_format](data = data)
 
-        return(data_str)
+        return(data_str)

BIN
pandasToBrat/__pycache__/__init__.cpython-36.pyc


BIN
pandasToBrat/__pycache__/pandasToBrat.cpython-38.pyc


+ 0 - 0
extract_tools.py → pandasToBrat/extract_tools/__init__.py


BIN
pandasToBrat/extract_tools/__pycache__/__init__.cpython-36.pyc


+ 4 - 5
setup.py

@@ -1,13 +1,12 @@
 from setuptools import setup, find_packages
  
  
-setup(name='pandasToBrat',
- 
-      version='1.1',
+setup(name='pandasToBrat', 
+      version='1.1.1',
       license='',
       author='Ali BELLAMINE',
       author_email='contact@alibellamine.me',
       description='Function for Brat folder administration from Python and Pandas object.',
-      packages=find_packages(),
       long_description=open('README.md').read(),
-    )
+      packages = ["pandasToBrat"]
+    )