olivia_finder.data_source.csv_ds
from __future__ import annotations
import os
from typing import List, Optional
import pandas as pd
import tqdm
from .data_source import DataSource


class CSVDataSource(DataSource):
    """
    Class that implements the methods for loading a network from a CSV file.
    Implements the **DataSource** abstract class.

    Every row of the CSV file describes one dependency relation: a dependent
    package (optionally with its version and url) and one of its dependencies
    (optionally with its version).
    """

    def __init__(
        self,
        file_path: str,
        dependent_field: Optional[str] = None,
        dependency_field: Optional[str] = None,
        dependent_version_field: Optional[str] = None,
        dependency_version_field: Optional[str] = None,
        dependent_url_field: Optional[str] = None
    ):
        """
        Constructor of the class. Stores the field names and loads the CSV file.

        Parameters
        ----------
        file_path : str
            The path to the CSV file
        dependent_field : str, optional
            The name of the field that contains the dependent packages, by default None
        dependency_field : str, optional
            The name of the field that contains the dependency packages, by default None
        dependent_version_field : str, optional
            The name of the field that contains the dependent packages versions, by default None
        dependency_version_field : str, optional
            The name of the field that contains the dependency packages versions, by default None
        dependent_url_field : str, optional
            The name of the field that contains the dependent packages urls, by default None

        Raises
        ------
        FileNotFoundError
            If the file does not exist
        ValueError
            If the file path is None, if the file is not a CSV file, if the dependent
            or the dependency field is None, if both are the same, or if a configured
            field is not a column of the CSV file
        """

        # Set the dataframe as None and the fields
        self.data: Optional[pd.DataFrame] = None
        self.dependent_field = dependent_field
        self.dependency_field = dependency_field
        self.dependent_version_field = dependent_version_field
        self.dependency_version_field = dependency_version_field
        self.dependent_url_field = dependent_url_field
        self.file_path = file_path

        # Initialize the base class; self.logger is used right after this call
        super().__init__()

        # A data source without a file cannot be loaded, so fail early
        if self.file_path is None:
            self.logger.debug("File path is None. Data not loaded.")
            raise ValueError("File path cannot be None.")

        self._load_data()

    def _load_data(self):
        """
        Loads the data from a CSV file like [name,version,url,dependency,dependency_version]
        Only the dependent and dependency fields are mandatory; the version and url
        fields are checked only when they were configured.

        ``self.data`` is assigned only after every check has passed, so a failed
        load never leaves a partially validated dataframe behind.

        Raises
        ------
        FileNotFoundError
            If the file does not exist
        ValueError
            If the file path is None, if the file is not a CSV file, if the dependent
            or the dependency field is None, if both are the same, or if a configured
            field is not a column of the CSV file
        """

        # Check the file is valid
        if self.file_path is None:
            raise ValueError("File path cannot be None.")

        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"File {self.file_path} not found.")

        # Accept the extension in any letter case (".csv", ".CSV", ...)
        if not self.file_path.lower().endswith(".csv"):
            raise ValueError(f"File {self.file_path} is not a CSV file.")

        # Check that the mandatory fields are set and valid
        if self.dependent_field is None:
            raise ValueError("Dependent field cannot be None.")

        if self.dependency_field is None:
            raise ValueError("Dependency field cannot be None.")

        if self.dependent_field == self.dependency_field:
            raise ValueError("Dependent field and dependency field cannot be the same.")

        # Load into a local first so self.data stays None if validation fails
        data = pd.read_csv(self.file_path)

        # Mandatory fields first, then the optional ones that were configured
        configured_fields = [
            self.dependent_field,
            self.dependency_field,
            self.dependent_version_field,
            self.dependency_version_field,
            self.dependent_url_field,
        ]
        for field in configured_fields:
            if field is not None and field not in data.columns:
                raise ValueError(f"Field {field} not found on data.")

        self.data = data

    def obtain_package_names(self) -> List[str]:
        """
        Obtains the list of packages from the data source, sorted alphabetically.
        Rows whose dependent field is empty (NaN) are ignored.

        Returns
        -------
        List[str]
            The list of package names in the data source

        Raises
        ------
        ValueError
            If the data is not loaded

        Examples
        --------
        >>> data_source = CSVDataSource("test.csv", "name", "dependency")
        >>> data_source.obtain_package_names()
        ["package1", "package2", "package3"]
        """

        # Check if the data is loaded
        if self.data is None:
            raise ValueError("Data is not loaded.")

        # NaN names cannot be ordered against strings, so drop them before sorting
        names = self.data[self.dependent_field].dropna().unique()
        return sorted(names)

    def obtain_package_data(self, package_name: str, override_previous: bool = True) -> Optional[dict]:
        """
        Obtains the package from the dataframe

        Parameters
        ----------
        package_name : str
            The name of the package
        override_previous : bool
            If True and a dependent version field is configured, only the rows of
            the highest version of the package are used. Versions are compared
            with ``max()``, so string versions are ordered lexicographically.

        Returns
        -------
        Optional[dict]
            The data of the package in the form of a dictionary, or None if the
            package is not in the data

        Raises
        ------
        ValueError
            If the data is not loaded

        Examples
        --------
        >>> data_source = CSVDataSource(
        ...     "test.csv", "name", "dependency",
        ...     dependent_version_field="version",
        ...     dependency_version_field="dependency_version",
        ...     dependent_url_field="url",
        ... )
        >>> data_source.obtain_package_data("package1")
        {
            "name": "package1",
            "version": "1.0.0",
            "url": "<package url>",
            "dependencies": [
                {
                    "name": "package2",
                    "version": "1.0.0"
                },
            ]
        }
        """

        # Check if the data is loaded
        if self.data is None:
            raise ValueError("Data is not loaded.")

        # Get the rows of the package
        package_rows = self.data[self.data[self.dependent_field] == package_name]

        if package_rows.empty:
            self.logger.debug(f"Package {package_name} not found in data.")
            return None

        # Keep only the rows of the highest version. This is skipped when no
        # version field is configured or when the package has no known version,
        # otherwise the package would wrongly be reported as missing.
        if override_previous and self.dependent_version_field is not None:
            known_versions = package_rows[self.dependent_version_field].dropna()
            if not known_versions.empty:
                last_version = known_versions.max()
                package_rows = package_rows[package_rows[self.dependent_version_field] == last_version]

        # Get a list of rows
        records = package_rows.to_dict("records")

        # Get the dependencies
        dependencies = []
        for row in records:
            dependency_name = row[self.dependency_field]

            # Ignore rows without a dependency, e.g. {'name': nan, 'version': nan}
            if pd.isna(dependency_name):
                continue

            dependencies.append({
                "name": dependency_name,
                "version": row[self.dependency_version_field] if self.dependency_version_field is not None else None
            })

        # Version and url of the package are read from its first row
        first_row = records[0]
        return {
            "name": package_name,
            "version": first_row[self.dependent_version_field] if self.dependent_version_field is not None else None,
            "url": first_row[self.dependent_url_field] if self.dependent_url_field is not None else None,
            "dependencies": dependencies
        }

    def obtain_packages_data(
        self,
        package_names: List[str],
        progress_bar: Optional[tqdm.tqdm] = None
    ) -> tuple[List[dict], List[str]]:
        '''
        Obtains the data of a list of package names from the CSV file.
        If the package name list is None, it will obtain the package names from the CSV file and load their data

        Parameters
        ----------
        package_names : List[str]
            The list of package names to obtain the data from
        progress_bar : tqdm.tqdm
            The progress bar to update, advanced by one for every requested name

        Returns
        -------
        tuple[List[dict], List[str]]
            The list of packages data and the list of not found packages

        Examples
        --------
        >>> data_source = CSVDataSource("test.csv", "name", "dependency")
        >>> data_source.obtain_packages_data(["package1", "package2"])
        (
            [
                {
                    "name": "package1",
                    "version": None,
                    "url": None,
                    "dependencies": [ ... ]
                },
                {
                    "name": "package2",
                    "version": None,
                    "url": None,
                    "dependencies": [ ... ]
                }
            ],
            []
        )

        '''

        # Fall back to every package of the CSV file when no list is given
        if package_names is None:
            package_names = self.obtain_package_names()

        # Define the list of packages and the list of not found packages
        packages = []
        not_found = []

        # Iterate over the package names and obtain the data
        for package_name in package_names:
            try:
                package_data = self.obtain_package_data(package_name)
            except ValueError:
                package_data = None

            # A missing package is reported as None by obtain_package_data
            if package_data is None:
                self.logger.debug(f"Package {package_name} not found in data.")
                not_found.append(package_name)
            else:
                packages.append(package_data)

            # Advance the bar for found and not found packages alike
            if progress_bar is not None:
                progress_bar.update(1)

        return packages, not_found
class CSVDataSource(DataSource):
    """
    Class that implements the methods for loading a network from a CSV file.
    Implements the **DataSource** abstract class.

    Every row of the CSV file describes one dependency relation: a dependent
    package (optionally with its version and url) and one of its dependencies
    (optionally with its version).
    """

    def __init__(
        self,
        file_path: str,
        dependent_field: Optional[str] = None,
        dependency_field: Optional[str] = None,
        dependent_version_field: Optional[str] = None,
        dependency_version_field: Optional[str] = None,
        dependent_url_field: Optional[str] = None
    ):
        """
        Constructor of the class. Stores the field names and loads the CSV file.

        Parameters
        ----------
        file_path : str
            The path to the CSV file
        dependent_field : str, optional
            The name of the field that contains the dependent packages, by default None
        dependency_field : str, optional
            The name of the field that contains the dependency packages, by default None
        dependent_version_field : str, optional
            The name of the field that contains the dependent packages versions, by default None
        dependency_version_field : str, optional
            The name of the field that contains the dependency packages versions, by default None
        dependent_url_field : str, optional
            The name of the field that contains the dependent packages urls, by default None

        Raises
        ------
        FileNotFoundError
            If the file does not exist
        ValueError
            If the file path is None, if the file is not a CSV file, if the dependent
            or the dependency field is None, if both are the same, or if a configured
            field is not a column of the CSV file
        """

        # Set the dataframe as None and the fields
        self.data: Optional[pd.DataFrame] = None
        self.dependent_field = dependent_field
        self.dependency_field = dependency_field
        self.dependent_version_field = dependent_version_field
        self.dependency_version_field = dependency_version_field
        self.dependent_url_field = dependent_url_field
        self.file_path = file_path

        # Initialize the base class; self.logger is used right after this call
        super().__init__()

        # A data source without a file cannot be loaded, so fail early
        if self.file_path is None:
            self.logger.debug("File path is None. Data not loaded.")
            raise ValueError("File path cannot be None.")

        self._load_data()

    def _load_data(self):
        """
        Loads the data from a CSV file like [name,version,url,dependency,dependency_version]
        Only the dependent and dependency fields are mandatory; the version and url
        fields are checked only when they were configured.

        ``self.data`` is assigned only after every check has passed, so a failed
        load never leaves a partially validated dataframe behind.

        Raises
        ------
        FileNotFoundError
            If the file does not exist
        ValueError
            If the file path is None, if the file is not a CSV file, if the dependent
            or the dependency field is None, if both are the same, or if a configured
            field is not a column of the CSV file
        """

        # Check the file is valid
        if self.file_path is None:
            raise ValueError("File path cannot be None.")

        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"File {self.file_path} not found.")

        # Accept the extension in any letter case (".csv", ".CSV", ...)
        if not self.file_path.lower().endswith(".csv"):
            raise ValueError(f"File {self.file_path} is not a CSV file.")

        # Check that the mandatory fields are set and valid
        if self.dependent_field is None:
            raise ValueError("Dependent field cannot be None.")

        if self.dependency_field is None:
            raise ValueError("Dependency field cannot be None.")

        if self.dependent_field == self.dependency_field:
            raise ValueError("Dependent field and dependency field cannot be the same.")

        # Load into a local first so self.data stays None if validation fails
        data = pd.read_csv(self.file_path)

        # Mandatory fields first, then the optional ones that were configured
        configured_fields = [
            self.dependent_field,
            self.dependency_field,
            self.dependent_version_field,
            self.dependency_version_field,
            self.dependent_url_field,
        ]
        for field in configured_fields:
            if field is not None and field not in data.columns:
                raise ValueError(f"Field {field} not found on data.")

        self.data = data

    def obtain_package_names(self) -> List[str]:
        """
        Obtains the list of packages from the data source, sorted alphabetically.
        Rows whose dependent field is empty (NaN) are ignored.

        Returns
        -------
        List[str]
            The list of package names in the data source

        Raises
        ------
        ValueError
            If the data is not loaded

        Examples
        --------
        >>> data_source = CSVDataSource("test.csv", "name", "dependency")
        >>> data_source.obtain_package_names()
        ["package1", "package2", "package3"]
        """

        # Check if the data is loaded
        if self.data is None:
            raise ValueError("Data is not loaded.")

        # NaN names cannot be ordered against strings, so drop them before sorting
        names = self.data[self.dependent_field].dropna().unique()
        return sorted(names)

    def obtain_package_data(self, package_name: str, override_previous: bool = True) -> Optional[dict]:
        """
        Obtains the package from the dataframe

        Parameters
        ----------
        package_name : str
            The name of the package
        override_previous : bool
            If True and a dependent version field is configured, only the rows of
            the highest version of the package are used. Versions are compared
            with ``max()``, so string versions are ordered lexicographically.

        Returns
        -------
        Optional[dict]
            The data of the package in the form of a dictionary, or None if the
            package is not in the data

        Raises
        ------
        ValueError
            If the data is not loaded

        Examples
        --------
        >>> data_source = CSVDataSource(
        ...     "test.csv", "name", "dependency",
        ...     dependent_version_field="version",
        ...     dependency_version_field="dependency_version",
        ...     dependent_url_field="url",
        ... )
        >>> data_source.obtain_package_data("package1")
        {
            "name": "package1",
            "version": "1.0.0",
            "url": "<package url>",
            "dependencies": [
                {
                    "name": "package2",
                    "version": "1.0.0"
                },
            ]
        }
        """

        # Check if the data is loaded
        if self.data is None:
            raise ValueError("Data is not loaded.")

        # Get the rows of the package
        package_rows = self.data[self.data[self.dependent_field] == package_name]

        if package_rows.empty:
            self.logger.debug(f"Package {package_name} not found in data.")
            return None

        # Keep only the rows of the highest version. This is skipped when no
        # version field is configured or when the package has no known version,
        # otherwise the package would wrongly be reported as missing.
        if override_previous and self.dependent_version_field is not None:
            known_versions = package_rows[self.dependent_version_field].dropna()
            if not known_versions.empty:
                last_version = known_versions.max()
                package_rows = package_rows[package_rows[self.dependent_version_field] == last_version]

        # Get a list of rows
        records = package_rows.to_dict("records")

        # Get the dependencies
        dependencies = []
        for row in records:
            dependency_name = row[self.dependency_field]

            # Ignore rows without a dependency, e.g. {'name': nan, 'version': nan}
            if pd.isna(dependency_name):
                continue

            dependencies.append({
                "name": dependency_name,
                "version": row[self.dependency_version_field] if self.dependency_version_field is not None else None
            })

        # Version and url of the package are read from its first row
        first_row = records[0]
        return {
            "name": package_name,
            "version": first_row[self.dependent_version_field] if self.dependent_version_field is not None else None,
            "url": first_row[self.dependent_url_field] if self.dependent_url_field is not None else None,
            "dependencies": dependencies
        }

    def obtain_packages_data(
        self,
        package_names: List[str],
        progress_bar: Optional[tqdm.tqdm] = None
    ) -> tuple[List[dict], List[str]]:
        '''
        Obtains the data of a list of package names from the CSV file.
        If the package name list is None, it will obtain the package names from the CSV file and load their data

        Parameters
        ----------
        package_names : List[str]
            The list of package names to obtain the data from
        progress_bar : tqdm.tqdm
            The progress bar to update, advanced by one for every requested name

        Returns
        -------
        tuple[List[dict], List[str]]
            The list of packages data and the list of not found packages

        Examples
        --------
        >>> data_source = CSVDataSource("test.csv", "name", "dependency")
        >>> data_source.obtain_packages_data(["package1", "package2"])
        (
            [
                {
                    "name": "package1",
                    "version": None,
                    "url": None,
                    "dependencies": [ ... ]
                },
                {
                    "name": "package2",
                    "version": None,
                    "url": None,
                    "dependencies": [ ... ]
                }
            ],
            []
        )

        '''

        # Fall back to every package of the CSV file when no list is given
        if package_names is None:
            package_names = self.obtain_package_names()

        # Define the list of packages and the list of not found packages
        packages = []
        not_found = []

        # Iterate over the package names and obtain the data
        for package_name in package_names:
            try:
                package_data = self.obtain_package_data(package_name)
            except ValueError:
                package_data = None

            # A missing package is reported as None by obtain_package_data
            if package_data is None:
                self.logger.debug(f"Package {package_name} not found in data.")
                not_found.append(package_name)
            else:
                packages.append(package_data)

            # Advance the bar for found and not found packages alike
            if progress_bar is not None:
                progress_bar.update(1)

        return packages, not_found
Class that implements the methods for loading a network from a CSV file. Implements the DataSource abstract class.
CSVDataSource( file_path: str, dependent_field: Optional[str] = None, dependency_field: Optional[str] = None, dependent_version_field: Optional[str] = None, dependency_version_field: Optional[str] = None, dependent_url_field: Optional[str] = None)
def __init__(
    self,
    file_path: str,
    dependent_field: Optional[str] = None,
    dependency_field: Optional[str] = None,
    dependent_version_field: Optional[str] = None,
    dependency_version_field: Optional[str] = None,
    dependent_url_field: Optional[str] = None
):
    """
    Build the data source: remember the field names, then load the CSV file.

    Parameters
    ----------
    file_path : str
        The path to the CSV file
    dependent_field : str, optional
        Column holding the dependent package names, by default None
    dependency_field : str, optional
        Column holding the dependency package names, by default None
    dependent_version_field : str, optional
        Column holding the dependent package versions, by default None
    dependency_version_field : str, optional
        Column holding the dependency package versions, by default None
    dependent_url_field : str, optional
        Column holding the dependent package urls, by default None

    Raises
    ------
    ValueError
        If the file path is None. Loading the file (``_load_data``) may raise
        further errors of its own.
    """

    # No dataframe yet; it is filled in by _load_data()
    self.data: Optional[pd.DataFrame] = None

    # Column names used to read the dataframe
    self.dependent_field = dependent_field
    self.dependency_field = dependency_field
    self.dependent_version_field = dependent_version_field
    self.dependency_version_field = dependency_version_field
    self.dependent_url_field = dependent_url_field
    self.file_path = file_path

    # Base-class initialisation; self.logger is used right after this call
    super().__init__()

    # Guard clause: without a path there is nothing to load
    if self.file_path is None:
        self.logger.debug("File path is None. Data not loaded.")
        raise ValueError("File path cannot be None.")

    self._load_data()
Constructor of the class
Parameters
- file_path (str): The path to the CSV file
- dependent_field (str, optional): The name of the field that contains the dependent packages, by default None
- dependency_field (str, optional): The name of the field that contains the dependency packages, by default None
- dependent_version_field (str, optional): The name of the field that contains the dependent packages versions, by default None
- dependency_version_field (str, optional): The name of the field that contains the dependency packages versions, by default None
- dependent_url_field (str, optional): The name of the field that contains the dependent packages urls, by default None
Raises
- ValueError: If the file path is None, if the file is not a CSV file, or if the dependent field or the dependency field is None.
def
obtain_package_names(self) -> List[str]:
128 def obtain_package_names(self) -> List[str]: 129 """ 130 Obtains the list of packages from the data source, sorted alphabetically. 131 132 Returns 133 ------- 134 List[str] 135 The list of package names in the data source 136 137 Examples 138 -------- 139 >>> data_source = CSVDataSource("test.csv", "name", "dependency") 140 >>> data_source.obtain_package_names() 141 ["package1", "package2", "package3"] 142 """ 143 144 # Check if the data is loaded 145 if self.data is None: 146 raise ValueError("Data is not loaded.") 147 148 # Return the list of packages 149 return sorted(self.data[self.dependent_field].unique())
Obtains the list of packages from the data source, sorted alphabetically.
Returns
- List[str]: The list of package names in the data source
Examples
>>> data_source = CSVDataSource("test.csv", "name", "dependency")
>>> data_source.obtain_package_names()
["package1", "package2", "package3"]
def
obtain_package_data(self, package_name: str, override_previous: bool = True) -> dict:
151 def obtain_package_data(self, package_name: str, override_previous: bool = True) -> dict: 152 """ 153 Obtains the package from the dataframe 154 155 Parameters 156 ---------- 157 package_name : str 158 The name of the package 159 override_previous : bool 160 If True, it will override the previous data with the same name but different version 161 162 Returns 163 ------- 164 dict 165 The data of the package in the form of a dictionary 166 167 Examples 168 -------- 169 >>> data_source = CSVDataSource("test.csv", "name", "dependency") 170 >>> data_source.obtain_package_data("package1") 171 { 172 "name": "package1", 173 "version": "1.0.0", 174 "url": " 175 "dependencies": [ 176 { 177 "name": "package2", 178 "version": "1.0.0" 179 }, 180 ] 181 } 182 """ 183 184 # Check if the data is loaded 185 if self.data is None: 186 raise ValueError("Data is not loaded.") 187 188 # Get the rows of the package 189 package_rows = self.data[self.data[self.dependent_field] == package_name] 190 191 # Remove the previous data with the same name but different version 192 if override_previous: 193 # Get the last row 194 last_version = package_rows[self.dependent_version_field].max() 195 package_rows = package_rows[package_rows[self.dependent_version_field] == last_version] 196 197 if package_rows.empty: 198 self.logger.debug(f"Package {package_name} not found in data.") 199 return None 200 #raise ValueError(f"Package {package_name} not found in data.") 201 202 # Get the dependencies 203 dependencies = [] 204 205 # Get a list of rows 206 package_rows = package_rows.to_dict("records") 207 208 for row in package_rows: 209 # Get the dependency name and version 210 dependency_name = row[self.dependency_field] 211 dependency_version = row[self.dependency_version_field] if self.dependency_version_field is not None else None 212 213 # Build the dependency dictionary 214 # Iggnore {'name': nan, 'version': nan} 215 if pd.isna(dependency_name): 216 continue 217 218 dependency = { 219 "name": 
dependency_name, 220 "version": dependency_version 221 } 222 223 # Add the dependency to the list 224 dependencies.append(dependency) 225 226 # Return the data 227 return { 228 "name": package_name, 229 "version": package_rows[0][self.dependent_version_field] if self.dependent_version_field is not None else None, 230 "url": package_rows[0][self.dependent_url_field] if self.dependent_url_field is not None else None, 231 "dependencies": dependencies 232 }
Obtains the package from the dataframe
Parameters
- package_name (str): The name of the package
- override_previous (bool): If True, only the rows of the highest version of the package are used; the rows of its other versions are ignored
Returns
- dict: The data of the package in the form of a dictionary
Examples
>>> data_source = CSVDataSource("test.csv", "name", "dependency")
>>> data_source.obtain_package_data("package1")
{
"name": "package1",
"version": "1.0.0",
"url": "<package url>",
"dependencies": [
{
"name": "package2",
"version": "1.0.0"
},
]
}
def
obtain_packages_data( self, package_names: List[str], progress_bar: Optional[tqdm.std.tqdm] = None) -> tuple[typing.List[dict], typing.List[str]]:
234 def obtain_packages_data( 235 self, 236 package_names: List[str], 237 progress_bar: Optional[tqdm.tqdm] = None 238 ) -> tuple[List[dict], List[str]]: 239 ''' 240 Obtains the data of a list of package names from the CSV file 241 If the package name list is None, it will obtain the package names from the CSV file and load their data 242 243 Parameters 244 ---------- 245 package_names : List[str] 246 The list of package names to obtain the data from 247 progress_bar : tqdm.tqdm 248 The progress bar to update 249 250 Returns 251 ------- 252 tuple[List[dict], List[str]] 253 The list of packages data and the list of not found packages 254 255 Examples 256 -------- 257 >>> data_source = CSVDataSource("test.csv", "name", "dependency") 258 >>> data_source.obtain_packages_data(["package1", "package2"]) 259 ( 260 [ 261 { 262 "name": "package1", 263 "version": "1.0.0", 264 "url": " 265 "dependencies": [ ... ] 266 }, 267 { 268 "name": "package2", 269 "version": "1.0.0", 270 "url": " 271 "dependencies": [ ... ] 272 } 273 ], 274 [] 275 ) 276 277 ''' 278 279 # Define the list of packages and the list of not found packages 280 packages = [] 281 not_found = [] 282 283 # Iterate over the package names and obtain the data 284 for package_name in package_names: 285 try: 286 packages.append(self.obtain_package_data(package_name)) 287 288 # If the package is not found, add it to the not found list, and continue 289 except ValueError: 290 self.logger.debug(f"Package {package_name} not found in data.") 291 not_found.append(package_name) 292 continue 293 294 if progress_bar is not None: 295 progress_bar.update(1) 296 297 return packages, not_found
Obtains the data of a list of package names from the CSV file. If the package name list is None, it will obtain the package names from the CSV file and load their data.
Parameters
- package_names (List[str]): The list of package names to obtain the data from
- progress_bar (tqdm.tqdm): The progress bar to update
Returns
- tuple[List[dict], List[str]]: The list of packages data and the list of not found packages
Examples
>>> data_source = CSVDataSource("test.csv", "name", "dependency")
>>> data_source.obtain_packages_data(["package1", "package2"])
(
[
{
"name": "package1",
"version": "1.0.0",
"url": "<package url>",
"dependencies": [ ... ]
},
{
"name": "package2",
"version": "1.0.0",
"url": "<package url>",
"dependencies": [ ... ]
}
],
[]
)