# -*- coding: utf-8 -*-
'''
Created on Nov. 12, 2012
@author: O. Doppelt
@author: E. Legros
@author: H. Menager
@author: B. Neron
@author: O. Sallou
@license: GPLv3
'''
import os, shutil, copy
import string, re
from mongokit import SchemaDocument
from .type import *
# Shell special characters and any whitespace; each match becomes '_'.
_UNSAFE_CHARS_RE = re.compile(r"""[ ~%#"'<>&*;$`|()\[\]{}?\s]""")
# Everything up to and including the last path separator, POSIX or Windows.
_LEADING_DIRS_RE = re.compile(r"^.*[\\/]")


def safe_file_name(file_name, reserved=None):
    """
    Return a file name which will not cause any potential
    basic security issue for job execution.

    The name is sanitized in three steps:

    1. every character that is not in ``string.printable`` is replaced
       by an underscore;
    2. shell special characters and whitespace are replaced by an
       underscore;
    3. any leading directory part is removed, whether it is separated
       by ``/`` or by ``\\``, so that the result is always a bare file
       name that cannot point outside of the directory it is joined to.

    :param file_name: the file name (or path) to sanitize.
    :param reserved: collection of "reserved" file names. Accepted for
                     interface compatibility.
                     NOTE(review): the collision policy (reject? rename?)
                     is not defined anywhere in this module, so this
                     argument is currently not consulted.
    :return: the sanitized file name (may be empty if ``file_name``
             ends with a path separator).
    """
    # do not allow non printable / non ascii chars; a set avoids running
    # the same replacement once per occurrence
    for car in set(file_name):
        if car not in string.printable:
            file_name = file_name.replace(car, '_')
    # SECURITY: substitute shell special characters
    file_name = _UNSAFE_CHARS_RE.sub('_', file_name)
    # SECURITY: keep only the last path component. Without stripping '/'
    # as well as '\', a name such as "/etc/passwd" stays absolute and
    # os.path.join(job_dir, name) would silently discard job_dir.
    return _LEADING_DIRS_RE.sub('', file_name)
class AbstractData(SchemaDocument):
    """
    Common base class of every kind of data.

    Sub classes are expected to provide ``type``, ``expr_value`` and
    ``check_value``; the versions defined here only raise
    ``NotImplementedError``.
    """

    # schema fields declared at this level of the hierarchy
    structure = {"_type": unicode}

    @property
    def type(self):
        """
        Type of the data, to be supplied by sub classes.
        """
        raise NotImplementedError

    def expr_value(self):
        """
        Value to use when this data appears in an evaluated
        expression (precond, ctrl, format, etc.).
        To be supplied by sub classes.
        """
        raise NotImplementedError

    def check_value(self):
        """
        Validate the value held by this data.
        To be supplied by sub classes.
        """
        raise NotImplementedError

    def clean(self):
        """
        Hook for cleaning the data; does nothing at this level.
        """
        return None

    def import_to_job(self, job):
        """
        Build the data object to be used in a job.

        :param job: the target job (not used by this default
                    implementation).
        :return: a deep copy of this data.
        """
        duplicate = copy.deepcopy(self)
        return duplicate
class SimpleData(AbstractData):
    """
    Data described by one simple type, kept in its ``type`` field.
    """

    # the type object is stored directly in the document
    structure = {"type": Type}

    @property
    def type(self):
        """
        The content of the ``type`` field of this document.
        """
        stored_type = self['type']
        return stored_type
@connection.register
class RefData(SimpleData):
    """
    A data whose value is stored on the file system
    and referenced through its ``path`` field
    """

    structure = {
        'path': basestring,
        'size': int,
    }

    def clean(self):
        """
        Clean "text" data from Windows(TM) encoding

        Currently a placeholder: nothing is done and None is returned.
        """
        # ``self`` is required here: without it, calling clean() on an
        # instance raises TypeError instead of overriding the base hook.
        #TODO only clean if format is text
        return

    def expr_value(self):
        """
        Get the value used for evaluated
        expressions (precond, ctrl, format, etc.)
        i.e. the content of the ``path`` field.
        """
        return self['path']

    def import_to_job(self, job, src_pref=None):
        """
        Copy the referenced file into the directory of a job.

        :param job: the target job; the file is copied into ``job.dir``.
        :param src_pref: optional directory prepended to the stored path
                         to locate the source file.
        :return: a deep copy of this data whose ``path`` is the
                 sanitized name of the file inside the job directory.
        """
        src_path = self['path']
        #TODO, this is a naive implementation that does not handle
        # any potential copy problem
        if src_pref is not None:
            src_path = os.path.join(src_pref, src_path)
        # the destination name is derived from the stored path, not from
        # the prefixed source path, then sanitized before use
        dst_file_name = safe_file_name(self['path'])
        dst_path = os.path.join(job.dir, dst_file_name)
        shutil.copy(src_path, dst_path)
        data_object = copy.deepcopy(self)
        data_object['path'] = dst_file_name
        return data_object
@connection.register
class ValueData(SimpleData):
    """
    Data carrying its value inline, in the ``value`` field
    """

    # None: no type constraint is declared for the value in the schema
    structure = {'value': None}

    def expr_value(self):
        """
        The stored value, used as is in evaluated expressions.
        """
        return self['value']

    def check_value(self):
        """
        Delegate the validation of the stored value to the type
        of this data. The result of the type's check is not returned.
        """
        checker = self['type'].check_value
        checker(self['value'])
@connection.register
class ListData(SimpleData):
    """
    Data made of a list of data elements, kept in the ``value`` field
    """

    structure = {'value': [AbstractData]}

    def expr_value(self):
        """
        Value used in evaluated expressions
        (precond, ctrl, format, etc.):
        the list of the expr_value of each element, in order.
        """
        element_values = []
        for element in self['value']:
            element_values.append(element.expr_value())
        return element_values
@connection.register
class StructData(AbstractData):
    """
    Data made of named properties, each one referencing another data
    """

    structure = {
        # property name -> data
        'properties': {basestring: AbstractData},
        # Temporary storage to get list of files, waiting for user
        # to map them with ontology terms
        'files': None,
    }

    def expr_value(self):
        """
        Value used in evaluated expressions
        (precond, ctrl, format, etc.):
        a dictionary mapping each property name to the
        expr_value of the corresponding data.
        """
        evaluated = {}
        for name, data in self['properties'].items():
            evaluated[name] = data.expr_value()
        return evaluated

    @property
    def type(self):
        """
        A StructType built from the type of each property.
        """
        member_types = dict(
            (name, data.type) for name, data in self['properties'].items()
        )
        return StructType({'properties': member_types})
def new_data(new_data_type):
    """
    Create an empty data object suited to a given type.

    The data class is selected by testing the type, in this order:
    FormattedType -> RefData, StructType -> StructData,
    ArrayType -> ListData; any other type gives a ValueData.

    :param new_data_type: the type the new data must have.
    :return: a new data object whose 'type' entry is ``new_data_type``.
    """
    # order matters: the first matching type class wins
    dispatch = (
        (FormattedType, RefData),
        (StructType, StructData),
        (ArrayType, ListData),
    )
    data_class = ValueData
    for type_class, candidate in dispatch:
        if isinstance(new_data_type, type_class):
            data_class = candidate
            break
    data = data_class()
    # NOTE(review): 'type' is assigned for every data class, including
    # StructData whose schema declares no 'type' field -- confirm intent.
    data['type'] = new_data_type
    return data